
The Linux 5.0 NVMe Driver with Detailed Annotations

This article is a fairly complete walkthrough of the NVMe driver in Linux 5.0, with detailed comments on essentially every function. Everything is presented as code plus annotations, which keeps it clear and easy to follow. Please consider following this blog; the next article will cover the driver's data structures in detail.

// 1. nvme_probe
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int node, result = -ENOMEM;
	struct nvme_dev *dev;
	unsigned long quirks = id->driver_data;
	size_t alloc_size;

	node = dev_to_node(&pdev->dev);
	if (node == NUMA_NO_NODE)
		set_dev_node(&pdev->dev, first_memory_node);

	// Allocate the nvme_dev structure
	dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
	if (!dev)
		return -ENOMEM;

	// Allocate space for the I/O queues plus the admin queue
	dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue),
					GFP_KERNEL, node);
	if (!dev->queues)
		goto free;

	dev->dev = get_device(&pdev->dev);	// take a reference on pdev->dev
	pci_set_drvdata(pdev, dev);

	// Map the BAR; initializes dev->bar (the NVMe registers),
	// dev->bar_mapped_size (8192) and dev->dbs (doorbell register base)
	result = nvme_dev_map(dev);
	if (result)
		goto put_pci;

	// Initialize the work items and the shutdown lock
	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
	mutex_init(&dev->shutdown_lock);

	// Create DMA pools of 256 B and 4 KiB chunks; initializes
	// dev->prp_page_pool and dev->prp_small_pool
	result = nvme_setup_prp_pools(dev);
	if (result)
		goto unmap;

	quirks |= check_vendor_combination_bug(pdev);

	/*
	 * Double check that our mempool alloc size will cover the biggest
	 * command we support.
	 */
	alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
						NVME_MAX_SEGS, true);
	WARN_ON_ONCE(alloc_size > PAGE_SIZE);

	// Create dev->iod_mempool as a last-resort allocation reserve
	dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
						mempool_kfree,
						(void *) alloc_size,
						GFP_KERNEL, node);
	if (!dev->iod_mempool) {
		result = -ENOMEM;
		goto release_pools;
	}

	// Initialize the nvme_ctrl structure
	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
			quirks);
	if (result)
		goto release_mempool;

	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));

	nvme_get_ctrl(&dev->ctrl);
	// Schedules reset_work, i.e. nvme_reset_work
	async_schedule(nvme_async_probe, dev);

	return 0;

 release_mempool:
	mempool_destroy(dev->iod_mempool);
 release_pools:
	nvme_release_prp_pools(dev);
 unmap:
	nvme_dev_unmap(dev);
 put_pci:
	put_device(dev->dev);
 free:
	kfree(dev->queues);
	kfree(dev);
	return result;
}
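
For context, nvme_probe is not called directly: the PCI core invokes it for every device that matches the driver's ID table. Below is a simplified sketch of the hookup in pci.c (abridged; the real table also lists explicit vendor/device IDs with per-device quirks, and nvme_init() additionally creates the driver workqueues before registering):

#include <linux/pci.h>

// Simplified sketch of how nvme_probe is wired up (abridged from pci.c)
static const struct pci_device_id nvme_id_table[] = {
	// Match any device exposing the NVMe class code (0x010802)
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);

static struct pci_driver nvme_driver = {
	.name		= "nvme",
	.id_table	= nvme_id_table,
	.probe		= nvme_probe,	// invoked once per matching PCI function
	.remove		= nvme_remove,
};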
// 2. nvme_reset_work
static void nvme_reset_work(struct work_struct *work)
{
	struct nvme_dev *dev =
		container_of(work, struct nvme_dev, ctrl.reset_work);
	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
	int result = -ENODEV;
	enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;

	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
		goto out;

	/*
	 * If we're called to reset a live controller first shut it down before
	 * moving on.
	 */
	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
		nvme_dev_disable(dev, false);

	mutex_lock(&dev->shutdown_lock);
	// Mainly programs the PCI config space and the NVMe controller
	// registers; see 2.1 below
	result = nvme_pci_enable(dev);
	if (result)
		goto out_unlock;

	// Set up the admin queue, both SQ and CQ; see 2.2 below
	result = nvme_pci_configure_admin_queue(dev);
	if (result)
		goto out_unlock;

	result = nvme_alloc_admin_tags(dev);
	if (result)
		goto out_unlock;

	/*
	 * Limit the max command size to prevent iod->sg allocations going
	 * over a single page.
	 */
	dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;	// max sectors per transfer
	dev->ctrl.max_segments = NVME_MAX_SEGS;		// max number of segments
	mutex_unlock(&dev->shutdown_lock);

	/*
	 * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
	 * initializing procedure here.
	 */
	// Move dev->ctrl.state to CONNECTING
	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller CONNECTING\n");
		goto out;
	}

	result = nvme_init_identify(&dev->ctrl);
	if (result)
		goto out;

	if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
		if (!dev->ctrl.opal_dev)
			dev->ctrl.opal_dev =
				init_opal_dev(&dev->ctrl, &nvme_sec_submit);
		else if (was_suspend)
			opal_unlock_from_suspend(dev->ctrl.opal_dev);
	} else {
		free_opal_dev(dev->ctrl.opal_dev);
		dev->ctrl.opal_dev = NULL;
	}

	// The controller supports the Doorbell Buffer Config command
	if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
		// Allocate dev->dbbuf_dbs and dev->dbbuf_eis; the DMA addresses
		// go to dev->dbbuf_dbs_dma_addr and dev->dbbuf_eis_dma_addr
		result = nvme_dbbuf_dma_alloc(dev);
		if (result)
			dev_warn(dev->dev,
				 "unable to allocate dma for dbbuf\n");
	}

	// The controller advertises a preferred Host Memory Buffer size
	if (dev->ctrl.hmpre) {
		// Build a Set Features command (opcode=0x09, fid=0x0d) to set up
		// the Host Memory Buffer; for dwords 11-15 see Figure 330:
		// Host Memory Buffer – Command Dword 11 through Figure 334
		result = nvme_setup_host_mem(dev);
		if (result < 0)
			goto out;
	}

	result = nvme_setup_io_queues(dev);
	if (result)
		goto out;

	/*
	 * Keep the controller around but remove all namespaces if we don't have
	 * any working I/O queue.
	 */
	if (dev->online_queues < 2) {
		dev_warn(dev->ctrl.device, "IO queues not created\n");
		nvme_kill_queues(&dev->ctrl);
		nvme_remove_namespaces(&dev->ctrl);
		new_state = NVME_CTRL_ADMIN_ONLY;
	} else {
		nvme_start_queues(&dev->ctrl);
		nvme_wait_freeze(&dev->ctrl);
		/* hit this only when allocate tagset fails */
		if (nvme_dev_add(dev))
			new_state = NVME_CTRL_ADMIN_ONLY;
		nvme_unfreeze(&dev->ctrl);
	}

	/*
	 * If only admin queue live, keep it to do further investigation or
	 * recovery.
	 */
	if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller state %d\n", new_state);
		goto out;
	}

	nvme_start_ctrl(&dev->ctrl);
	return;

 out_unlock:
	mutex_unlock(&dev->shutdown_lock);
 out:
	nvme_remove_dead_ctrl(dev, result);
}
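
Worth noting: nvme_reset_work only runs with the controller already in the RESETTING state, hence the WARN_ON at the top. Callers (the sysfs reset handler, the timeout/error paths, and nvme_async_probe above) go through nvme_reset_ctrl in core.c, which performs the state transition and then queues the work. A sketch from memory, close to the 5.0 code:

// Sketch of nvme_reset_ctrl() from core.c: the only legitimate way to
// kick nvme_reset_work
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
	// Refuse unless the state machine allows entering RESETTING
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
		return -EBUSY;
	// Queue ctrl->reset_work, i.e. nvme_reset_work for PCIe controllers
	if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
		return -EBUSY;
	return 0;
}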
// 2.1 nvme_reset_work --> nvme_pci_enable
static int nvme_pci_enable(struct nvme_dev *dev)
{
	int result = -ENOMEM;
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	// Enable the device's memory space: sets bit 1 (Memory Space) of the
	// COMMAND register in PCI config space. The device's memory or I/O
	// address space can only be accessed once the I/O and Memory Space
	// bits of the COMMAND register are enabled.
	if (pci_enable_device_mem(pdev))
		return result;

	// Set bit 2 (Bus Master) of the COMMAND register so the device can
	// initiate DMA as a bus master
	pci_set_master(pdev);

	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
		goto disable;

	// Read the NVMe CSTS (controller status) register
	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
		result = -ENODEV;
		goto disable;
	}

	/*
	 * Some devices and/or platforms don't advertise or work with INTx
	 * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
	 * adjust this later.
	 */
	// Set up the IRQ
	result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
	if (result < 0)
		return result;

	// Read the controller's CAP register
	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);

	// CAP.MQES is the maximum size of an individual queue supported by
	// the controller; the depth is the smaller of MQES+1 and the
	// io_queue_depth module parameter
	dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
				io_queue_depth);
	// CAP.DSTRD (Doorbell Stride) gives the spacing between doorbell
	// registers: 2^(2 + DSTRD) bytes
	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
	// Base address of the doorbell registers, i.e. the address of SQ0TDBL
	dev->dbs = dev->bar + 4096;
	nvme_map_cmb(dev);

	pci_enable_pcie_error_reporting(pdev);
	pci_save_state(pdev);
	return 0;

 disable:
	pci_disable_device(pdev);
	return result;
}
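
The doorbell layout follows directly from CAP.DSTRD: submission-queue tail and completion-queue head doorbells are interleaved starting at BAR offset 0x1000, spaced 4 << DSTRD bytes apart. A small hypothetical helper (my own, not part of the driver) to make the arithmetic concrete:

#include <stdint.h>

// Hypothetical helpers illustrating the NVMe doorbell layout; the driver
// computes the same thing via dev->dbs and dev->db_stride.
//   SQyTDBL offset = 0x1000 + (2*qid)     * (4 << DSTRD)
//   CQyHDBL offset = 0x1000 + (2*qid + 1) * (4 << DSTRD)
static inline uint64_t nvme_sq_doorbell_off(uint32_t qid, uint32_t dstrd)
{
	return 0x1000 + (2 * qid) * (4ULL << dstrd);
}

static inline uint64_t nvme_cq_doorbell_off(uint32_t qid, uint32_t dstrd)
{
	return 0x1000 + (2 * qid + 1) * (4ULL << dstrd);
}

// With DSTRD = 0 (the common case): SQ0TDBL at 0x1000, CQ0HDBL at 0x1004,
// SQ1TDBL at 0x1008 -- which is why the driver sets dev->dbs = bar + 4096
// and spaces entries by dev->db_stride (counted in 4-byte units).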
// 2.2 nvme_reset_work --> nvme_pci_configure_admin_queue
static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	struct nvme_queue *nvmeq;

	// ioremap the PCI BAR into the kernel virtual address space;
	// initializes dev->bar, dev->bar_mapped_size (8192) and dev->dbs
	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
	if (result < 0)
		return result;

	// Read the VS register to get the controller version; if it is at
	// least 1.1.0, record CAP.NSSRC (NVM Subsystem Reset Supported) in
	// dev->subsystem
	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
				NVME_CAP_NSSRC(dev->ctrl.cap) : 0;

	if (dev->subsystem &&
	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
		// CSTS.NSSRO ("NVM Subsystem Reset Occurred") is
		// write-1-to-clear, so writing the bit back acknowledges it
		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);

	// Clear CC.EN (with EN at 0 the controller stops processing commands)
	// and CC.SHN, then wait for CSTS.RDY to drop to 0
	result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
	if (result < 0)
		return result;

	// 1. Allocate the completion queue with dma_alloc_coherent; the virtual
	//    address goes to nvmeq->cqes, the DMA address to nvmeq->cq_dma_addr.
	// 2. Call nvme_alloc_sq_cmds --> dma_alloc_coherent for the submission
	//    queue (qid 0 cannot use pci_alloc_p2pmem); virtual address in
	//    nvmeq->sq_cmds, DMA address in nvmeq->sq_dma_addr.
	// 3. Initialize the dev->queues[0] structure.
	result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
	if (result)
		return result;

	nvmeq = &dev->queues[0];
	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	// Program the AQA register: the queue depth goes into ACQS (Admin
	// Completion Queue Size) and ASQS (Admin Submission Queue Size)
	writel(aqa, dev->bar + NVME_REG_AQA);
	// Write the DMA address of the admin SQ allocated above into the ASQ
	// register (Admin Submission Queue Base Address)
	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
	// Write the DMA address of the admin CQ allocated above into the ACQ
	// register (Admin Completion Queue Base Address)
	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);

	// Program the CC register (set CC.EN) and wait for CSTS.RDY to rise
	result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
	if (result)
		return result;

	nvmeq->cq_vector = 0;
	// 1. Initialize the queue-related fields of dev->queues[0]
	// 2. memset the cqes ring to zero
	// 3. Call nvme_dbbuf_init to set up the device's doorbell buffer
	nvme_init_queue(nvmeq, 0);

	// Request an interrupt for the queue; nvmeq->cq_vector holds the
	// vector number, nvme_irq is the handler and nvmeq is the argument
	// passed to it
	result = queue_request_irq(nvmeq);
	if (result) {
		nvmeq->cq_vector = -1;
		return result;
	}

	// Set the NVMEQ_ENABLED bit in nvmeq->flags
	set_bit(NVMEQ_ENABLED, &nvmeq->flags);
	return result;
}
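
The disable/enable pair around the queue setup is what performs the actual controller reset: clear CC.EN, wait for CSTS.RDY to fall, program AQA/ASQ/ACQ, then set CC.EN and wait for RDY to rise. Below is a condensed sketch of nvme_disable_ctrl from core.c (from memory, quirk handling dropped); nvme_enable_ctrl is symmetric but also programs the page size, arbitration and SQ/CQ entry sizes into CC before setting EN:

// Condensed sketch of nvme_disable_ctrl() in core.c (quirk handling dropped)
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
	int ret;

	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;	// no shutdown notification
	ctrl->ctrl_config &= ~NVME_CC_ENABLE;	// clear CC.EN

	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (ret)
		return ret;

	// Poll CSTS until RDY == 0; CAP.TO bounds the wait, in 500 ms units
	return nvme_wait_ready(ctrl, cap, false);
}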
// 2.3 nvme_reset_work --> nvme_alloc_admin_tags
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
	if (!dev->ctrl.admin_q) {
		// struct blk_mq_tag_set describes the hardware configuration
		// of the block device

		// Queue operations
		dev->admin_tagset.ops = &nvme_mq_admin_ops;
		// Number of hardware queues
		dev->admin_tagset.nr_hw_queues = 1;
		// Queue depth
		dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		// Admin command timeout
		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
		// Per-command driver payload size
		dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false);
		dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
		dev->admin_tagset.driver_data = dev;

		// 1. If there are more hardware queues than CPUs, clamp the
		//    number of hardware queues to the number of CPUs.
		// 2. Allocate nr_cpu blk_mq_tags pointers for set->tags[].
		// 3. Allocate nr_cpu entries for dev->admin_tagset.map[i].mq_map,
		//    used for the software/hardware queue mapping: the index is
		//    the CPU number, the value the hardware queue number.
		// 4. Call blk_mq_map_queues to map CPUs to queues sequentially.
		// 5. Call blk_mq_alloc_rq_maps to allocate, for each hardware
		//    queue, the tag bitmap and requests according to the queue
		//    depth; the request pointers end up in tags->static_rqs[i].
		if (blk_mq_alloc_tag_set(&dev->admin_tagset))
			return -ENOMEM;
		dev->ctrl.admin_tagset = &dev->admin_tagset;

		// 1. Allocate and initialize the request queue
		//    (struct request_queue, dev->ctrl.admin_q).
		// 2. Allocate the software and hardware queues, initialize them
		//    and wire the two together.
		dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
		if (IS_ERR(dev->ctrl.admin_q)) {
			blk_mq_free_tag_set(&dev->admin_tagset);
			return -ENOMEM;
		}
		if (!blk_get_queue(dev->ctrl.admin_q)) {
			nvme_dev_remove_admin(dev);
			dev->ctrl.admin_q = NULL;
			return -ENODEV;
		}
	} else
		blk_mq_unquiesce_queue(dev->ctrl.admin_q);

	return 0;
}
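
The blk_mq_tag_set / blk_mq_init_queue pairing above is the standard blk-mq pattern, not something NVMe-specific. As a minimal, self-contained illustration, here is a hypothetical toy driver (names and values are mine, not from the NVMe code) that sets up one hardware queue the same way the admin queue is set up:

#include <linux/blk-mq.h>
#include <linux/string.h>

// Hypothetical toy driver showing the tag-set pattern used above
static blk_status_t toy_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd)
{
	blk_mq_start_request(bd->rq);
	blk_mq_end_request(bd->rq, BLK_STS_OK);	// complete immediately
	return BLK_STS_OK;
}

static const struct blk_mq_ops toy_mq_ops = {
	.queue_rq = toy_queue_rq,
};

static int toy_init(struct blk_mq_tag_set *set, struct request_queue **q)
{
	memset(set, 0, sizeof(*set));
	set->ops		= &toy_mq_ops;
	set->nr_hw_queues	= 1;	// one hardware queue, like the admin queue
	set->queue_depth	= 30;	// cf. NVME_AQ_MQ_TAG_DEPTH
	set->numa_node		= NUMA_NO_NODE;
	set->cmd_size		= 0;	// no per-request driver payload here
	set->flags		= BLK_MQ_F_NO_SCHED;

	if (blk_mq_alloc_tag_set(set))	// allocates tags + static requests
		return -ENOMEM;

	*q = blk_mq_init_queue(set);	// allocates the request_queue
	if (IS_ERR(*q)) {
		blk_mq_free_tag_set(set);
		return PTR_ERR(*q);
	}
	return 0;
}

BLK_MQ_F_NO_SCHED matters for the admin queue: admin commands must not be reordered or delayed by an I/O scheduler, so the tag set opts out of scheduling entirely.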
// 2.4 nvme_reset_work --> nvme_init_identify
int nvme_init_identify(struct nvme_ctrl *ctrl)
{
	struct nvme_id_ctrl *id;
	u64 cap;
	int ret, page_shift;
	u32 max_hw_sectors;
	bool prev_apst_enabled;

	// Read the version register
	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
	if (ret) {
		dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
		return ret;
	}

	// Read the CAP register
	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
	if (ret) {
		dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
		return ret;
	}

	// CAP.MPSMIN is the minimum host memory page size the controller
	// supports: 2^(12 + MPSMIN) bytes
	page_shift = NVME_CAP_MPSMIN(cap) + 12;

	// NVMe 1.1.0 and later support NVM subsystem reset
	if (ctrl->vs >= NVME_VS(1, 1, 0))
		ctrl->subsystem = NVME_CAP_NSSRC(cap);

	// Build and submit an Identify command (opcode=0x06, cns=1); the
	// controller returns the 4 KiB Identify Controller Data Structure in
	// id. The layout is described in
	// NVM-Express-base-specification-2.0c-2022.10.04, Figure 275:
	// Identify – Identify Controller Data Structure, I/O Command Set
	// Independent, p. 258
	ret = nvme_identify_ctrl(ctrl, &id);
	if (ret) {
		dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
		return -EIO;
	}

	// Bit 1 of LPA set means the controller supports the Commands
	// Supported and Effects log page
	if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
		// Build a Get Log Page command (opcode=0x02); the result is
		// stored in ctrl->effects. The layout is described in
		// NVM-Express-base-specification-2.0c-2022.10.04, Figure 210:
		// Commands Supported and Effects Log Page, p. 200
		ret = nvme_get_effects_log(ctrl);
		if (ret < 0)
			goto out_free;
	}

	// ctrl->identified records whether the controller has already been
	// identified; if not, run the one-time initialization
	if (!ctrl->identified) {
		int i;

		// Initialize the NVMe subsystem from the identify data
		ret = nvme_init_subsystem(ctrl, id);
		if (ret)
			goto out_free;

		for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
			if (quirk_matches(id, &core_quirks[i]))
				ctrl->quirks |= core_quirks[i].quirks;
		}
	}

	if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
		dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
		ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
	}

	// Command Retry Delay times from the identify data; see Figure 92:
	// Completion Queue Entry: Status Field. When the CQE CRD field is 1
	// the retry delay is crdt1, when it is 2 it is crdt2, and so on.
	ctrl->crdt[0] = le16_to_cpu(id->crdt1);
	ctrl->crdt[1] = le16_to_cpu(id->crdt2);
	ctrl->crdt[2] = le16_to_cpu(id->crdt3);

	ctrl->oacs = le16_to_cpu(id->oacs);		// optional admin command support
	ctrl->oncs = le16_to_cpup(&id->oncs);		// optional NVM command support
	ctrl->oaes = le32_to_cpu(id->oaes);		// optional asynchronous events supported
	atomic_set(&ctrl->abort_limit, id->acl + 1);	// max concurrent Abort commands
	ctrl->vwc = id->vwc;				// volatile write cache

	// MDTS: maximum data transfer size, a power of two in units of the
	// minimum page size
	if (id->mdts)
		max_hw_sectors = 1 << (id->mdts + page_shift - 9);
	else
		max_hw_sectors = UINT_MAX;
	// Max sectors per transfer
	ctrl->max_hw_sectors =
		min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);

	// Apply the limits to the request queue
	nvme_set_queue_limits(ctrl, ctrl->admin_q);
	ctrl->sgls = le32_to_cpu(id->sgls);		// SGL support
	ctrl->kas = le16_to_cpu(id->kas);		// keep-alive support
	ctrl->max_namespaces = le32_to_cpu(id->mnan);	// max number of allowed namespaces
	ctrl->ctratt = le32_to_cpu(id->ctratt);		// controller attributes

	// RTD3E: time to enter D3 (shutdown latency), in microseconds
	if (id->rtd3e) {
		/* us -> s */
		u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;

		ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
						 shutdown_timeout, 60);

		if (ctrl->shutdown_timeout != shutdown_timeout)
			dev_info(ctrl->device,
				 "Shutdown timeout set to %u seconds\n",
				 ctrl->shutdown_timeout);
	} else
		ctrl->shutdown_timeout = shutdown_timeout;

	ctrl->npss = id->npss;		// number of power states supported
	ctrl->apsta = id->apsta;	// whether the controller may transition power states autonomously
	prev_apst_enabled = ctrl->apst_enabled;	// whether APST was already enabled
	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
		if (force_apst && id->apsta) {
			dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
			ctrl->apst_enabled = true;
		} else {
			ctrl->apst_enabled = false;
		}
	} else {
		ctrl->apst_enabled = id->apsta;
	}
	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));	// power state descriptors

	if (ctrl->ops->flags & NVME_F_FABRICS) {
		// Fabrics controller
		ctrl->icdoff = le16_to_cpu(id->icdoff);
		ctrl->ioccsz = le32_to_cpu(id->ioccsz);
		ctrl->iorcsz = le32_to_cpu(id->iorcsz);
		ctrl->maxcmd = le16_to_cpu(id->maxcmd);

		/*
		 * In fabrics we need to verify the cntlid matches the
		 * admin connect
		 */
		if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
			ret = -EINVAL;
			goto out_free;
		}

		if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
			dev_err(ctrl->device,
				"keep-alive support is mandatory for fabrics\n");
			ret = -EINVAL;
			goto out_free;
		}
	} else {
		ctrl->cntlid = le16_to_cpu(id->cntlid);	// controller ID
		ctrl->hmpre = le32_to_cpu(id->hmpre);	// HMB preferred size, in 4 KiB units
		ctrl->hmmin = le32_to_cpu(id->hmmin);	// HMB minimum size, in 4 KiB units
		ctrl->hmminds = le32_to_cpu(id->hmminds);	// HMB minimum descriptor entry size
		ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);	// HMB maximum number of descriptor entries
	}

	// Initialize the multipath-related state
	ret = nvme_mpath_init(ctrl, id);
	kfree(id);

	if (ret < 0)
		return ret;

	if (ctrl->apst_enabled && !prev_apst_enabled)
		dev_pm_qos_expose_latency_tolerance(ctrl->device);
	else if (!ctrl->apst_enabled && prev_apst_enabled)
		dev_pm_qos_hide_latency_tolerance(ctrl->device);

	// Build a Set Features command (opcode=0x09, fid=0x0c) to configure
	// Autonomous Power State Transition; see Figure 327: Autonomous Power
	// State Transition – Command Dword 11
	ret = nvme_configure_apst(ctrl);
	if (ret < 0)
		return ret;

	// Same idea, different feature id (timestamp)
	ret = nvme_configure_timestamp(ctrl);
	if (ret < 0)
		return ret;

	ret = nvme_configure_directives(ctrl);
	if (ret < 0)
		return ret;

	ret = nvme_configure_acre(ctrl);
	if (ret < 0)
		return ret;

	ctrl->identified = true;

	return 0;

out_free:
	kfree(id);
	return ret;
}
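
For reference, the Identify command issued above is built like any other admin command: fill in a struct nvme_command and submit it synchronously on the admin queue. A sketch of nvme_identify_ctrl from core.c (from memory, close to the upstream code):

// Sketch of nvme_identify_ctrl() from core.c: opcode 0x06, CNS 01h
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
	struct nvme_command c = { };
	int error;

	c.identify.opcode = nvme_admin_identify;	// 0x06
	c.identify.cns = NVME_ID_CNS_CTRL;		// 01h: controller data structure

	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
	if (!*id)
		return -ENOMEM;

	// The controller DMA-writes the 4 KiB identify data into *id
	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
				     sizeof(struct nvme_id_ctrl));
	if (error)
		kfree(*id);
	return error;
}

As for the MDTS arithmetic: MDTS is a power of two in units of the minimum page size, while the block layer counts 512-byte sectors, hence the "- 9". With CAP.MPSMIN = 0 (page_shift = 12) and id->mdts = 5, max_hw_sectors = 1 << (5 + 12 - 9) = 256 sectors, i.e. a 128 KiB maximum transfer.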
// 2.5 nvme_reset_work --> nvme_setup_io_queues
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	struct nvme_queue *adminq = &dev->queues[0];
	struct pci_dev *pdev = to_pci_dev(dev->dev);
	int result, nr_io_queues;
	unsigned long size;

	nr_io_queues = max_io_queues();
	// Send a Set Features command to set the number of I/O queues
	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
	if (result < 0)
		return result;

	if (nr_io_queues == 0)
		return 0;

	clear_bit(NVMEQ_ENABLED, &adminq->flags);

	// The Controller Memory Buffer can hold the SQs
	if (dev->cmb_use_sqes) {
		result = nvme_cmb_qdepth(dev, nr_io_queues,
				sizeof(struct nvme_command));
		if (result > 0)
			dev->q_depth = result;
		else
			dev->cmb_use_sqes = false;
	}

	do {
		// Compute the BAR size needed for this many doorbells
		size = db_bar_size(dev, nr_io_queues);
		// Remap the BAR
		result = nvme_remap_bar(dev, size);
		if (!result)
			break;
		if (!--nr_io_queues)
			return -ENOMEM;
	} while (1);
	adminq->q_db = dev->dbs;

 retry:
	/* Deregister the admin queue's interrupt */
	pci_free_irq(pdev, 0, adminq);

	/*
	 * If we enable msix early due to not intx, disable it again before
	 * setting up the full range we need.
	 */
	pci_free_irq_vectors(pdev);

	result = nvme_setup_irqs(dev, nr_io_queues);
	if (result <= 0)
		return -EIO;
	dev->num_vecs = result;
	result = max(result - 1, 1);
	dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];

	/*
	 * Should investigate if there's a performance win from allocating
	 * more queues than interrupt vectors; it might allow the submission
	 * path to scale better, even if the receive path is limited by the
	 * number of interrupts.
	 */
	// Request the admin queue IRQ again; the handler is nvme_irq
	result = queue_request_irq(adminq);
	if (result) {
		adminq->cq_vector = -1;
		return result;
	}
	set_bit(NVMEQ_ENABLED, &adminq->flags);

	// See 2.5.1
	result = nvme_create_io_queues(dev);
	if (result || dev->online_queues < 2)
		return result;

	if (dev->online_queues - 1 < dev->max_qid) {
		nr_io_queues = dev->online_queues - 1;
		nvme_disable_io_queues(dev);
		nvme_suspend_io_queues(dev);
		goto retry;
	}
	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
					dev->io_queues[HCTX_TYPE_DEFAULT],
					dev->io_queues[HCTX_TYPE_READ],
					dev->io_queues[HCTX_TYPE_POLL]);
	return 0;
}
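
nvme_set_queue_count wraps Set Features with FID 0x07 (Number of Queues). Both counts are zero-based in dword 11 (NSQR in bits 15:0, NCQR in bits 31:16), and the controller answers in the completion's dword 0 with how many queues it actually granted, which may be fewer than requested. A condensed sketch of the core.c helper (from memory, degraded-controller handling trimmed):

// Condensed from nvme_set_queue_count() in core.c; error paths trimmed
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
	// Zero-based encoding: NSQR in bits 15:0, NCQR in bits 31:16
	u32 q_count = (*count - 1) | ((*count - 1) << 16);
	u32 result;
	int status, nr_io_queues;

	status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count,
				   NULL, 0, &result);
	if (status < 0)
		return status;

	// The controller reports the granted SQ/CQ counts in completion
	// dword 0; take the smaller of the two
	nr_io_queues = min(result & 0xffff, result >> 16) + 1;
	*count = min(*count, nr_io_queues);
	return 0;
}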
// 2.5.1 nvme_reset_work --> nvme_setup_io_queues --> nvme_create_io_queues
static int nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned i, max, rw_queues;
	int ret = 0;

	for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
		// Set up dev->queues[i]: allocate the memory for the I/O
		// submission and completion queues (dev->queues[qid]->cqes
		// holds the virtual address, dev->queues[qid]->cq_dma_addr the
		// DMA address) and initialize the dev->queues[qid] structure
		if (nvme_alloc_queue(dev, i, dev->q_depth)) {
			ret = -ENOMEM;
			break;
		}
	}

	max = min(dev->max_qid, dev->ctrl.queue_count - 1);
	if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) {
		rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] +
				dev->io_queues[HCTX_TYPE_READ];
	} else {
		rw_queues = max;
	}

	for (i = dev->online_queues; i <= max; i++) {
		bool polled = i > rw_queues;

		// See 2.5.1.1
		ret = nvme_create_queue(&dev->queues[i], i, polled);
		if (ret)
			break;
	}

	return ret >= 0 ? 0 : ret;
}
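
To make the default/read/poll split concrete, here is a worked example with assumed numbers (illustrative only, not taken from the code above):

// Assume max_qid = 8 and nvme_setup_irqs() produced:
//   dev->io_queues[HCTX_TYPE_DEFAULT] = 4
//   dev->io_queues[HCTX_TYPE_READ]    = 2
//   dev->io_queues[HCTX_TYPE_POLL]    = 2
// Then rw_queues = 4 + 2 = 6, so:
//   qid 1..6 -> polled = false (interrupt-driven completion)
//   qid 7..8 -> polled = true  (no IRQ; completions reaped by blk-mq polling)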
// 2.5.1.1 nvme_reset_work --> nvme_setup_io_queues --> nvme_create_io_queues --> nvme_create_queue
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;
	s16 vector;

	clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);

	/*
	 * A queue's vector matches the queue identifier unless the controller
	 * has only one vector available.
	 */
	if (!polled)
		vector = dev->num_vecs == 1 ? 0 : qid;
	else
		vector = -1;

	// Send a Create I/O Completion Queue command; see
	// NVM-Express-Base-Specification 5.4 Create I/O Completion Queue command
	result = adapter_alloc_cq(dev, qid, nvmeq, vector);
	if (result)
		return result;

	// Send a Create I/O Submission Queue command; see
	// NVM-Express-Base-Specification 5.5 Create I/O Submission Queue command
	result = adapter_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		return result;
	else if (result)
		goto release_cq;

	nvmeq->cq_vector = vector;
	// 1. Initialize the queue-related fields of dev->queues[qid]
	// 2. memset the cqes ring to zero
	// 3. Call nvme_dbbuf_init to set up the device's doorbell buffer
	nvme_init_queue(nvmeq, qid);

	if (vector != -1) {
		result = queue_request_irq(nvmeq);
		if (result < 0)
			goto release_sq;
	}

	set_bit(NVMEQ_ENABLED, &nvmeq->flags);
	return result;

release_sq:
	nvmeq->cq_vector = -1;
	dev->online_queues--;
	adapter_delete_sq(dev, qid);
release_cq:
	adapter_delete_cq(dev, qid);
	return result;
}
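
For completeness, adapter_alloc_cq shows what one of these admin commands looks like on the wire: opcode 0x05 (Create I/O Completion Queue), PRP1 pointing at the already-allocated CQ memory, and the queue id, zero-based size, flags and interrupt vector packed into the command dwords. A sketch from memory of the 5.0 code:

// Sketch of adapter_alloc_cq() from pci.c (from memory)
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
		struct nvme_queue *nvmeq, s16 vector)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG;	// CDW11.PC: physically contiguous

	if (vector != -1)
		flags |= NVME_CQ_IRQ_ENABLED;	// CDW11.IEN: interrupts enabled

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;		// 0x05
	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);	// CQ base address
	c.create_cq.cqid = cpu_to_le16(qid);			// CDW10.QID
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);	// CDW10.QSIZE, zero-based
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(vector == -1 ? 0 : vector);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}

adapter_alloc_sq is analogous with opcode 0x01 (Create I/O Submission Queue); its dword 11 carries the id of the completion queue the SQ posts to instead of an interrupt vector, which is why the CQ must be created first.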
