From 348b5f0e4d4b3d13b9db5625d1c1a5195f1a5946 Mon Sep 17 00:00:00 2001
From: tb-user
Date: Mon, 5 Nov 2018 12:48:26 +0000
Subject: [PATCH] nvme: add cq_cpulist module parameter

Specify which CPUs shall run completion queues (including IRQ affinity)
at module load time, e.g. nvme.cq_cpulist=0,20,40,60. The driver will
not create more completion queues than there are CPUs in the cpulist.

Change-Id: I73c1b70d67e97397d6ad7101c6265be5a609b357
Signed-off-by: Mykyta Iziumtsev
---
 drivers/nvme/host/pci.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 77 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3be974f..f233553 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include
 
 #include "nvme.h"
 
@@ -65,6 +66,17 @@ static int io_queue_depth = 1024;
 module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
 MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
 
+static int cq_cpulist_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops cq_cpulist_ops = {
+	.set = cq_cpulist_set,
+};
+
+static cpumask_var_t cq_cpumask;
+static bool cq_cpumask_present;
+module_param_cb(cq_cpulist, &cq_cpulist_ops, NULL, 0);
+MODULE_PARM_DESC(cq_cpulist, "Completion queue IRQ affinity cpu list. "
+		 "By default spread IRQs across all online CPUs.");
+
 struct nvme_dev;
 struct nvme_queue;
 
@@ -124,6 +136,25 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 	return param_set_int(val, kp);
 }
 
+static int cq_cpulist_set(const char *val, const struct kernel_param *kp)
+{
+	int err;
+
+	if (!alloc_cpumask_var(&cq_cpumask, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = cpulist_parse(val, cq_cpumask);
+	if (err < 0 || cpumask_last(cq_cpumask) >= nr_cpu_ids) {
+		pr_warn("nvme: can't parse cq_cpulist, skipping\n");
+		free_cpumask_var(cq_cpumask);
+		return 0;
+	}
+
+	cq_cpumask_present = true;
+
+	return 0;
+}
+
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
 {
 	return qid * 2 * stride;
@@ -421,8 +452,26 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
 static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nvme_dev *dev = set->driver_data;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	const struct cpumask *mask;
+	unsigned int queue, cpu;
+
+	for (queue = 0; queue < set->nr_hw_queues; queue++) {
+		mask = irq_get_affinity_mask(pci_irq_vector(pdev, queue));
+		if (!mask)
+			goto fallback;
 
-	return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev));
+		for_each_cpu(cpu, mask)
+			set->mq_map[cpu] = queue;
+	}
+
+	return 0;
+
+fallback:
+	WARN_ON_ONCE(set->nr_hw_queues > 1);
+	for_each_possible_cpu(cpu)
+		set->mq_map[cpu] = 0;
+	return 0;
 }
 
 /**
@@ -1918,7 +1967,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	int result, nr_io_queues;
 	unsigned long size;
 
-	nr_io_queues = num_possible_cpus();
+	if (cq_cpumask_present)
+		nr_io_queues = cpumask_weight(cq_cpumask);
+	else
+		nr_io_queues = num_possible_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
 		return result;
@@ -1953,8 +2005,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	 * setting up the full range we need.
 	 */
 	pci_free_irq_vectors(pdev);
-	nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues,
-			PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
+	nr_io_queues =
+		pci_alloc_irq_vectors(pdev, 1, nr_io_queues, PCI_IRQ_ALL_TYPES |
+			(cq_cpumask_present ? 0 : PCI_IRQ_AFFINITY));
 	if (nr_io_queues <= 0)
 		return -EIO;
 	dev->max_qid = nr_io_queues;
@@ -1971,7 +2024,24 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 		adminq->cq_vector = -1;
 		return result;
 	}
-	return nvme_create_io_queues(dev);
+
+	result = nvme_create_io_queues(dev);
+	if (result)
+		return result;
+
+	if (cq_cpumask_present) {
+		unsigned int cpu = cpumask_next(-1, cq_cpumask);
+		int cq_vector;
+
+		for (cq_vector = 0; cq_vector < nr_io_queues; cq_vector++) {
+			irq_set_affinity_hint(pci_irq_vector(pdev, cq_vector),
+					      get_cpu_mask(cpu));
+
+			cpu = cpumask_next(cpu, cq_cpumask);
+		}
+	}
+
+	return result;
 }
 
 static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2739,6 +2809,8 @@ static void __exit nvme_exit(void)
 	pci_unregister_driver(&nvme_driver);
 	flush_workqueue(nvme_wq);
 	_nvme_check_size();
+	if (cq_cpumask_present)
+		free_cpumask_var(cq_cpumask);
 }
 
 MODULE_AUTHOR("Matthew Wilcox ");
-- 
2.7.4
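
Editor's note, not part of the patch: the nvme_setup_io_queues() hunk above
hands out one completion queue vector per CPU named in cq_cpulist, walking
the parsed cpumask in order. The userspace sketch below only illustrates that
vector-to-CPU assignment; it uses a deliberately simplified parser
(comma-separated CPU numbers, no "a-b" ranges) where the kernel code uses
cpulist_parse()/cpumask_next(), so treat the names and output format here as
illustrative assumptions.

/*
 * Illustrative sketch of the cq_cpulist vector assignment (userspace,
 * simplified parser; not the driver code itself).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_CPUS 64

int main(void)
{
	const char *cq_cpulist = "0,20,40,60";	/* example module parameter */
	unsigned int cpus[MAX_CPUS], ncpus = 0;
	unsigned int nr_io_queues, vec;
	char buf[128], *tok;

	strncpy(buf, cq_cpulist, sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';
	for (tok = strtok(buf, ","); tok && ncpus < MAX_CPUS;
	     tok = strtok(NULL, ","))
		cpus[ncpus++] = (unsigned int)strtoul(tok, NULL, 10);

	/*
	 * The driver requests one queue per listed CPU; the controller and
	 * pci_alloc_irq_vectors() may grant fewer, never more.
	 */
	nr_io_queues = ncpus;

	/* one affinity hint per vector, walking the listed CPUs in order */
	for (vec = 0; vec < nr_io_queues; vec++)
		printf("cq vector %u -> CPU %u\n", vec, cpus[vec]);

	return 0;
}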