diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 838f07e2b64a..8d5aa55309e4 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -37,9 +37,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
 static const int cfq_hist_divisor = 4;
 
 /*
- * offset from end of service tree
+ * offset from end of queue service tree for idle class
  */
 #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)
+/* offset from end of group service tree under time slice mode */
+#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5)
+/* offset from end of group service under IOPS mode */
+#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5)
 
 /*
  * below this threshold, we consider thinktime immediate
@@ -1361,6 +1365,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	cfqg->vfraction = max_t(unsigned, vfr, 1);
 }
 
+static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd)
+{
+	if (!iops_mode(cfqd))
+		return CFQ_SLICE_MODE_GROUP_DELAY;
+	else
+		return CFQ_IOPS_MODE_GROUP_DELAY;
+}
+
 static void
 cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
@@ -1380,7 +1392,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	n = rb_last(&st->rb);
 	if (n) {
 		__cfqg = rb_entry_cfqg(n);
-		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
+		cfqg->vdisktime = __cfqg->vdisktime +
+			cfq_get_cfqg_vdisktime_delay(cfqd);
 	} else
 		cfqg->vdisktime = st->min_vdisktime;
 	cfq_group_service_tree_add(st, cfqg);
@@ -3758,16 +3771,14 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
+static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 {
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
 	uint64_t serial_nr;
-	bool nonroot_cg;
 
 	rcu_read_lock();
 	serial_nr = bio_blkcg(bio)->css.serial_nr;
-	nonroot_cg = bio_blkcg(bio) != &blkcg_root;
 	rcu_read_unlock();
 
 	/*
@@ -3775,7 +3786,7 @@ static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	 * spuriously on a newly created cic but there's no harm.
 	 */
 	if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
-		return nonroot_cg;
+		return;
 
 	/*
 	 * Drop reference to queues. New queues will be assigned in new
@@ -3796,12 +3807,10 @@ static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	}
 
 	cic->blkcg_serial_nr = serial_nr;
-	return nonroot_cg;
 }
 #else
-static inline bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
+static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 {
-	return false;
 }
 #endif /* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -3860,6 +3869,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
 		goto out;
 	}
 
+	/* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
+	cfqq->ioprio_class = IOPRIO_CLASS_NONE;
 	cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
 	cfq_init_prio_data(cfqq, cic);
 	cfq_link_cfqq_cfqg(cfqq, cfqg);
@@ -4444,12 +4455,11 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
-	bool disable_wbt;
 
 	spin_lock_irq(q->queue_lock);
 
 	check_ioprio_changed(cic, bio);
-	disable_wbt = check_blkcg_changed(cic, bio);
+	check_blkcg_changed(cic, bio);
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
 	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
@@ -4486,9 +4496,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
 	rq->elv.priv[1] = cfqq->cfqg;
 	spin_unlock_irq(q->queue_lock);
 
-	if (disable_wbt)
-		wbt_disable_default(q);
-
 	return 0;
 }
 
@@ -4701,6 +4708,7 @@ static void cfq_registered_queue(struct request_queue *q)
 	 */
 	if (blk_queue_nonrot(q))
 		cfqd->cfq_slice_idle = 0;
+	wbt_disable_default(q);
 }
 
 /*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99d4e1af5502..3a1a64c87be1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -195,6 +195,25 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
 	wake_up_process(waiter);
 }
 
+noinline static void debug_schedule(struct bio *bio)
+{
+	u64 t0 = local_clock();
+	u64 t1, t2;
+	if (io_schedule_timeout(HZ * 60) <= 0) {
+		t1 = local_clock();
+		//printk(KERN_WARNING "__blkdev_direct_IO_simple wait timeout %p\n", bio);
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (READ_ONCE(bio->bi_private)) {
+			io_schedule();
+		}
+		t2 = local_clock();
+		__set_current_state(TASK_RUNNING);
+		printk(KERN_WARNING "__blkdev_direct_IO_simple wait recovered "
+		       "%p %p %08x %llu %llu %llu\n",
+		       bio, bio->bi_bdev, bio->bi_opf, t0, t1, t2);
+	}
+}
+
 static ssize_t
 __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 		int nr_pages)
@@ -248,7 +267,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 			break;
 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
 		    !blk_mq_poll(bdev_get_queue(bdev), qc))
-			io_schedule();
+			debug_schedule(&bio);
 	}
 	__set_current_state(TASK_RUNNING);
 