mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	blk-mq: make the polling code adaptive
The previous commit introduced the hybrid sleep/poll mode. Take that one step further, and use the completion latencies to automatically sleep for half the mean completion time. This is a good approximation.

This changes the 'io_poll_delay' sysfs file a bit to expose the various options. Depending on the value, the polling code will behave differently:

-1	Never enter hybrid sleep mode
 0	Use half of the completion mean for the sleep delay
>0	Use this specific value as the sleep delay

Signed-off-by: Jens Axboe <axboe@fb.com>
Tested-By: Stephen Bates <sbates@raithlin.com>
Reviewed-By: Stephen Bates <sbates@raithlin.com>
This commit is contained in:
		
							parent
							
								
									06426adf07
								
							
						
					
					
						commit
						64f1c21e86
					
				| @ -2132,6 +2132,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, | ||||
| 	 */ | ||||
| 	q->nr_requests = set->queue_depth; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Default to classic polling | ||||
| 	 */ | ||||
| 	q->poll_nsec = -1; | ||||
| 
 | ||||
| 	if (set->ops->complete) | ||||
| 		blk_queue_softirq_done(q, set->ops->complete); | ||||
| 
 | ||||
| @ -2469,14 +2474,70 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); | ||||
| 
 | ||||
| static unsigned long blk_mq_poll_nsecs(struct request_queue *q, | ||||
| 				       struct blk_mq_hw_ctx *hctx, | ||||
| 				       struct request *rq) | ||||
| { | ||||
| 	struct blk_rq_stat stat[2]; | ||||
| 	unsigned long ret = 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If stats collection isn't on, don't sleep but turn it on for | ||||
| 	 * future users | ||||
| 	 */ | ||||
| 	if (!blk_stat_enable(q)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We don't have to do this once per IO, should optimize this | ||||
| 	 * to just use the current window of stats until it changes | ||||
| 	 */ | ||||
| 	memset(&stat, 0, sizeof(stat)); | ||||
| 	blk_hctx_stat_get(hctx, stat); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * As an optimistic guess, use half of the mean service time | ||||
| 	 * for this type of request. We can (and should) make this smarter. | ||||
| 	 * For instance, if the completion latencies are tight, we can | ||||
| 	 * get closer than just half the mean. This is especially | ||||
| 	 * important on devices where the completion latencies are longer | ||||
| 	 * than ~10 usec. | ||||
| 	 */ | ||||
| 	if (req_op(rq) == REQ_OP_READ && stat[BLK_STAT_READ].nr_samples) | ||||
| 		ret = (stat[BLK_STAT_READ].mean + 1) / 2; | ||||
| 	else if (req_op(rq) == REQ_OP_WRITE && stat[BLK_STAT_WRITE].nr_samples) | ||||
| 		ret = (stat[BLK_STAT_WRITE].mean + 1) / 2; | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, | ||||
| 				     struct blk_mq_hw_ctx *hctx, | ||||
| 				     struct request *rq) | ||||
| { | ||||
| 	struct hrtimer_sleeper hs; | ||||
| 	enum hrtimer_mode mode; | ||||
| 	unsigned int nsecs; | ||||
| 	ktime_t kt; | ||||
| 
 | ||||
| 	if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags)) | ||||
| 	if (test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * poll_nsec can be: | ||||
| 	 * | ||||
| 	 * -1:	don't ever hybrid sleep | ||||
| 	 *  0:	use half of prev avg | ||||
| 	 * >0:	use this specific value | ||||
| 	 */ | ||||
| 	if (q->poll_nsec == -1) | ||||
| 		return false; | ||||
| 	else if (q->poll_nsec > 0) | ||||
| 		nsecs = q->poll_nsec; | ||||
| 	else | ||||
| 		nsecs = blk_mq_poll_nsecs(q, hctx, rq); | ||||
| 
 | ||||
| 	if (!nsecs) | ||||
| 		return false; | ||||
| 
 | ||||
| 	set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); | ||||
| @ -2485,7 +2546,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, | ||||
| 	 * This will be replaced with the stats tracking code, using | ||||
| 	 * 'avg_completion_time / 2' as the pre-sleep target. | ||||
| 	 */ | ||||
| 	kt = ktime_set(0, q->poll_nsec); | ||||
| 	kt = ktime_set(0, nsecs); | ||||
| 
 | ||||
| 	mode = HRTIMER_MODE_REL; | ||||
| 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode); | ||||
| @ -2520,7 +2581,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) | ||||
| 	 * the IO isn't complete, we'll get called again and will go | ||||
| 	 * straight to the busy poll loop. | ||||
| 	 */ | ||||
| 	if (blk_mq_poll_hybrid_sleep(q, rq)) | ||||
| 	if (blk_mq_poll_hybrid_sleep(q, hctx, rq)) | ||||
| 		return true; | ||||
| 
 | ||||
| 	hctx->poll_considered++; | ||||
|  | ||||
| @ -352,24 +352,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) | ||||
| 
 | ||||
| static ssize_t queue_poll_delay_show(struct request_queue *q, char *page) | ||||
| { | ||||
| 	return queue_var_show(q->poll_nsec / 1000, page); | ||||
| 	int val; | ||||
| 
 | ||||
| 	if (q->poll_nsec == -1) | ||||
| 		val = -1; | ||||
| 	else | ||||
| 		val = q->poll_nsec / 1000; | ||||
| 
 | ||||
| 	return sprintf(page, "%d\n", val); | ||||
| } | ||||
| 
 | ||||
| static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, | ||||
| 				size_t count) | ||||
| { | ||||
| 	unsigned long poll_usec; | ||||
| 	ssize_t ret; | ||||
| 	int err, val; | ||||
| 
 | ||||
| 	if (!q->mq_ops || !q->mq_ops->poll) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	ret = queue_var_store(&poll_usec, page, count); | ||||
| 	if (ret < 0) | ||||
| 		return ret; | ||||
| 	err = kstrtoint(page, 10, &val); | ||||
| 	if (err < 0) | ||||
| 		return err; | ||||
| 
 | ||||
| 	q->poll_nsec = poll_usec * 1000; | ||||
| 	return ret; | ||||
| 	if (val == -1) | ||||
| 		q->poll_nsec = -1; | ||||
| 	else | ||||
| 		q->poll_nsec = val * 1000; | ||||
| 
 | ||||
| 	return count; | ||||
| } | ||||
| 
 | ||||
| static ssize_t queue_poll_show(struct request_queue *q, char *page) | ||||
|  | ||||
| @ -509,7 +509,7 @@ struct request_queue { | ||||
| 	unsigned int		request_fn_active; | ||||
| 
 | ||||
| 	unsigned int		rq_timeout; | ||||
| 	unsigned int		poll_nsec; | ||||
| 	int			poll_nsec; | ||||
| 	struct timer_list	timeout; | ||||
| 	struct work_struct	timeout_work; | ||||
| 	struct list_head	timeout_list; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Jens Axboe
						Jens Axboe