mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	blk-cgroup: show global disk stats in root cgroup io.stat
In order to improve consistency and usability in cgroup stat accounting, we would like to support the root cgroup's io.stat.

Since the root cgroup has processes doing io even if the system has no explicitly created cgroups, we need to be careful to avoid overhead in that case. For that reason, the rstat algorithms don't handle the root cgroup, so just turning the file on wouldn't give correct statistics.

To get around this, we simulate flushing the iostat struct by filling it out directly from global disk stats. The result is a root cgroup io.stat file consistent with both /proc/diskstats and io.stat.

Note that in order to collect the disk stats, we needed to iterate over devices. To facilitate that, we had to change the linkage of disk_type to external so that it can be used from blk-cgroup.c to iterate over disks.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Boris Burkov <boris@bur.io>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									cd1fc4b98f
								
							
						
					
					
						commit
						ef45fe470e
					
				| @ -1483,8 +1483,7 @@ IO Interface Files | |||||||
| ~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~ | ||||||
| 
 | 
 | ||||||
|   io.stat |   io.stat | ||||||
| 	A read-only nested-keyed file which exists on non-root | 	A read-only nested-keyed file. | ||||||
| 	cgroups. |  | ||||||
| 
 | 
 | ||||||
| 	Lines are keyed by $MAJ:$MIN device numbers and not ordered. | 	Lines are keyed by $MAJ:$MIN device numbers and not ordered. | ||||||
| 	The following nested keys are defined. | 	The following nested keys are defined. | ||||||
|  | |||||||
| @ -782,12 +782,66 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) | |||||||
| 	rcu_read_unlock(); | 	rcu_read_unlock(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * The rstat algorithms intentionally don't handle the root cgroup to avoid | ||||||
|  |  * incurring overhead when no cgroups are defined. For that reason, | ||||||
|  |  * cgroup_rstat_flush in blkcg_print_stat does not actually fill out the | ||||||
|  |  * iostat in the root cgroup's blkcg_gq. | ||||||
|  |  * | ||||||
|  |  * However, we would like to re-use the printing code between the root and | ||||||
|  |  * non-root cgroups to the extent possible. For that reason, we simulate | ||||||
|  |  * flushing the root cgroup's stats by explicitly filling in the iostat | ||||||
|  |  * with disk level statistics. | ||||||
|  |  */ | ||||||
|  | static void blkcg_fill_root_iostats(void) | ||||||
|  | { | ||||||
|  | 	struct class_dev_iter iter; | ||||||
|  | 	struct device *dev; | ||||||
|  | 
 | ||||||
|  | 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type); | ||||||
|  | 	while ((dev = class_dev_iter_next(&iter))) { | ||||||
|  | 		struct gendisk *disk = dev_to_disk(dev); | ||||||
|  | 		struct hd_struct *part = disk_get_part(disk, 0); | ||||||
|  | 		struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue); | ||||||
|  | 		struct blkg_iostat tmp; | ||||||
|  | 		int cpu; | ||||||
|  | 
 | ||||||
|  | 		memset(&tmp, 0, sizeof(tmp)); | ||||||
|  | 		for_each_possible_cpu(cpu) { | ||||||
|  | 			struct disk_stats *cpu_dkstats; | ||||||
|  | 
 | ||||||
|  | 			cpu_dkstats = per_cpu_ptr(part->dkstats, cpu); | ||||||
|  | 			tmp.ios[BLKG_IOSTAT_READ] += | ||||||
|  | 				cpu_dkstats->ios[STAT_READ]; | ||||||
|  | 			tmp.ios[BLKG_IOSTAT_WRITE] += | ||||||
|  | 				cpu_dkstats->ios[STAT_WRITE]; | ||||||
|  | 			tmp.ios[BLKG_IOSTAT_DISCARD] += | ||||||
|  | 				cpu_dkstats->ios[STAT_DISCARD]; | ||||||
|  | 			// convert sectors to bytes
 | ||||||
|  | 			tmp.bytes[BLKG_IOSTAT_READ] += | ||||||
|  | 				cpu_dkstats->sectors[STAT_READ] << 9; | ||||||
|  | 			tmp.bytes[BLKG_IOSTAT_WRITE] += | ||||||
|  | 				cpu_dkstats->sectors[STAT_WRITE] << 9; | ||||||
|  | 			tmp.bytes[BLKG_IOSTAT_DISCARD] += | ||||||
|  | 				cpu_dkstats->sectors[STAT_DISCARD] << 9; | ||||||
|  | 
 | ||||||
|  | 			u64_stats_update_begin(&blkg->iostat.sync); | ||||||
|  | 			blkg_iostat_set(&blkg->iostat.cur, &tmp); | ||||||
|  | 			u64_stats_update_end(&blkg->iostat.sync); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int blkcg_print_stat(struct seq_file *sf, void *v) | static int blkcg_print_stat(struct seq_file *sf, void *v) | ||||||
| { | { | ||||||
| 	struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); | 	struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); | ||||||
| 	struct blkcg_gq *blkg; | 	struct blkcg_gq *blkg; | ||||||
| 
 | 
 | ||||||
|  | 	if (!seq_css(sf)->parent) | ||||||
|  | 		blkcg_fill_root_iostats(); | ||||||
|  | 	else | ||||||
| 		cgroup_rstat_flush(blkcg->css.cgroup); | 		cgroup_rstat_flush(blkcg->css.cgroup); | ||||||
|  | 
 | ||||||
| 	rcu_read_lock(); | 	rcu_read_lock(); | ||||||
| 
 | 
 | ||||||
| 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { | 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { | ||||||
| @ -876,7 +930,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) | |||||||
| static struct cftype blkcg_files[] = { | static struct cftype blkcg_files[] = { | ||||||
| 	{ | 	{ | ||||||
| 		.name = "stat", | 		.name = "stat", | ||||||
| 		.flags = CFTYPE_NOT_ON_ROOT, |  | ||||||
| 		.seq_show = blkcg_print_stat, | 		.seq_show = blkcg_print_stat, | ||||||
| 	}, | 	}, | ||||||
| 	{ }	/* terminate */ | 	{ }	/* terminate */ | ||||||
|  | |||||||
| @ -38,8 +38,6 @@ static struct kobject *block_depr; | |||||||
| static DEFINE_SPINLOCK(ext_devt_lock); | static DEFINE_SPINLOCK(ext_devt_lock); | ||||||
| static DEFINE_IDR(ext_devt_idr); | static DEFINE_IDR(ext_devt_idr); | ||||||
| 
 | 
 | ||||||
| static const struct device_type disk_type; |  | ||||||
| 
 |  | ||||||
| static void disk_check_events(struct disk_events *ev, | static void disk_check_events(struct disk_events *ev, | ||||||
| 			      unsigned int *clearing_ptr); | 			      unsigned int *clearing_ptr); | ||||||
| static void disk_alloc_events(struct gendisk *disk); | static void disk_alloc_events(struct gendisk *disk); | ||||||
| @ -1587,7 +1585,7 @@ static char *block_devnode(struct device *dev, umode_t *mode, | |||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static const struct device_type disk_type = { | const struct device_type disk_type = { | ||||||
| 	.name		= "disk", | 	.name		= "disk", | ||||||
| 	.groups		= disk_attr_groups, | 	.groups		= disk_attr_groups, | ||||||
| 	.release	= disk_release, | 	.release	= disk_release, | ||||||
|  | |||||||
| @ -24,6 +24,7 @@ | |||||||
| #define disk_to_dev(disk)	(&(disk)->part0.__dev) | #define disk_to_dev(disk)	(&(disk)->part0.__dev) | ||||||
| #define part_to_dev(part)	(&((part)->__dev)) | #define part_to_dev(part)	(&((part)->__dev)) | ||||||
| 
 | 
 | ||||||
|  | extern const struct device_type disk_type; | ||||||
| extern struct device_type part_type; | extern struct device_type part_type; | ||||||
| extern struct class block_class; | extern struct class block_class; | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Boris Burkov
						Boris Burkov