Skip to content

Commit ef45fe4

Browse files
boryas authored and axboe committed
blk-cgroup: show global disk stats in root cgroup io.stat
In order to improve consistency and usability in cgroup stat accounting, we would like to support the root cgroup's io.stat.

Since the root cgroup has processes doing io even if the system has no explicitly created cgroups, we need to be careful to avoid overhead in that case. For that reason, the rstat algorithms don't handle the root cgroup, so just turning the file on wouldn't give correct statistics.

To get around this, we simulate flushing the iostat struct by filling it out directly from global disk stats. The result is a root cgroup io.stat file consistent with both /proc/diskstats and io.stat.

Note that in order to collect the disk stats, we needed to iterate over devices. To facilitate that, we had to change the linkage of disk_type to external so that it can be used from blk-cgroup.c to iterate over disks.

Suggested-by: Tejun Heo <[email protected]>
Signed-off-by: Boris Burkov <[email protected]>
Acked-by: Tejun Heo <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent cd1fc4b commit ef45fe4

File tree

4 files changed

+58
-7
lines changed

4 files changed

+58
-7
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,8 +1483,7 @@ IO Interface Files
14831483
~~~~~~~~~~~~~~~~~~
14841484

14851485
io.stat
1486-
A read-only nested-keyed file which exists on non-root
1487-
cgroups.
1486+
A read-only nested-keyed file.
14881487

14891488
Lines are keyed by $MAJ:$MIN device numbers and not ordered.
14901489
The following nested keys are defined.

block/blk-cgroup.c

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -782,12 +782,66 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
782782
rcu_read_unlock();
783783
}
784784

785+
/*
786+
* The rstat algorithms intentionally don't handle the root cgroup to avoid
787+
* incurring overhead when no cgroups are defined. For that reason,
788+
* cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
789+
* iostat in the root cgroup's blkcg_gq.
790+
*
791+
* However, we would like to re-use the printing code between the root and
792+
* non-root cgroups to the extent possible. For that reason, we simulate
793+
* flushing the root cgroup's stats by explicitly filling in the iostat
794+
* with disk level statistics.
795+
*/
796+
static void blkcg_fill_root_iostats(void)
797+
{
798+
struct class_dev_iter iter;
799+
struct device *dev;
800+
801+
class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
802+
while ((dev = class_dev_iter_next(&iter))) {
803+
struct gendisk *disk = dev_to_disk(dev);
804+
struct hd_struct *part = disk_get_part(disk, 0);
805+
struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
806+
struct blkg_iostat tmp;
807+
int cpu;
808+
809+
memset(&tmp, 0, sizeof(tmp));
810+
for_each_possible_cpu(cpu) {
811+
struct disk_stats *cpu_dkstats;
812+
813+
cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
814+
tmp.ios[BLKG_IOSTAT_READ] +=
815+
cpu_dkstats->ios[STAT_READ];
816+
tmp.ios[BLKG_IOSTAT_WRITE] +=
817+
cpu_dkstats->ios[STAT_WRITE];
818+
tmp.ios[BLKG_IOSTAT_DISCARD] +=
819+
cpu_dkstats->ios[STAT_DISCARD];
820+
// convert sectors to bytes
821+
tmp.bytes[BLKG_IOSTAT_READ] +=
822+
cpu_dkstats->sectors[STAT_READ] << 9;
823+
tmp.bytes[BLKG_IOSTAT_WRITE] +=
824+
cpu_dkstats->sectors[STAT_WRITE] << 9;
825+
tmp.bytes[BLKG_IOSTAT_DISCARD] +=
826+
cpu_dkstats->sectors[STAT_DISCARD] << 9;
827+
828+
u64_stats_update_begin(&blkg->iostat.sync);
829+
blkg_iostat_set(&blkg->iostat.cur, &tmp);
830+
u64_stats_update_end(&blkg->iostat.sync);
831+
}
832+
}
833+
}
834+
785835
static int blkcg_print_stat(struct seq_file *sf, void *v)
786836
{
787837
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
788838
struct blkcg_gq *blkg;
789839

790-
cgroup_rstat_flush(blkcg->css.cgroup);
840+
if (!seq_css(sf)->parent)
841+
blkcg_fill_root_iostats();
842+
else
843+
cgroup_rstat_flush(blkcg->css.cgroup);
844+
791845
rcu_read_lock();
792846

793847
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
@@ -876,7 +930,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
876930
static struct cftype blkcg_files[] = {
877931
{
878932
.name = "stat",
879-
.flags = CFTYPE_NOT_ON_ROOT,
880933
.seq_show = blkcg_print_stat,
881934
},
882935
{ } /* terminate */

block/genhd.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ static struct kobject *block_depr;
3838
static DEFINE_SPINLOCK(ext_devt_lock);
3939
static DEFINE_IDR(ext_devt_idr);
4040

41-
static const struct device_type disk_type;
42-
4341
static void disk_check_events(struct disk_events *ev,
4442
unsigned int *clearing_ptr);
4543
static void disk_alloc_events(struct gendisk *disk);
@@ -1587,7 +1585,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
15871585
return NULL;
15881586
}
15891587

1590-
static const struct device_type disk_type = {
1588+
const struct device_type disk_type = {
15911589
.name = "disk",
15921590
.groups = disk_attr_groups,
15931591
.release = disk_release,

include/linux/genhd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#define disk_to_dev(disk) (&(disk)->part0.__dev)
2525
#define part_to_dev(part) (&((part)->__dev))
2626

27+
extern const struct device_type disk_type;
2728
extern struct device_type part_type;
2829
extern struct class block_class;
2930

0 commit comments

Comments
 (0)