Skip to content

Commit 9f25a8d

Browse files
committed
Merge branch 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:

 - For cpustat, cgroup has a percpu hierarchical stat mechanism which
   propagates up the hierarchy lazily.

   This contains commits to factor out and generalize the mechanism so
   that it can be used for other cgroup stats too.

   The original intention was to update memcg stats to use it but memcg
   went for a different approach, so still the only user is cpustat.
   The factoring out and generalization still make sense and it's
   likely that this can be used for other purposes in the future.

 - cgroup uses kernfs_notify() (which uses fsnotify()) to inform user
   space of certain events. A rate limiting mechanism is added.

 - Other misc changes.

* 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: css_set_lock should nest inside tasklist_lock
  rdmacg: Convert to use match_string() helper
  cgroup: Make cgroup_rstat_updated() ready for root cgroup usage
  cgroup: Add memory barriers to plug cgroup_rstat_updated() race window
  cgroup: Add cgroup_subsys->css_rstat_flush()
  cgroup: Replace cgroup_rstat_mutex with a spinlock
  cgroup: Factor out and expose cgroup_rstat_*() interface functions
  cgroup: Reorganize kernel/cgroup/rstat.c
  cgroup: Distinguish base resource stat implementation from rstat
  cgroup: Rename stat to rstat
  cgroup: Rename kernel/cgroup/stat.c to kernel/cgroup/rstat.c
  cgroup: Limit event generation frequency
  cgroup: Explicitly remove core interface files
2 parents 0bbddb8 + d8742e2 commit 9f25a8d

File tree

8 files changed: +554 / -417 lines changed

include/linux/cgroup-defs.h

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ enum {
105105
struct cgroup_file {
106106
/* do not access any fields from outside cgroup core */
107107
struct kernfs_node *kn;
108+
unsigned long notified_at;
109+
struct timer_list notify_timer;
108110
};
109111

110112
/*
@@ -128,6 +130,9 @@ struct cgroup_subsys_state {
128130
struct list_head sibling;
129131
struct list_head children;
130132

133+
/* flush target list anchored at cgrp->rstat_css_list */
134+
struct list_head rstat_css_node;
135+
131136
/*
132137
* PI: Subsys-unique ID. 0 is unused and root is always 1. The
133138
* matching css can be looked up using css_from_id().
@@ -256,12 +261,16 @@ struct css_set {
256261
struct rcu_head rcu_head;
257262
};
258263

264+
struct cgroup_base_stat {
265+
struct task_cputime cputime;
266+
};
267+
259268
/*
260-
* cgroup basic resource usage statistics. Accounting is done per-cpu in
261-
* cgroup_cpu_stat which is then lazily propagated up the hierarchy on
262-
* reads.
269+
* rstat - cgroup scalable recursive statistics. Accounting is done
270+
* per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
271+
* hierarchy on reads.
263272
*
264-
* When a stat gets updated, the cgroup_cpu_stat and its ancestors are
273+
* When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
265274
* linked into the updated tree. On the following read, propagation only
266275
* considers and consumes the updated tree. This makes reading O(the
267276
* number of descendants which have been active since last read) instead of
@@ -271,20 +280,24 @@ struct css_set {
271280
* aren't active and stat may be read frequently. The combination can
272281
* become very expensive. By propagating selectively, increasing reading
273282
* frequency decreases the cost of each read.
283+
*
284+
* This struct hosts both the fields which implement the above -
285+
* updated_children and updated_next - and the fields which track basic
286+
* resource statistics on top of it - bsync, bstat and last_bstat.
274287
*/
275-
struct cgroup_cpu_stat {
288+
struct cgroup_rstat_cpu {
276289
/*
277-
* ->sync protects all the current counters. These are the only
278-
* fields which get updated in the hot path.
290+
* ->bsync protects ->bstat. These are the only fields which get
291+
* updated in the hot path.
279292
*/
280-
struct u64_stats_sync sync;
281-
struct task_cputime cputime;
293+
struct u64_stats_sync bsync;
294+
struct cgroup_base_stat bstat;
282295

283296
/*
284297
* Snapshots at the last reading. These are used to calculate the
285298
* deltas to propagate to the global counters.
286299
*/
287-
struct task_cputime last_cputime;
300+
struct cgroup_base_stat last_bstat;
288301

289302
/*
290303
* Child cgroups with stat updates on this cpu since the last read
@@ -295,18 +308,12 @@ struct cgroup_cpu_stat {
295308
* to the cgroup makes it unnecessary for each per-cpu struct to
296309
* point back to the associated cgroup.
297310
*
298-
* Protected by per-cpu cgroup_cpu_stat_lock.
311+
* Protected by per-cpu cgroup_rstat_cpu_lock.
299312
*/
300313
struct cgroup *updated_children; /* terminated by self cgroup */
301314
struct cgroup *updated_next; /* NULL iff not on the list */
302315
};
303316

304-
struct cgroup_stat {
305-
/* per-cpu statistics are collected into the folowing global counters */
306-
struct task_cputime cputime;
307-
struct prev_cputime prev_cputime;
308-
};
309-
310317
struct cgroup {
311318
/* self css with NULL ->ss, points back to this cgroup */
312319
struct cgroup_subsys_state self;
@@ -406,10 +413,14 @@ struct cgroup {
406413
*/
407414
struct cgroup *dom_cgrp;
408415

416+
/* per-cpu recursive resource statistics */
417+
struct cgroup_rstat_cpu __percpu *rstat_cpu;
418+
struct list_head rstat_css_list;
419+
409420
/* cgroup basic resource statistics */
410-
struct cgroup_cpu_stat __percpu *cpu_stat;
411-
struct cgroup_stat pending_stat; /* pending from children */
412-
struct cgroup_stat stat;
421+
struct cgroup_base_stat pending_bstat; /* pending from children */
422+
struct cgroup_base_stat bstat;
423+
struct prev_cputime prev_cputime; /* for printing out cputime */
413424

414425
/*
415426
* list of pidlists, up to two for each namespace (one for procs, one
@@ -570,6 +581,7 @@ struct cgroup_subsys {
570581
void (*css_released)(struct cgroup_subsys_state *css);
571582
void (*css_free)(struct cgroup_subsys_state *css);
572583
void (*css_reset)(struct cgroup_subsys_state *css);
584+
void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
573585
int (*css_extra_stat_show)(struct seq_file *seq,
574586
struct cgroup_subsys_state *css);
575587

include/linux/cgroup.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -690,11 +690,19 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
690690
char *buf, size_t buflen) {}
691691
#endif /* !CONFIG_CGROUPS */
692692

693+
#ifdef CONFIG_CGROUPS
693694
/*
694-
* Basic resource stats.
695+
* cgroup scalable recursive statistics.
695696
*/
696-
#ifdef CONFIG_CGROUPS
697+
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
698+
void cgroup_rstat_flush(struct cgroup *cgrp);
699+
void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
700+
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
701+
void cgroup_rstat_flush_release(void);
697702

703+
/*
704+
* Basic resource stats.
705+
*/
698706
#ifdef CONFIG_CGROUP_CPUACCT
699707
void cpuacct_charge(struct task_struct *tsk, u64 cputime);
700708
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);

kernel/cgroup/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SPDX-License-Identifier: GPL-2.0
2-
obj-y := cgroup.o stat.o namespace.o cgroup-v1.o
2+
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
33

44
obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
55
obj-$(CONFIG_CGROUP_PIDS) += pids.o

kernel/cgroup/cgroup-internal.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,13 +201,12 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
201201
int cgroup_task_count(const struct cgroup *cgrp);
202202

203203
/*
204-
* stat.c
204+
* rstat.c
205205
*/
206-
void cgroup_stat_flush(struct cgroup *cgrp);
207-
int cgroup_stat_init(struct cgroup *cgrp);
208-
void cgroup_stat_exit(struct cgroup *cgrp);
209-
void cgroup_stat_show_cputime(struct seq_file *seq);
210-
void cgroup_stat_boot(void);
206+
int cgroup_rstat_init(struct cgroup *cgrp);
207+
void cgroup_rstat_exit(struct cgroup *cgrp);
208+
void cgroup_rstat_boot(void);
209+
void cgroup_base_stat_cputime_show(struct seq_file *seq);
211210

212211
/*
213212
* namespace.c

0 commit comments

Comments
 (0)