Skip to content

Commit 73e75e6

Browse files
Werkov authored and htejun committed
cgroup/pids: Separate semantics of pids.events related to pids.max
Currently, when pids.max limit is breached in the hierarchy, the event is counted and reported in the cgroup where the forking task resides.

This decouples the limit and the notification caused by the limit making it hard to detect when the actual limit was effected.

Redefine the pids.events:max as: the number of times the limit of the cgroup was hit.

(Implementation differentiates also "forkfail" event but this is currently not exposed as it would better fit into pids.stat. It also differs from pids.events:max only when pids.max is configured on non-leaf cgroups.)

Since it changes semantics of the original "max" event, introduce this change only in the v2 API of the controller and add a cgroup2 mount option to revert to the legacy behavior.

Signed-off-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 0ac3800 commit 73e75e6

File tree

5 files changed

+64
-18
lines changed

5 files changed

+64
-18
lines changed

Documentation/admin-guide/cgroup-v1/pids.rst

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,8 @@ superset of parent/child/pids.current.
3636

3737
The pids.events file contains event counters:
3838

39-
- max: Number of times fork failed because limit was hit.
39+
- max: Number of times fork failed in the cgroup because limit was hit in
40+
self or ancestors.
4041

4142
Example
4243
-------

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,10 @@ cgroup v2 currently supports the following mount options.
239239
will not be tracked by the memory controller (even if cgroup
240240
v2 is remounted later on).
241241

242+
pids_localevents
243+
Represent fork failures inside cgroup's pids.events:max (v1 behavior),
244+
not its limit being hit (v2 behavior).
245+
242246

243247
Organizing Processes and Threads
244248
--------------------------------
@@ -2205,12 +2209,13 @@ PID Interface Files
22052209
descendants has ever reached.
22062210

22072211
pids.events
2208-
A read-only flat-keyed file which exists on non-root cgroups. The
2209-
following entries are defined. Unless specified otherwise, a value
2210-
change in this file generates a file modified event.
2212+
A read-only flat-keyed file which exists on non-root cgroups. Unless
2213+
specified otherwise, a value change in this file generates a file
2214+
modified event. The following entries are defined.
22112215

22122216
max
2213-
Number of times fork failed because limit was hit.
2217+
The number of times the cgroup's number of processes hit the
2218+
limit (see also pids_localevents).
22142219

22152220
Organisational operations are not blocked by cgroup policies, so it is
22162221
possible to have pids.current > pids.max. This can be done by either

include/linux/cgroup-defs.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,12 @@ enum {
119119
/*
120120
* Enable hugetlb accounting for the memory controller.
121121
*/
122-
CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
122+
CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
123+
124+
/*
125+
* Enable legacy local pids.events.
126+
*/
127+
CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20),
123128
};
124129

125130
/* cftype->flags */

kernel/cgroup/cgroup.c

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1922,6 +1922,7 @@ enum cgroup2_param {
19221922
Opt_memory_localevents,
19231923
Opt_memory_recursiveprot,
19241924
Opt_memory_hugetlb_accounting,
1925+
Opt_pids_localevents,
19251926
nr__cgroup2_params
19261927
};
19271928

@@ -1931,6 +1932,7 @@ static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
19311932
fsparam_flag("memory_localevents", Opt_memory_localevents),
19321933
fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
19331934
fsparam_flag("memory_hugetlb_accounting", Opt_memory_hugetlb_accounting),
1935+
fsparam_flag("pids_localevents", Opt_pids_localevents),
19341936
{}
19351937
};
19361938

@@ -1960,6 +1962,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
19601962
case Opt_memory_hugetlb_accounting:
19611963
ctx->flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
19621964
return 0;
1965+
case Opt_pids_localevents:
1966+
ctx->flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS;
1967+
return 0;
19631968
}
19641969
return -EINVAL;
19651970
}
@@ -1989,6 +1994,11 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
19891994
cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
19901995
else
19911996
cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
1997+
1998+
if (root_flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
1999+
cgrp_dfl_root.flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS;
2000+
else
2001+
cgrp_dfl_root.flags &= ~CGRP_ROOT_PIDS_LOCAL_EVENTS;
19922002
}
19932003
}
19942004

@@ -2004,6 +2014,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
20042014
seq_puts(seq, ",memory_recursiveprot");
20052015
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING)
20062016
seq_puts(seq, ",memory_hugetlb_accounting");
2017+
if (cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
2018+
seq_puts(seq, ",pids_localevents");
20072019
return 0;
20082020
}
20092021

@@ -7062,7 +7074,8 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
70627074
"favordynmods\n"
70637075
"memory_localevents\n"
70647076
"memory_recursiveprot\n"
7065-
"memory_hugetlb_accounting\n");
7077+
"memory_hugetlb_accounting\n"
7078+
"pids_localevents\n");
70667079
}
70677080
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
70687081

kernel/cgroup/pids.c

Lines changed: 33 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,14 @@
3838
#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
3939
#define PIDS_MAX_STR "max"
4040

41+
enum pidcg_event {
42+
/* Fork failed in subtree because this pids_cgroup limit was hit. */
43+
PIDCG_MAX,
44+
/* Fork failed in this pids_cgroup because its own or an ancestor's limit was hit. */
45+
PIDCG_FORKFAIL,
46+
NR_PIDCG_EVENTS,
47+
};
48+
4149
struct pids_cgroup {
4250
struct cgroup_subsys_state css;
4351

@@ -52,8 +60,7 @@ struct pids_cgroup {
5260
/* Handle for "pids.events" */
5361
struct cgroup_file events_file;
5462

55-
/* Number of times fork failed because limit was hit. */
56-
atomic64_t events_limit;
63+
atomic64_t events[NR_PIDCG_EVENTS];
5764
};
5865

5966
static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
@@ -148,12 +155,13 @@ static void pids_charge(struct pids_cgroup *pids, int num)
148155
* pids_try_charge - hierarchically try to charge the pid count
149156
* @pids: the pid cgroup state
150157
* @num: the number of pids to charge
158+
* @fail: on failure, set to the pid cgroup whose limit caused the failure
151159
*
152160
* This function follows the set limit. It will fail if the charge would cause
153161
* the new value to exceed the hierarchical limit. Returns 0 if the charge
154162
* succeeded, otherwise -EAGAIN.
155163
*/
156-
static int pids_try_charge(struct pids_cgroup *pids, int num)
164+
static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup **fail)
157165
{
158166
struct pids_cgroup *p, *q;
159167

@@ -166,9 +174,10 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
166174
* p->limit is %PIDS_MAX then we know that this test will never
167175
* fail.
168176
*/
169-
if (new > limit)
177+
if (new > limit) {
178+
*fail = p;
170179
goto revert;
171-
180+
}
172181
/*
173182
* Not technically accurate if we go over limit somewhere up
174183
* the hierarchy, but that's tolerable for the watermark.
@@ -236,23 +245,31 @@ static void pids_cancel_attach(struct cgroup_taskset *tset)
236245
static int pids_can_fork(struct task_struct *task, struct css_set *cset)
237246
{
238247
struct cgroup_subsys_state *css;
239-
struct pids_cgroup *pids;
248+
struct pids_cgroup *pids, *pids_over_limit;
240249
int err;
241250

242251
if (cset)
243252
css = cset->subsys[pids_cgrp_id];
244253
else
245254
css = task_css_check(current, pids_cgrp_id, true);
246255
pids = css_pids(css);
247-
err = pids_try_charge(pids, 1);
256+
err = pids_try_charge(pids, 1, &pids_over_limit);
248257
if (err) {
249-
/* Only log the first time events_limit is incremented. */
250-
if (atomic64_inc_return(&pids->events_limit) == 1) {
258+
/* compatibility on v1 where events were notified in leaves. */
259+
if (!cgroup_subsys_on_dfl(pids_cgrp_subsys))
260+
pids_over_limit = pids;
261+
262+
/* Only log the first time limit is hit. */
263+
if (atomic64_inc_return(&pids->events[PIDCG_FORKFAIL]) == 1) {
251264
pr_info("cgroup: fork rejected by pids controller in ");
252-
pr_cont_cgroup_path(css->cgroup);
265+
pr_cont_cgroup_path(pids->css.cgroup);
253266
pr_cont("\n");
254267
}
268+
atomic64_inc(&pids_over_limit->events[PIDCG_MAX]);
269+
255270
cgroup_file_notify(&pids->events_file);
271+
if (pids_over_limit != pids)
272+
cgroup_file_notify(&pids_over_limit->events_file);
256273
}
257274
return err;
258275
}
@@ -340,8 +357,13 @@ static s64 pids_peak_read(struct cgroup_subsys_state *css,
340357
static int pids_events_show(struct seq_file *sf, void *v)
341358
{
342359
struct pids_cgroup *pids = css_pids(seq_css(sf));
360+
enum pidcg_event pe = PIDCG_MAX;
361+
362+
if (!cgroup_subsys_on_dfl(pids_cgrp_subsys) ||
363+
cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS)
364+
pe = PIDCG_FORKFAIL;
343365

344-
seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events_limit));
366+
seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events[pe]));
345367
return 0;
346368
}
347369

0 commit comments

Comments
 (0)