Skip to content

Commit e2d5990

Browse files
Waiman-Long and htejun
authored and committed
cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective
Currently, a partition root cannot have an empty "cpuset.cpus.effective". As a result, a parent partition root cannot distribute all of its CPUs to child partitions and be left with no CPUs itself. However, in most cases, there shouldn't be any tasks associated with intermediate nodes of the default hierarchy. So the current rule is too restrictive and can waste valuable CPU resources. To address this issue, we are now allowing a partition to have an empty "cpuset.cpus.effective" as long as it has no task. Since cpuset is threaded, the no-internal-process rule does not apply. So it is possible to have tasks in a partition root with child sub-partitions even though that should be uncommon. A parent partition with no task can now have all its CPUs distributed out to its child partitions. The top cpuset always has some house-keeping tasks running and so its list of effective CPUs can't be empty. Once a partition with an empty "cpuset.cpus.effective" is formed, no new task can be moved into it until "cpuset.cpus.effective" becomes non-empty. Signed-off-by: Waiman Long <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent 18065eb commit e2d5990

File tree

1 file changed

+84
-25
lines changed

1 file changed

+84
-25
lines changed

kernel/cgroup/cpuset.c

Lines changed: 84 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,41 @@ static inline bool is_in_v2_mode(void)
416416
(cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
417417
}
418418

419+
/**
420+
* partition_is_populated - check if partition has tasks
421+
* @cs: partition root to be checked
422+
* @excluded_child: a child cpuset to be excluded in task checking
423+
* Return: true if there are tasks, false otherwise
424+
*
425+
* It is assumed that @cs is a valid partition root. @excluded_child should
426+
* be non-NULL when this cpuset is going to become a partition itself.
427+
*/
428+
static inline bool partition_is_populated(struct cpuset *cs,
429+
struct cpuset *excluded_child)
430+
{
431+
struct cgroup_subsys_state *css;
432+
struct cpuset *child;
433+
434+
if (cs->css.cgroup->nr_populated_csets)
435+
return true;
436+
if (!excluded_child && !cs->nr_subparts_cpus)
437+
return cgroup_is_populated(cs->css.cgroup);
438+
439+
rcu_read_lock();
440+
cpuset_for_each_child(child, css, cs) {
441+
if (child == excluded_child)
442+
continue;
443+
if (is_partition_valid(child))
444+
continue;
445+
if (cgroup_is_populated(child->css.cgroup)) {
446+
rcu_read_unlock();
447+
return true;
448+
}
449+
}
450+
rcu_read_unlock();
451+
return false;
452+
}
453+
419454
/*
420455
* Return in pmask the portion of a task's cpusets's cpus_allowed that
421456
* are online and are capable of running the task. If none are found,
@@ -1257,22 +1292,27 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
12571292
return -EBUSY;
12581293

12591294
/*
1260-
* Enabling partition root is not allowed if not all the CPUs
1261-
* can be granted from parent's effective_cpus or at least one
1262-
* CPU will be left after that.
1263-
*/
1264-
if ((cmd == partcmd_enable) &&
1265-
(!cpumask_subset(cs->cpus_allowed, parent->effective_cpus) ||
1266-
cpumask_equal(cs->cpus_allowed, parent->effective_cpus)))
1267-
return -EINVAL;
1268-
1269-
/*
1270-
* A cpumask update cannot make parent's effective_cpus become empty.
12711295
* new_prs will only be changed for the partcmd_update command.
12721296
*/
12731297
adding = deleting = false;
12741298
old_prs = new_prs = cs->partition_root_state;
12751299
if (cmd == partcmd_enable) {
1300+
/*
1301+
* Enabling partition root is not allowed if not all the CPUs
1302+
* can be granted from parent's effective_cpus.
1303+
*/
1304+
if (!cpumask_subset(cs->cpus_allowed, parent->effective_cpus))
1305+
return -EINVAL;
1306+
1307+
/*
1308+
* A parent can be left with no CPU as long as there is no
1309+
* task directly associated with the parent partition. For
1310+
* such a parent, no new task can be moved into it.
1311+
*/
1312+
if (cpumask_equal(cs->cpus_allowed, parent->effective_cpus) &&
1313+
partition_is_populated(parent, cs))
1314+
return -EINVAL;
1315+
12761316
cpumask_copy(tmp->addmask, cs->cpus_allowed);
12771317
adding = true;
12781318
} else if (cmd == partcmd_disable) {
@@ -1294,10 +1334,12 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
12941334
adding = cpumask_andnot(tmp->addmask, tmp->addmask,
12951335
parent->subparts_cpus);
12961336
/*
1297-
* Return error if the new effective_cpus could become empty.
1337+
* Return error if the new effective_cpus could become empty
1338+
* and there are tasks in the parent.
12981339
*/
12991340
if (adding &&
1300-
cpumask_equal(parent->effective_cpus, tmp->addmask)) {
1341+
cpumask_equal(parent->effective_cpus, tmp->addmask) &&
1342+
partition_is_populated(parent, cs)) {
13011343
if (!deleting)
13021344
return -EINVAL;
13031345
/*
@@ -1322,8 +1364,8 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
13221364
*/
13231365
adding = cpumask_and(tmp->addmask, cs->cpus_allowed,
13241366
parent->effective_cpus);
1325-
part_error = cpumask_equal(tmp->addmask,
1326-
parent->effective_cpus);
1367+
part_error = cpumask_equal(tmp->addmask, parent->effective_cpus) &&
1368+
partition_is_populated(parent, cs);
13271369
}
13281370

13291371
if (cmd == partcmd_update) {
@@ -1425,9 +1467,15 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
14251467

14261468
/*
14271469
* If it becomes empty, inherit the effective mask of the
1428-
* parent, which is guaranteed to have some CPUs.
1470+
* parent, which is guaranteed to have some CPUs unless
1471+
* it is a partition root that has explicitly distributed
1472+
* out all its CPUs.
14291473
*/
14301474
if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) {
1475+
if (is_partition_valid(cp) &&
1476+
cpumask_equal(cp->cpus_allowed, cp->subparts_cpus))
1477+
goto update_parent_subparts;
1478+
14311479
cpumask_copy(tmp->new_cpus, parent->effective_cpus);
14321480
if (!cp->use_parent_ecpus) {
14331481
cp->use_parent_ecpus = true;
@@ -1449,6 +1497,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
14491497
continue;
14501498
}
14511499

1500+
update_parent_subparts:
14521501
/*
14531502
* update_parent_subparts_cpumask() should have been called
14541503
* for cs already in update_cpumask(). We should also call
@@ -2254,6 +2303,12 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
22542303
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
22552304
goto out_unlock;
22562305

2306+
/*
2307+
* Task cannot be moved to a cpuset with empty effective cpus.
2308+
*/
2309+
if (cpumask_empty(cs->effective_cpus))
2310+
goto out_unlock;
2311+
22572312
cgroup_taskset_for_each(task, css, tset) {
22582313
ret = task_can_attach(task, cs->effective_cpus);
22592314
if (ret)
@@ -3119,7 +3174,8 @@ hotplug_update_tasks(struct cpuset *cs,
31193174
struct cpumask *new_cpus, nodemask_t *new_mems,
31203175
bool cpus_updated, bool mems_updated)
31213176
{
3122-
if (cpumask_empty(new_cpus))
3177+
/* A partition root is allowed to have empty effective cpus */
3178+
if (cpumask_empty(new_cpus) && !is_partition_valid(cs))
31233179
cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus);
31243180
if (nodes_empty(*new_mems))
31253181
*new_mems = parent_cs(cs)->effective_mems;
@@ -3188,10 +3244,11 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
31883244

31893245
/*
31903246
* In the unlikely event that a partition root has empty
3191-
* effective_cpus or its parent becomes invalid, we have to
3192-
* transition it to the invalid state.
3247+
* effective_cpus with tasks or its parent becomes invalid, we
3248+
* have to transition it to the invalid state.
31933249
*/
3194-
if (is_partition_valid(cs) && (cpumask_empty(&new_cpus) ||
3250+
if (is_partition_valid(cs) &&
3251+
((cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL)) ||
31953252
is_partition_invalid(parent))) {
31963253
if (cs->nr_subparts_cpus) {
31973254
spin_lock_irq(&callback_lock);
@@ -3202,13 +3259,15 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
32023259
}
32033260

32043261
/*
3205-
* If the effective_cpus is empty because the child
3206-
* partitions take away all the CPUs, we can keep
3207-
* the current partition and let the child partitions
3208-
* fight for available CPUs.
3262+
* Force the partition to become invalid if either one of
3263+
* the following conditions hold:
3264+
* 1) empty effective cpus but not valid empty partition.
3265+
* 2) parent is invalid or doesn't grant any cpus to child
3266+
* partitions.
32093267
*/
32103268
if (is_partition_invalid(parent) ||
3211-
cpumask_empty(&new_cpus)) {
3269+
(cpumask_empty(&new_cpus) &&
3270+
partition_is_populated(cs, NULL))) {
32123271
int old_prs;
32133272

32143273
update_parent_subparts_cpumask(cs, partcmd_disable,

0 commit comments

Comments
 (0)