@@ -215,7 +215,13 @@ struct cpuset_remove_tasks_struct {
215215};
216216
217217/*
218- * Exclusive CPUs distributed out to sub-partitions of top_cpuset
 218+ * For local partitions, updates to subpartitions_cpus & isolated_cpus are
 219+ * done in update_parent_effective_cpumask(). For remote partitions, they are
 220+ * done in the remote_partition_*() and remote_cpus_update() helpers.
221+ */
222+ /*
223+ * Exclusive CPUs distributed out to local or remote sub-partitions of
224+ * top_cpuset
219225 */
220226static cpumask_var_t subpartitions_cpus ;
221227
@@ -1359,9 +1365,14 @@ void rebuild_sched_domains(void)
13591365 *
13601366 * Iterate through each task of @cs updating its cpus_allowed to the
13611367 * effective cpuset's. As this function is called with cpuset_mutex held,
1362- * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask()
1363- * is used instead of effective_cpus to make sure all offline CPUs are also
1364- * included as hotplug code won't update cpumasks for tasks in top_cpuset.
1368+ * cpuset membership stays stable.
1369+ *
1370+ * For top_cpuset, task_cpu_possible_mask() is used instead of effective_cpus
1371+ * to make sure all offline CPUs are also included as hotplug code won't
1372+ * update cpumasks for tasks in top_cpuset.
1373+ *
 1374+ * As task_cpu_possible_mask() can be task dependent on arm64, we have to
 1375+ * do cpu masking per task instead of doing it once for all tasks.
13651376 */
13661377static void update_tasks_cpumask (struct cpuset * cs , struct cpumask * new_cpus )
13671378{
@@ -1423,7 +1434,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
14231434 *
14241435 * Return: 0 if successful, an error code otherwise
14251436 */
1426- static int update_partition_exclusive (struct cpuset * cs , int new_prs )
1437+ static int update_partition_exclusive_flag (struct cpuset * cs , int new_prs )
14271438{
14281439 bool exclusive = (new_prs > PRS_MEMBER );
14291440
@@ -1506,12 +1517,12 @@ static void reset_partition_data(struct cpuset *cs)
15061517}
15071518
15081519/*
1509- * partition_xcpus_newstate - Exclusive CPUs state change
1520+ * isolated_cpus_update - Update the isolated_cpus mask
15101521 * @old_prs: old partition_root_state
15111522 * @new_prs: new partition_root_state
15121523 * @xcpus: exclusive CPUs with state change
15131524 */
1514- static void partition_xcpus_newstate (int old_prs , int new_prs , struct cpumask * xcpus )
1525+ static void isolated_cpus_update (int old_prs , int new_prs , struct cpumask * xcpus )
15151526{
15161527 WARN_ON_ONCE (old_prs == new_prs );
15171528 if (new_prs == PRS_ISOLATED )
@@ -1545,8 +1556,8 @@ static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
15451556
15461557 isolcpus_updated = (new_prs != parent -> partition_root_state );
15471558 if (isolcpus_updated )
1548- partition_xcpus_newstate (parent -> partition_root_state , new_prs ,
1549- xcpus );
1559+ isolated_cpus_update (parent -> partition_root_state , new_prs ,
1560+ xcpus );
15501561
15511562 cpumask_andnot (parent -> effective_cpus , parent -> effective_cpus , xcpus );
15521563 return isolcpus_updated ;
@@ -1576,8 +1587,8 @@ static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
15761587
15771588 isolcpus_updated = (old_prs != parent -> partition_root_state );
15781589 if (isolcpus_updated )
1579- partition_xcpus_newstate (old_prs , parent -> partition_root_state ,
1580- xcpus );
1590+ isolated_cpus_update (old_prs , parent -> partition_root_state ,
1591+ xcpus );
15811592
15821593 cpumask_and (xcpus , xcpus , cpu_active_mask );
15831594 cpumask_or (parent -> effective_cpus , parent -> effective_cpus , xcpus );
@@ -1906,8 +1917,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
19061917 int old_prs , new_prs ;
19071918 int part_error = PERR_NONE ; /* Partition error? */
19081919 int subparts_delta = 0 ;
1909- struct cpumask * xcpus ; /* cs effective_xcpus */
19101920 int isolcpus_updated = 0 ;
1921+ struct cpumask * xcpus = user_xcpus (cs );
19111922 bool nocpu ;
19121923
19131924 lockdep_assert_held (& cpuset_mutex );
@@ -1919,7 +1930,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
19191930 */
19201931 adding = deleting = false;
19211932 old_prs = new_prs = cs -> partition_root_state ;
1922- xcpus = user_xcpus (cs );
19231933
19241934 if (cmd == partcmd_invalidate ) {
19251935 if (is_prs_invalid (old_prs ))
@@ -2133,7 +2143,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
21332143 * CPU lists in cs haven't been updated yet. So defer it to later.
21342144 */
21352145 if ((old_prs != new_prs ) && (cmd != partcmd_update )) {
2136- int err = update_partition_exclusive (cs , new_prs );
2146+ int err = update_partition_exclusive_flag (cs , new_prs );
21372147
21382148 if (err )
21392149 return err ;
@@ -2171,7 +2181,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
21712181 update_unbound_workqueue_cpumask (isolcpus_updated );
21722182
21732183 if ((old_prs != new_prs ) && (cmd == partcmd_update ))
2174- update_partition_exclusive (cs , new_prs );
2184+ update_partition_exclusive_flag (cs , new_prs );
21752185
21762186 if (adding || deleting ) {
21772187 update_tasks_cpumask (parent , tmp -> addmask );
@@ -3140,7 +3150,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
31403150 int err = PERR_NONE , old_prs = cs -> partition_root_state ;
31413151 struct cpuset * parent = parent_cs (cs );
31423152 struct tmpmasks tmpmask ;
3143- bool new_xcpus_state = false;
3153+ bool isolcpus_updated = false;
31443154
31453155 if (old_prs == new_prs )
31463156 return 0 ;
@@ -3154,7 +3164,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
31543164 if (alloc_cpumasks (NULL , & tmpmask ))
31553165 return - ENOMEM ;
31563166
3157- err = update_partition_exclusive (cs , new_prs );
3167+ err = update_partition_exclusive_flag (cs , new_prs );
31583168 if (err )
31593169 goto out ;
31603170
@@ -3182,8 +3192,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
31823192 } else if (old_prs && new_prs ) {
31833193 /*
31843194 * A change in load balance state only, no change in cpumasks.
3195+ * Need to update isolated_cpus.
31853196 */
3186- new_xcpus_state = true;
3197+ isolcpus_updated = true;
31873198 } else {
31883199 /*
31893200 * Switching back to member is always allowed even if it
@@ -3207,22 +3218,26 @@ static int update_prstate(struct cpuset *cs, int new_prs)
32073218 */
32083219 if (err ) {
32093220 new_prs = - new_prs ;
3210- update_partition_exclusive (cs , new_prs );
3221+ update_partition_exclusive_flag (cs , new_prs );
32113222 }
32123223
32133224 spin_lock_irq (& callback_lock );
32143225 cs -> partition_root_state = new_prs ;
32153226 WRITE_ONCE (cs -> prs_err , err );
32163227 if (!is_partition_valid (cs ))
32173228 reset_partition_data (cs );
3218- else if (new_xcpus_state )
3219- partition_xcpus_newstate (old_prs , new_prs , cs -> effective_xcpus );
3229+ else if (isolcpus_updated )
3230+ isolated_cpus_update (old_prs , new_prs , cs -> effective_xcpus );
32203231 spin_unlock_irq (& callback_lock );
3221- update_unbound_workqueue_cpumask (new_xcpus_state );
3232+ update_unbound_workqueue_cpumask (isolcpus_updated );
32223233
3223- /* Force update if switching back to member */
3234+ /* Force update if switching back to member & update effective_xcpus */
32243235 update_cpumasks_hier (cs , & tmpmask , !new_prs );
32253236
3237+ /* A newly created partition must have effective_xcpus set */
3238+ WARN_ON_ONCE (!old_prs && (new_prs > 0 )
3239+ && cpumask_empty (cs -> effective_xcpus ));
3240+
32263241 /* Update sched domains and load balance flag */
32273242 update_partition_sd_lb (cs , old_prs );
32283243
0 commit comments