Skip to content

Commit 8aae636

Browse files
mwbringmann authored and ZhengShunQian committed
powerpc/numa: Ensure nodes initialized for hotplug
[ Upstream commit ea05ba7 ]

This patch fixes some problems encountered at runtime with configurations that support memory-less nodes, or that hot-add CPUs into nodes that are memoryless during system execution after boot. The problems of interest include:

* Nodes known to powerpc to be memoryless at boot, but to have CPUs in them, are allowed to be 'possible' and 'online'. Memory allocations for those nodes are taken from another node that does have memory until and if memory is hot-added to the node.

* Nodes which have no resources assigned at boot, but which may still be referenced subsequently by affinity or associativity attributes, are kept in the list of 'possible' nodes for powerpc. Hot-add of memory or CPUs to the system can reference these nodes and bring them online instead of redirecting the references to one of the set of nodes known to have memory at boot.

Note that this software operates under the context of CPU hotplug. We are not doing memory hotplug in this code, but rather updating the kernel's CPU topology (i.e. arch_update_cpu_topology / numa_update_cpu_topology). We are initializing a node that may be used by CPUs or memory before it can be referenced as invalid by a CPU hotplug operation.

CPU hotplug operations are protected by a range of APIs including cpu_maps_update_begin/cpu_maps_update_done, cpus_read/write_lock / cpus_read/write_unlock, device locks, and more. Memory hotplug operations, including try_online_node, are protected by mem_hotplug_begin/mem_hotplug_done, device locks, and more. In the case of CPUs being hot-added to a previously memoryless node, the try_online_node operation occurs wholly within the CPU locks with no overlap.

Using HMC hot-add/hot-remove operations, we have been able to add and remove CPUs to any possible node without failures. HMC operations involve a degree of self-serialization, though.
Signed-off-by: Michael Bringmann <[email protected]> Reviewed-by: Nathan Fontenot <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Signed-off-by: Sasha Levin <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent ba6b1d0 commit 8aae636

File tree

1 file changed

+37
-10
lines changed

1 file changed

+37
-10
lines changed

arch/powerpc/mm/numa.c

Lines changed: 37 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -551,7 +551,7 @@ static int numa_setup_cpu(unsigned long lcpu)
551551
nid = of_node_to_nid_single(cpu);
552552

553553
out_present:
554-
if (nid < 0 || !node_online(nid))
554+
if (nid < 0 || !node_possible(nid))
555555
nid = first_online_node;
556556

557557
map_cpu_to_node(lcpu, nid);
@@ -969,10 +969,8 @@ static void __init find_possible_nodes(void)
969969
goto out;
970970

971971
for (i = 0; i < numnodes; i++) {
972-
if (!node_possible(i)) {
973-
setup_node_data(i, 0, 0);
972+
if (!node_possible(i))
974973
node_set(i, node_possible_map);
975-
}
976974
}
977975

978976
out:
@@ -1335,6 +1333,40 @@ static long vphn_get_associativity(unsigned long cpu,
13351333
return rc;
13361334
}
13371335

1336+
static inline int find_and_online_cpu_nid(int cpu)
1337+
{
1338+
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
1339+
int new_nid;
1340+
1341+
/* Use associativity from first thread for all siblings */
1342+
vphn_get_associativity(cpu, associativity);
1343+
new_nid = associativity_to_nid(associativity);
1344+
if (new_nid < 0 || !node_possible(new_nid))
1345+
new_nid = first_online_node;
1346+
1347+
if (NODE_DATA(new_nid) == NULL) {
1348+
#ifdef CONFIG_MEMORY_HOTPLUG
1349+
/*
1350+
* Need to ensure that NODE_DATA is initialized for a node from
1351+
* available memory (see memblock_alloc_try_nid). If unable to
1352+
* init the node, then default to nearest node that has memory
1353+
* installed.
1354+
*/
1355+
if (try_online_node(new_nid))
1356+
new_nid = first_online_node;
1357+
#else
1358+
/*
1359+
* Default to using the nearest node that has memory installed.
1360+
* Otherwise, it would be necessary to patch the kernel MM code
1361+
* to deal with more memoryless-node error conditions.
1362+
*/
1363+
new_nid = first_online_node;
1364+
#endif
1365+
}
1366+
1367+
return new_nid;
1368+
}
1369+
13381370
/*
13391371
* Update the CPU maps and sysfs entries for a single CPU when its NUMA
13401372
* characteristics change. This function doesn't perform any locking and is
@@ -1400,7 +1432,6 @@ int arch_update_cpu_topology(void)
14001432
{
14011433
unsigned int cpu, sibling, changed = 0;
14021434
struct topology_update_data *updates, *ud;
1403-
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
14041435
cpumask_t updated_cpus;
14051436
struct device *dev;
14061437
int weight, new_nid, i = 0;
@@ -1435,11 +1466,7 @@ int arch_update_cpu_topology(void)
14351466
continue;
14361467
}
14371468

1438-
/* Use associativity from first thread for all siblings */
1439-
vphn_get_associativity(cpu, associativity);
1440-
new_nid = associativity_to_nid(associativity);
1441-
if (new_nid < 0 || !node_online(new_nid))
1442-
new_nid = first_online_node;
1469+
new_nid = find_and_online_cpu_nid(cpu);
14431470

14441471
if (new_nid == numa_cpu_lookup_table[cpu]) {
14451472
cpumask_andnot(&cpu_associativity_changes_mask,

0 commit comments

Comments
 (0)