Skip to content

Commit 4d85b78

Browse files
bgoglin authored and ndenoyelle committed
linux: add HWLOC_USE_NUMA_DISTANCES envvar to disable new node locality heuristics
1) Some SLIT tables are buggy for NVDIMM nodes (asymmetric and wrong). 2) Current Linux kernels (at least up to 5.3) may create conflicting node targets/initiators when proximity domains and OS node indexes are different (e.g. when PXM 0 and 1 in CPU0 are nodes 0 and 2 in Linux on dual-CLX platforms in SNC mode). This envvar allows working around such hardware/software bugs. By default, the envvar is 7, which means SLIT is gathered and the locality heuristics affected by (1) and (2) are both enabled. Removing bit 0 disables SLIT gathering entirely (unless bit 1 is still set, since bit 1 implies bit 0). Removing bit 1 disables the distance-based heuristic affected by (1). Removing bit 2 disables the target/initiator heuristic affected by (2). Signed-off-by: Brice Goglin <[email protected]>
1 parent 4669bb9 commit 4d85b78

File tree

2 files changed

+39
-6
lines changed

2 files changed

+39
-6
lines changed

doc/hwloc.doxy

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,17 @@ following environment variables.
10021002
actual displaying of these error messages.
10031003
</dd>
10041004

1005+
<dt>HWLOC_USE_NUMA_DISTANCES=7</dt>
1006+
<dd>enables or disables the use of NUMA distances and memory target/initiator information.
1007+
NUMA distances and memory target/initiator information may be used
1008+
to improve the locality of NUMA nodes, especially CPU-less nodes.
1009+
Bits in the value of this environment variable enable different features:
1010+
Bit 0 enables the gathering of NUMA distances from the operating system.
1011+
Bit 1 further enables the use of NUMA distances to improve the
1012+
locality of CPU-less nodes (setting bit 1 also enables the gathering of NUMA distances, as if bit 0 were set).
1013+
Bit 2 enables the use of target/initiator information.
1014+
</dd>
1015+
10051016
<dt>HWLOC_GROUPING=1</dt>
10061017
<dd>enables or disables objects grouping based on distances.
10071018
By default, hwloc uses distance matrices between objects (either read

hwloc/topology-linux.c

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ struct hwloc_linux_backend_data_s {
5757
int is_knl;
5858
int is_amd_with_CU;
5959
int use_dt;
60+
int use_numa_distances;
61+
int use_numa_distances_for_cpuless;
62+
int use_numa_initiators;
6063
struct utsname utsname; /* fields contain \0 when unknown */
6164
int fallback_nbprocessors; /* only used in hwloc_linux_fallback_pu_level(), may be <= 0 (error) earlier */
6265
unsigned pagesize;
@@ -4008,7 +4011,9 @@ annotate_sysfsnode(struct hwloc_topology *topology,
40084011
topology->support.discovery->numa_memory = 1;
40094012
topology->support.discovery->disallowed_numa = 1;
40104013

4011-
if (nbnodes >= 2 && !hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd)) {
4014+
if (nbnodes >= 2
4015+
&& data->use_numa_distances
4016+
&& !hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd)) {
40124017
hwloc_internal_distances_add(topology, "NUMALatency", nbnodes, nodes, distances,
40134018
HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY,
40144019
HWLOC_DISTANCES_ADD_FLAG_GROUP);
@@ -4188,6 +4193,10 @@ look_sysfsnode(struct hwloc_topology *topology,
41884193
/* failed to read/create some nodes, don't bother reading/fixing
41894194
* a distance matrix that would likely be wrong anyway.
41904195
*/
4196+
data->use_numa_distances = 0;
4197+
}
4198+
4199+
if (!data->use_numa_distances) {
41914200
free(distances);
41924201
distances = NULL;
41934202
}
@@ -4223,7 +4232,8 @@ look_sysfsnode(struct hwloc_topology *topology,
42234232
if (node && !hwloc_bitmap_iszero(node->cpuset)) {
42244233
hwloc_obj_t tree;
42254234
/* update from HMAT initiators if any */
4226-
read_node_initiators(data, node, nbnodes, nodes, path);
4235+
if (data->use_numa_initiators)
4236+
read_node_initiators(data, node, nbnodes, nodes, path);
42274237

42284238
tree = node;
42294239
if (need_memcaches)
@@ -4241,12 +4251,13 @@ look_sysfsnode(struct hwloc_topology *topology,
42414251
if (node && hwloc_bitmap_iszero(node->cpuset)) {
42424252
hwloc_obj_t tree;
42434253
/* update from HMAT initiators if any */
4244-
if (!read_node_initiators(data, node, nbnodes, nodes, path))
4245-
if (!hwloc_bitmap_iszero(node->cpuset))
4246-
goto fixed;
4254+
if (data->use_numa_initiators)
4255+
if (!read_node_initiators(data, node, nbnodes, nodes, path))
4256+
if (!hwloc_bitmap_iszero(node->cpuset))
4257+
goto fixed;
42474258

42484259
/* if HMAT didn't help, try to find locality of CPU-less NUMA nodes by looking at their distances */
4249-
if (distances)
4260+
if (distances && data->use_numa_distances_for_cpuless)
42504261
fixup_cpuless_node_locality_from_distances(i, nbnodes, nodes, distances);
42514262

42524263
fixed:
@@ -6884,6 +6895,17 @@ hwloc_linux_component_instantiate(struct hwloc_topology *topology,
68846895
if (!data->dumped_hwdata_dirname)
68856896
data->dumped_hwdata_dirname = (char *) RUNSTATEDIR "/hwloc/";
68866897

6898+
data->use_numa_distances = 1;
6899+
data->use_numa_distances_for_cpuless = 1;
6900+
data->use_numa_initiators = 1;
6901+
env = getenv("HWLOC_USE_NUMA_DISTANCES");
6902+
if (env) {
6903+
unsigned val = atoi(env);
6904+
data->use_numa_distances = !!(val & 3); /* 2 implies 1 */
6905+
data->use_numa_distances_for_cpuless = !!(val & 2);
6906+
data->use_numa_initiators = !!(val & 4);
6907+
}
6908+
68876909
env = getenv("HWLOC_USE_DT");
68886910
if (env)
68896911
data->use_dt = atoi(env);

0 commit comments

Comments
 (0)