Skip to content

Commit e0e3c7c

Browse files
committed
linux/cpukinds: force homogeneity on NVIDIA Grace
Add a quirk to uniformize basefreq (to its minimum), maxfreq and capacity (to their maximum) and enable it on NVIDIA Grace. Also add the envvar HWLOC_CPUKINDS_HOMOGENEOUS to force that quirk (=1) or avoid it (=0). Closes #634 Signed-off-by: Brice Goglin <[email protected]>
1 parent 18a7005 commit e0e3c7c

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

doc/hwloc.doxy

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,6 +1206,13 @@ following environment variables.
12061206
If set to 0, max frequencies are entirely ignored.
12071207
</dd>
12081208

1209+
<dt>HWLOC_CPUKINDS_HOMOGENEOUS=0</dt>
1210+
<dd>uniformizes max frequency, base frequency and Linux capacity to
1211+
force a single homogeneous kind of CPUs.
1212+
This is enabled by default on NVIDIA Grace but may be disabled
1213+
if set to 0 (or enabled on other platforms if set to 1).
1214+
</dd>
1215+
12091216
<dt>HWLOC_PCI_LOCALITY=&lt;domain/bus&gt; &lt;cpuset&gt;;...</dt>
12101217
<dt>HWLOC_PCI_LOCALITY=/path/to/pci/locality/file</dt>
12111218
<dd>changes the locality of I/O devices behind the specified PCI buses.

hwloc/topology-linux.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4606,6 +4606,64 @@ hwloc_linux_cpukinds_adjust_maxfreqs(unsigned nr_pus,
46064606
}
46074607
}
46084608

4609+
static void
4610+
hwloc_linux_cpukinds_force_homogeneous(struct hwloc_topology *topology,
4611+
unsigned nr_pus,
4612+
struct hwloc_linux_cpukinds_by_pu *by_pu)
4613+
{
4614+
unsigned i;
4615+
unsigned long base_freq = ULONG_MAX;
4616+
unsigned long max_freq = 0;
4617+
unsigned long capacity = 0;
4618+
for(i=0; i<nr_pus; i++) {
4619+
/* use the lowest base_freq for all cores */
4620+
if (by_pu[i].base_freq && by_pu[i].base_freq < base_freq)
4621+
base_freq = by_pu[i].base_freq;
4622+
/* use the highest max_freq for all cores */
4623+
if (by_pu[i].max_freq > max_freq)
4624+
max_freq = by_pu[i].max_freq;
4625+
/* use the highest capacity for all cores */
4626+
if (by_pu[i].capacity > capacity)
4627+
capacity = by_pu[i].capacity;
4628+
}
4629+
hwloc_debug("linux/cpukinds: forcing homogeneous max_freq %lu base_freq %lu capacity %lu\n",
4630+
max_freq, base_freq, capacity);
4631+
4632+
if (max_freq) {
4633+
hwloc_bitmap_t rootset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset);
4634+
if (rootset) {
4635+
char value[64];
4636+
snprintf(value, sizeof(value), "%lu", max_freq/1000);
4637+
hwloc_linux_cpukinds_register_one(topology, rootset,
4638+
HWLOC_CPUKIND_EFFICIENCY_UNKNOWN,
4639+
(char *) "FrequencyMaxMHz", value);
4640+
/* the cpuset is given to the callee */
4641+
}
4642+
}
4643+
if (base_freq != ULONG_MAX) {
4644+
hwloc_bitmap_t rootset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset);
4645+
if (rootset) {
4646+
char value[64];
4647+
snprintf(value, sizeof(value), "%lu", base_freq/1000);
4648+
hwloc_linux_cpukinds_register_one(topology, rootset,
4649+
HWLOC_CPUKIND_EFFICIENCY_UNKNOWN,
4650+
(char *) "FrequencyBaseMHz", value);
4651+
/* the cpuset is given to the callee */
4652+
}
4653+
}
4654+
if (capacity) {
4655+
hwloc_bitmap_t rootset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset);
4656+
if (rootset) {
4657+
char value[64];
4658+
snprintf(value, sizeof(value), "%lu", capacity);
4659+
hwloc_linux_cpukinds_register_one(topology, rootset,
4660+
HWLOC_CPUKIND_EFFICIENCY_UNKNOWN,
4661+
(char *) "LinuxCapacity", value);
4662+
/* the cpuset is given to the callee */
4663+
}
4664+
}
4665+
}
4666+
46094667
static int
46104668
look_sysfscpukinds(struct hwloc_topology *topology,
46114669
struct hwloc_linux_backend_data_s *data)
@@ -4619,6 +4677,8 @@ look_sysfscpukinds(struct hwloc_topology *topology,
46194677
hwloc_bitmap_t atom_pmu_set, core_pmu_set;
46204678
int maxfreq_enabled = -1; /* -1 means adjust (default), 0 means ignore, 1 means enforce */
46214679
unsigned adjust_max = 10;
4680+
int force_homogeneous;
4681+
const char *info;
46224682
int pu, i;
46234683

46244684
env = getenv("HWLOC_CPUKINDS_MAXFREQ");
@@ -4669,6 +4729,19 @@ look_sysfscpukinds(struct hwloc_topology *topology,
46694729
} hwloc_bitmap_foreach_end();
46704730
assert(i == nr_pus);
46714731

4732+
/* NVIDIA Grace is homogeneous with slight variations of max frequency, ignore those */
4733+
info = hwloc_obj_get_info_by_name(topology->levels[0][0], "SoC0ID");
4734+
force_homogeneous = info && !strcmp(info, "jep106:036b:0241");
4735+
/* force homogeneity ? */
4736+
env = getenv("HWLOC_CPUKINDS_HOMOGENEOUS");
4737+
if (env)
4738+
force_homogeneous = atoi(env);
4739+
if (force_homogeneous) {
4740+
hwloc_linux_cpukinds_force_homogeneous(topology, (unsigned) nr_pus, by_pu);
4741+
free(by_pu);
4742+
return 0;
4743+
}
4744+
46724745
if (maxfreq_enabled == -1 && !max_without_basefreq)
46734746
/* we have basefreq, check maxfreq and ignore/fix it if turboboost 3.0 makes the max different on different cores */
46744747
hwloc_linux_cpukinds_adjust_maxfreqs(nr_pus, by_pu, adjust_max);

0 commit comments

Comments
 (0)