Skip to content

Commit 60f2031

Browse files
committed
linux/cpukinds: adjust turboboost max frequencies before building cpukinds
Avoids building cpukinds and merging them later. Signed-off-by: Brice Goglin <[email protected]>
1 parent 9ae30cc commit 60f2031

File tree

1 file changed

+38
-66
lines changed

1 file changed

+38
-66
lines changed

hwloc/topology-linux.c

Lines changed: 38 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4435,6 +4435,9 @@ struct hwloc_linux_cpukinds_by_pu {
44354435
unsigned long max_freq;
44364436
unsigned long base_freq;
44374437
unsigned long capacity;
4438+
int done; /* temporary bit to identify PUs that were processed by the current algorithm
4439+
* (only hwloc_linux_cpukinds_adjust_maxfreqs() for now)
4440+
*/
44384441
};
44394442

44404443
struct hwloc_linux_cpukinds {
@@ -4539,85 +4542,54 @@ hwloc_linux_cpukinds_destroy(struct hwloc_linux_cpukinds *cpukinds)
45394542
free (cpukinds->sets);
45404543
}
45414544

4542-
/* merge all PUs of cpuset inside a single cpukinds set with the given value */
4543-
static void
4544-
hwloc_linux_cpukinds_merge_values(struct hwloc_linux_cpukinds *cpukinds,
4545-
hwloc_const_cpuset_t cpuset,
4546-
unsigned long value)
4547-
{
4548-
unsigned first, i;
4549-
hwloc_bitmap_t tmpset = hwloc_bitmap_alloc();
4550-
if (!tmpset)
4551-
return;
4552-
4553-
/* find a set with that value */
4554-
for(first=0; first<cpukinds->nr_sets; first++)
4555-
if (cpukinds->sets[first].value == value)
4556-
break;
4557-
/* it must exist since we're downgrading some values to an existing one */
4558-
assert(first < cpukinds->nr_sets);
4559-
4560-
/* merge affected sets with the existing one */
4561-
for(i=0; i<cpukinds->nr_sets; i++) {
4562-
if (i == first)
4563-
continue;
4564-
4565-
hwloc_bitmap_and(tmpset, cpukinds->sets[i].cpuset, cpuset);
4566-
if (hwloc_bitmap_iszero(tmpset))
4567-
/* not affected */
4568-
continue;
4569-
4570-
hwloc_bitmap_or(cpukinds->sets[first].cpuset, cpukinds->sets[first].cpuset, tmpset);
4571-
hwloc_bitmap_andnot(cpukinds->sets[i].cpuset, cpukinds->sets[i].cpuset, tmpset);
4572-
if (hwloc_bitmap_iszero(cpukinds->sets[i].cpuset)) {
4573-
/* became empty, remove it, and move remaining sets by one */
4574-
hwloc_bitmap_free(cpukinds->sets[i].cpuset);
4575-
memmove(&cpukinds->sets[i], &cpukinds->sets[i+1], (cpukinds->nr_sets-i-1)*sizeof(cpukinds->sets[i]));
4576-
cpukinds->nr_sets--;
4577-
if (i<first)
4578-
first--;
4579-
i--;
4580-
}
4581-
}
4582-
4583-
hwloc_bitmap_free(tmpset);
4584-
}
4585-
45864545
/* for each set of PUs with the same base frequency,
45874546
* adjust max frequencies by up to adjust_max percent
45884547
*/
45894548
static void
4590-
hwloc_linux_cpukinds_adjust_maxfreqs(struct hwloc_linux_cpukinds *cpufreqs_max,
4591-
struct hwloc_linux_cpukinds *cpufreqs_base,
4549+
hwloc_linux_cpukinds_adjust_maxfreqs(unsigned nr_pus,
4550+
struct hwloc_linux_cpukinds_by_pu *by_pu,
45924551
unsigned adjust_max)
45934552
{
4594-
unsigned i, j;
4595-
for(i=0; i<cpufreqs_base->nr_sets; i++) {
4596-
unsigned long min_maxfreq = UINT_MAX, max_maxfreq = 0;
4597-
4598-
for(j=0; j<cpufreqs_max->nr_sets; j++) {
4599-
if (!hwloc_bitmap_intersects(cpufreqs_base->sets[i].cpuset, cpufreqs_max->sets[j].cpuset))
4553+
unsigned i, next = 0, done = 0;
4554+
while (done < nr_pus) {
4555+
/* start a new group of same base_frequency at next */
4556+
unsigned first = next;
4557+
unsigned long cur_base_freq = by_pu[first].base_freq;
4558+
unsigned long min_maxfreq = by_pu[first].max_freq;
4559+
unsigned long max_maxfreq = by_pu[first].max_freq;
4560+
by_pu[first].done = 1;
4561+
done++;
4562+
next = 0;
4563+
for(i=first+1; i<nr_pus; i++) {
4564+
if (by_pu[i].done)
46004565
continue;
4601-
4602-
if (cpufreqs_max->sets[j].value < min_maxfreq)
4603-
min_maxfreq = cpufreqs_max->sets[j].value;
4604-
if (cpufreqs_max->sets[j].value > max_maxfreq)
4605-
max_maxfreq = cpufreqs_max->sets[j].value;
4566+
if (by_pu[i].base_freq == cur_base_freq) {
4567+
if (by_pu[i].max_freq > max_maxfreq)
4568+
max_maxfreq = by_pu[i].max_freq;
4569+
else if (by_pu[i].max_freq < min_maxfreq)
4570+
min_maxfreq = by_pu[i].max_freq;
4571+
by_pu[i].done = 1;
4572+
done++;
4573+
} else {
4574+
if (!next)
4575+
next = i;
4576+
}
46064577
}
4607-
if (min_maxfreq == UINT_MAX)
4608-
continue;
46094578

46104579
if (min_maxfreq == max_maxfreq) {
46114580
hwloc_debug("linux/cpufreq: max frequencies always %lu when base=%lu\n",
4612-
min_maxfreq, cpufreqs_base->sets[i].value);
4581+
min_maxfreq, cur_base_freq);
46134582
} else {
46144583
float ratio = ((float)(max_maxfreq-min_maxfreq)/(float)min_maxfreq);
46154584
hwloc_debug("linux/cpufreq: max frequencies in [%lu-%lu] when base=%lu\n",
4616-
min_maxfreq, max_maxfreq, cpufreqs_base->sets[i].value);
4585+
min_maxfreq, max_maxfreq, cur_base_freq);
46174586
if (ratio*100 < (float)adjust_max) {
46184587
hwloc_debug("linux/cpufreq: max frequencies overrated up to %u%% < %u%%, adjust all to %lu\n",
46194588
(unsigned)(ratio*100), adjust_max, min_maxfreq);
4620-
hwloc_linux_cpukinds_merge_values(cpufreqs_max, cpufreqs_base->sets[i].cpuset, min_maxfreq);
4589+
/* update max_freq of all PUs with this base_freq */
4590+
for(i=first; i<nr_pus; i++)
4591+
if (by_pu[i].base_freq == cur_base_freq)
4592+
by_pu[i].max_freq = min_maxfreq;
46214593
}
46224594
}
46234595
}
@@ -4686,6 +4658,10 @@ look_sysfscpukinds(struct hwloc_topology *topology,
46864658
} hwloc_bitmap_foreach_end();
46874659
assert(i == nr_pus);
46884660

4661+
if (maxfreq_enabled == -1 && !max_without_basefreq)
4662+
/* we have basefreq, check maxfreq and ignore/fix it if turboboost 3.0 makes the max different on different cores */
4663+
hwloc_linux_cpukinds_adjust_maxfreqs(nr_pus, by_pu, adjust_max);
4664+
46894665
/* now store base+max frequency */
46904666
hwloc_linux_cpukinds_init(&cpufreqs_max);
46914667
hwloc_linux_cpukinds_init(&cpufreqs_base);
@@ -4696,10 +4672,6 @@ look_sysfscpukinds(struct hwloc_topology *topology,
46964672
hwloc_linux_cpukinds_add(&cpufreqs_base, by_pu[i].pu, by_pu[i].base_freq/1000);
46974673
}
46984674

4699-
if (maxfreq_enabled == -1 && cpufreqs_max.nr_sets && !max_without_basefreq)
4700-
/* we have basefreq, check maxfreq and ignore/fix it if turboboost 3.0 makes the max different on different cores */
4701-
hwloc_linux_cpukinds_adjust_maxfreqs(&cpufreqs_max, &cpufreqs_base, adjust_max);
4702-
47034675
if (maxfreq_enabled != 0)
47044676
/* only expose maxfreq info if we miss some basefreq info */
47054677
hwloc_linux_cpukinds_register(&cpufreqs_max, topology, "FrequencyMaxMHz", 0);

0 commit comments

Comments
 (0)