@@ -4435,6 +4435,9 @@ struct hwloc_linux_cpukinds_by_pu {
4435
4435
unsigned long max_freq ;
4436
4436
unsigned long base_freq ;
4437
4437
unsigned long capacity ;
4438
+ int done ; /* temporary bit to identify PU that were processed by the current algorithm
4439
+ * (only hwloc_linux_cpukinds_adjust_maxfreqs() for now)
4440
+ */
4438
4441
};
4439
4442
4440
4443
struct hwloc_linux_cpukinds {
@@ -4539,85 +4542,54 @@ hwloc_linux_cpukinds_destroy(struct hwloc_linux_cpukinds *cpukinds)
4539
4542
free (cpukinds -> sets );
4540
4543
}
4541
4544
4542
- /* merge all PUs of cpuset inside a single cpukinds set with the given value */
4543
- static void
4544
- hwloc_linux_cpukinds_merge_values (struct hwloc_linux_cpukinds * cpukinds ,
4545
- hwloc_const_cpuset_t cpuset ,
4546
- unsigned long value )
4547
- {
4548
- unsigned first , i ;
4549
- hwloc_bitmap_t tmpset = hwloc_bitmap_alloc ();
4550
- if (!tmpset )
4551
- return ;
4552
-
4553
- /* find a set with that value */
4554
- for (first = 0 ; first < cpukinds -> nr_sets ; first ++ )
4555
- if (cpukinds -> sets [first ].value == value )
4556
- break ;
4557
- /* it must exist since we're downgrading some values to an existing one */
4558
- assert (first < cpukinds -> nr_sets );
4559
-
4560
- /* merge affected sets with the existing one */
4561
- for (i = 0 ; i < cpukinds -> nr_sets ; i ++ ) {
4562
- if (i == first )
4563
- continue ;
4564
-
4565
- hwloc_bitmap_and (tmpset , cpukinds -> sets [i ].cpuset , cpuset );
4566
- if (hwloc_bitmap_iszero (tmpset ))
4567
- /* not affected */
4568
- continue ;
4569
-
4570
- hwloc_bitmap_or (cpukinds -> sets [first ].cpuset , cpukinds -> sets [first ].cpuset , tmpset );
4571
- hwloc_bitmap_andnot (cpukinds -> sets [i ].cpuset , cpukinds -> sets [i ].cpuset , tmpset );
4572
- if (hwloc_bitmap_iszero (cpukinds -> sets [i ].cpuset )) {
4573
- /* became empty, remove it, and move remaining sets by one */
4574
- hwloc_bitmap_free (cpukinds -> sets [i ].cpuset );
4575
- memmove (& cpukinds -> sets [i ], & cpukinds -> sets [i + 1 ], (cpukinds -> nr_sets - i - 1 )* sizeof (cpukinds -> sets [i ]));
4576
- cpukinds -> nr_sets -- ;
4577
- if (i < first )
4578
- first -- ;
4579
- i -- ;
4580
- }
4581
- }
4582
-
4583
- hwloc_bitmap_free (tmpset );
4584
- }
4585
-
4586
4545
/* for each set of PUs with the same base frequency,
4587
4546
* adjust max frequencies by up to adjust_max percents
4588
4547
*/
4589
4548
static void
4590
- hwloc_linux_cpukinds_adjust_maxfreqs (struct hwloc_linux_cpukinds * cpufreqs_max ,
4591
- struct hwloc_linux_cpukinds * cpufreqs_base ,
4549
+ hwloc_linux_cpukinds_adjust_maxfreqs (unsigned nr_pus ,
4550
+ struct hwloc_linux_cpukinds_by_pu * by_pu ,
4592
4551
unsigned adjust_max )
4593
4552
{
4594
- unsigned i , j ;
4595
- for (i = 0 ; i < cpufreqs_base -> nr_sets ; i ++ ) {
4596
- unsigned long min_maxfreq = UINT_MAX , max_maxfreq = 0 ;
4597
-
4598
- for (j = 0 ; j < cpufreqs_max -> nr_sets ; j ++ ) {
4599
- if (!hwloc_bitmap_intersects (cpufreqs_base -> sets [i ].cpuset , cpufreqs_max -> sets [j ].cpuset ))
4553
+ unsigned i , next = 0 , done = 0 ;
4554
+ while (done < nr_pus ) {
4555
+ /* start a new group of same base_frequency at next */
4556
+ unsigned first = next ;
4557
+ unsigned long cur_base_freq = by_pu [first ].base_freq ;
4558
+ unsigned long min_maxfreq = by_pu [first ].max_freq ;
4559
+ unsigned long max_maxfreq = by_pu [first ].max_freq ;
4560
+ by_pu [first ].done = 1 ;
4561
+ done ++ ;
4562
+ next = 0 ;
4563
+ for (i = first + 1 ; i < nr_pus ; i ++ ) {
4564
+ if (by_pu [i ].done )
4600
4565
continue ;
4601
-
4602
- if (cpufreqs_max -> sets [j ].value < min_maxfreq )
4603
- min_maxfreq = cpufreqs_max -> sets [j ].value ;
4604
- if (cpufreqs_max -> sets [j ].value > max_maxfreq )
4605
- max_maxfreq = cpufreqs_max -> sets [j ].value ;
4566
+ if (by_pu [i ].base_freq == cur_base_freq ) {
4567
+ if (by_pu [i ].max_freq > max_maxfreq )
4568
+ max_maxfreq = by_pu [i ].max_freq ;
4569
+ else if (by_pu [i ].max_freq < min_maxfreq )
4570
+ min_maxfreq = by_pu [i ].max_freq ;
4571
+ by_pu [i ].done = 1 ;
4572
+ done ++ ;
4573
+ } else {
4574
+ if (!next )
4575
+ next = i ;
4576
+ }
4606
4577
}
4607
- if (min_maxfreq == UINT_MAX )
4608
- continue ;
4609
4578
4610
4579
if (min_maxfreq == max_maxfreq ) {
4611
4580
hwloc_debug ("linux/cpufreq: max frequencies always %lu when base=%lu\n" ,
4612
- min_maxfreq , cpufreqs_base -> sets [ i ]. value );
4581
+ min_maxfreq , cur_base_freq );
4613
4582
} else {
4614
4583
float ratio = ((float )(max_maxfreq - min_maxfreq )/(float )min_maxfreq );
4615
4584
hwloc_debug ("linux/cpufreq: max frequencies in [%lu-%lu] when base=%lu\n" ,
4616
- min_maxfreq , max_maxfreq , cpufreqs_base -> sets [ i ]. value );
4585
+ min_maxfreq , max_maxfreq , cur_base_freq );
4617
4586
if (ratio * 100 < (float )adjust_max ) {
4618
4587
hwloc_debug ("linux/cpufreq: max frequencies overrated up to %u%% < %u%%, adjust all to %lu\n" ,
4619
4588
(unsigned )(ratio * 100 ), adjust_max , min_maxfreq );
4620
- hwloc_linux_cpukinds_merge_values (cpufreqs_max , cpufreqs_base -> sets [i ].cpuset , min_maxfreq );
4589
+ /* update max_freq of all PUs with this base_freq */
4590
+ for (i = first ; i < nr_pus ; i ++ )
4591
+ if (by_pu [i ].base_freq == cur_base_freq )
4592
+ by_pu [i ].max_freq = min_maxfreq ;
4621
4593
}
4622
4594
}
4623
4595
}
@@ -4686,6 +4658,10 @@ look_sysfscpukinds(struct hwloc_topology *topology,
4686
4658
} hwloc_bitmap_foreach_end ();
4687
4659
assert (i == nr_pus );
4688
4660
4661
+ if (maxfreq_enabled == -1 && !max_without_basefreq )
4662
+ /* we have basefreq, check maxfreq and ignore/fix it if turboboost 3.0 makes the max different on different cores */
4663
+ hwloc_linux_cpukinds_adjust_maxfreqs (nr_pus , by_pu , adjust_max );
4664
+
4689
4665
/* now store base+max frequency */
4690
4666
hwloc_linux_cpukinds_init (& cpufreqs_max );
4691
4667
hwloc_linux_cpukinds_init (& cpufreqs_base );
@@ -4696,10 +4672,6 @@ look_sysfscpukinds(struct hwloc_topology *topology,
4696
4672
hwloc_linux_cpukinds_add (& cpufreqs_base , by_pu [i ].pu , by_pu [i ].base_freq /1000 );
4697
4673
}
4698
4674
4699
- if (maxfreq_enabled == -1 && cpufreqs_max .nr_sets && !max_without_basefreq )
4700
- /* we have basefreq, check maxfreq and ignore/fix it if turboboost 3.0 makes the max different on different cores */
4701
- hwloc_linux_cpukinds_adjust_maxfreqs (& cpufreqs_max , & cpufreqs_base , adjust_max );
4702
-
4703
4675
if (maxfreq_enabled != 0 )
4704
4676
/* only expose maxfreq info if we miss some basefreq info */
4705
4677
hwloc_linux_cpukinds_register (& cpufreqs_max , topology , "FrequencyMaxMHz" , 0 );
0 commit comments