@@ -4606,6 +4606,64 @@ hwloc_linux_cpukinds_adjust_maxfreqs(unsigned nr_pus,
4606
4606
}
4607
4607
}
4608
4608
4609
+ static void
4610
+ hwloc_linux_cpukinds_force_homogeneous (struct hwloc_topology * topology ,
4611
+ unsigned nr_pus ,
4612
+ struct hwloc_linux_cpukinds_by_pu * by_pu )
4613
+ {
4614
+ unsigned i ;
4615
+ unsigned long base_freq = ULONG_MAX ;
4616
+ unsigned long max_freq = 0 ;
4617
+ unsigned long capacity = 0 ;
4618
+ for (i = 0 ; i < nr_pus ; i ++ ) {
4619
+ /* use the lowest base_freq for all cores */
4620
+ if (by_pu [i ].base_freq && by_pu [i ].base_freq < base_freq )
4621
+ base_freq = by_pu [i ].base_freq ;
4622
+ /* use the highest max_freq for all cores */
4623
+ if (by_pu [i ].max_freq > max_freq )
4624
+ max_freq = by_pu [i ].max_freq ;
4625
+ /* use the highest capacity for all cores */
4626
+ if (by_pu [i ].capacity > capacity )
4627
+ capacity = by_pu [i ].capacity ;
4628
+ }
4629
+ hwloc_debug ("linux/cpukinds: forcing homogeneous max_freq %lu base_freq %lu capacity %lu\n" ,
4630
+ max_freq , base_freq , capacity );
4631
+
4632
+ if (max_freq ) {
4633
+ hwloc_bitmap_t rootset = hwloc_bitmap_dup (topology -> levels [0 ][0 ]-> cpuset );
4634
+ if (rootset ) {
4635
+ char value [64 ];
4636
+ snprintf (value , sizeof (value ), "%lu" , max_freq /1000 );
4637
+ hwloc_linux_cpukinds_register_one (topology , rootset ,
4638
+ HWLOC_CPUKIND_EFFICIENCY_UNKNOWN ,
4639
+ (char * ) "FrequencyMaxMHz" , value );
4640
+ /* the cpuset is given to the callee */
4641
+ }
4642
+ }
4643
+ if (base_freq != ULONG_MAX ) {
4644
+ hwloc_bitmap_t rootset = hwloc_bitmap_dup (topology -> levels [0 ][0 ]-> cpuset );
4645
+ if (rootset ) {
4646
+ char value [64 ];
4647
+ snprintf (value , sizeof (value ), "%lu" , base_freq /1000 );
4648
+ hwloc_linux_cpukinds_register_one (topology , rootset ,
4649
+ HWLOC_CPUKIND_EFFICIENCY_UNKNOWN ,
4650
+ (char * ) "FrequencyBaseMHz" , value );
4651
+ /* the cpuset is given to the callee */
4652
+ }
4653
+ }
4654
+ if (capacity ) {
4655
+ hwloc_bitmap_t rootset = hwloc_bitmap_dup (topology -> levels [0 ][0 ]-> cpuset );
4656
+ if (rootset ) {
4657
+ char value [64 ];
4658
+ snprintf (value , sizeof (value ), "%lu" , capacity );
4659
+ hwloc_linux_cpukinds_register_one (topology , rootset ,
4660
+ HWLOC_CPUKIND_EFFICIENCY_UNKNOWN ,
4661
+ (char * ) "LinuxCapacity" , value );
4662
+ /* the cpuset is given to the callee */
4663
+ }
4664
+ }
4665
+ }
4666
+
4609
4667
static int
4610
4668
look_sysfscpukinds (struct hwloc_topology * topology ,
4611
4669
struct hwloc_linux_backend_data_s * data )
@@ -4619,6 +4677,8 @@ look_sysfscpukinds(struct hwloc_topology *topology,
4619
4677
hwloc_bitmap_t atom_pmu_set , core_pmu_set ;
4620
4678
int maxfreq_enabled = -1 ; /* -1 means adjust (default), 0 means ignore, 1 means enforce */
4621
4679
unsigned adjust_max = 10 ;
4680
+ int force_homogeneous ;
4681
+ const char * info ;
4622
4682
int pu , i ;
4623
4683
4624
4684
env = getenv ("HWLOC_CPUKINDS_MAXFREQ" );
@@ -4669,6 +4729,19 @@ look_sysfscpukinds(struct hwloc_topology *topology,
4669
4729
} hwloc_bitmap_foreach_end ();
4670
4730
assert (i == nr_pus );
4671
4731
4732
+ /* NVIDIA Grace is homogeneous with slight variations of max frequency, ignore those */
4733
+ info = hwloc_obj_get_info_by_name (topology -> levels [0 ][0 ], "SoC0ID" );
4734
+ force_homogeneous = info && !strcmp (info , "jep106:036b:0241" );
4735
+ /* force homogeneity ? */
4736
+ env = getenv ("HWLOC_CPUKINDS_HOMOGENEOUS" );
4737
+ if (env )
4738
+ force_homogeneous = atoi (env );
4739
+ if (force_homogeneous ) {
4740
+ hwloc_linux_cpukinds_force_homogeneous (topology , (unsigned ) nr_pus , by_pu );
4741
+ free (by_pu );
4742
+ return 0 ;
4743
+ }
4744
+
4672
4745
if (maxfreq_enabled == -1 && !max_without_basefreq )
4673
4746
/* we have basefreq, check maxfreq and ignore/fix it if turboboost 3.0 makes the max different on different cores */
4674
4747
hwloc_linux_cpukinds_adjust_maxfreqs (nr_pus , by_pu , adjust_max );
0 commit comments