2525 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626 *****************************************************************************/
2727
28+ #include <stdlib.h>
2829#include <string.h>
2930#ifdef __APPLE__
3031#include <sys/sysctl.h>
@@ -33,6 +34,20 @@ size_t length=sizeof(value);
3334int64_t value64 ;
3435size_t length64 = sizeof (value64 );
3536#endif
/*
 * Linux/Android only: include the hwcap bit definitions and the auxiliary
 * vector header, and provide fallback values for HWCAP_CPUID and HWCAP_SVE
 * when the included headers do not define them.
 */
37+ #if (defined OS_LINUX || defined OS_ANDROID )
38+ #include <asm/hwcap.h>
39+ #include <sys/auxv.h>
40+ #ifndef HWCAP_CPUID
41+ #define HWCAP_CPUID (1 << 11)
42+ #endif
43+ #ifndef HWCAP_SVE
44+ #define HWCAP_SVE (1 << 22)
45+ #endif
46+ 
/*
 * get_cpu_ftr(id, var): issues an "mrs" instruction whose source operand is
 * the stringized `id`, and stores the instruction's output in `var`.
 * `var` must be an lvalue usable with the "=r" output constraint.
 * The body uses the GNU statement-expression and inline-asm extensions.
 * NOTE(review): as displayed here there is whitespace between the macro name
 * and '('; if that is present in the real file the macro would be
 * object-like rather than function-like -- confirm against the actual source.
 */
47+ #define get_cpu_ftr (id , var ) ({ \
48+ __asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
49+ })
50+ #endif
3651
3752#define CPU_UNKNOWN 0
3853#define CPU_ARMV8 1
@@ -42,11 +57,11 @@ size_t length64=sizeof(value64);
4257#define CPU_CORTEXA57 3
4358#define CPU_CORTEXA72 4
4459#define CPU_CORTEXA73 5
45- #define CPU_CORTEXA76 23
60+ #define CPU_CORTEXA76 23
4661#define CPU_NEOVERSEN1 11
4762#define CPU_NEOVERSEV1 16
4863#define CPU_NEOVERSEN2 17
49- #define CPU_NEOVERSEV2 24
64+ #define CPU_NEOVERSEV2 24
5065#define CPU_CORTEXX1 18
5166#define CPU_CORTEXX2 19
5267#define CPU_CORTEXA510 20
@@ -93,7 +108,7 @@ static char *cpuname[] = {
93108 "CORTEXA710" ,
94109 "FT2000" ,
95110 "CORTEXA76" ,
96- "NEOVERSEV2"
111+ "NEOVERSEV2"
97112};
98113
99114static char * cpuname_lower [] = {
@@ -121,9 +136,13 @@ static char *cpuname_lower[] = {
121136 "cortexa710" ,
122137 "ft2000" ,
123138 "cortexa76" ,
124- "neoversev2"
139+ "neoversev2"
125140};
126141
/*
 * Per-class core counters (low / mid / high performance).
 * detect() resets all three to zero on its Linux/NetBSD path and then
 * increments one of them for each core it classifies; on its Apple path it
 * assigns cpulowperf and cpuhiperf from sysctl values. The core-count
 * printing code emits them as NUM_CORES_LP, NUM_CORES_MP and NUM_CORES_HP,
 * each only when its counter is greater than zero.
 */
142+ static int cpulowperf = 0 ;
143+ static int cpumidperf = 0 ;
144+ static int cpuhiperf = 0 ;
145+
127146int get_feature (char * search )
128147{
129148
@@ -158,33 +177,108 @@ int get_feature(char *search)
158177#endif
159178 return (0 );
160179}
161-
/*
 * qsort() comparator that orders int values from largest to smallest.
 *
 * model1, model2: pointers to the two int elements being compared.
 *
 * Returns a positive value when *model2 > *model1, a negative value when
 * *model2 < *model1, and 0 when the two values are equal.
 *
 * The result is built from comparisons instead of a subtraction so it
 * cannot overflow when the operands have opposite signs and large
 * magnitude; only the sign of the result matters to qsort().
 */
static int cpusort(const void *model1, const void *model2)
{
	const int lhs = *(const int *)model1;
	const int rhs = *(const int *)model2;

	return (rhs > lhs) - (rhs < lhs);
}
162184
163185int detect (void )
164186{
165187
166188#if defined( __linux ) || defined( __NetBSD__ )
167-
189+ int n ,i ,ii ;
190+ int midr_el1 ;
191+ int implementer ;
192+ int cpucap [1024 ];
193+ int cpucores [1024 ];
168194 FILE * infile ;
169- char buffer [512 ], * p , * cpu_part = NULL , * cpu_implementer = NULL ;
195+ char cpupart [6 ],cpuimpl [6 ];
196+ char * cpu_impl = NULL ,* cpu_pt = NULL ;
197+ char buffer [2048 ], * p , * cpu_part = NULL , * cpu_implementer = NULL ;
170198 p = (char * ) NULL ;
171-
172- infile = fopen ("/proc/cpuinfo" , "r" );
173- while (fgets (buffer , sizeof (buffer ), infile )) {
174- if ((cpu_part != NULL ) && (cpu_implementer != NULL )) {
175- break ;
199+ cpulowperf = cpumidperf = cpuhiperf = 0 ;
200+ for (i = 0 ;i < 1024 ;i ++ )cpucores [i ]= 0 ;
201+ n = 0 ;
202+ infile = fopen ("/sys/devices/system/cpu/possible" , "r" );
203+ if (!infile ) {
204+ infile = fopen ("/proc/cpuinfo" , "r" );
205+ while (fgets (buffer , sizeof (buffer ), infile )) {
206+ if (!strncmp ("processor" , buffer , 9 ))
207+ n ++ ;
176208 }
177-
178- if ((cpu_part == NULL ) && !strncmp ("CPU part" , buffer , 8 )) {
179- cpu_part = strchr (buffer , ':' ) + 2 ;
180- cpu_part = strdup (cpu_part );
181- } else if ((cpu_implementer == NULL ) && !strncmp ("CPU implementer" , buffer , 15 )) {
182- cpu_implementer = strchr (buffer , ':' ) + 2 ;
183- cpu_implementer = strdup (cpu_implementer );
209+ } else {
210+ fgets (buffer , sizeof (buffer ), infile );
211+ sscanf (buffer ,"0-%d" ,& n );
212+ n ++ ;
213+ }
214+ fclose (infile );
215+
216+ cpu_implementer = NULL ;
217+ for (i = 0 ;i < n ;i ++ ){
218+ sprintf (buffer ,"/sys/devices/system/cpu/cpu%d/regs/identification/midr_el1" ,i );
219+ infile = fopen (buffer ,"r" );
220+ if (!infile ) {
221+ infile = fopen ("/proc/cpuinfo" , "r" );
222+ for (ii = 0 ;ii < n ;ii ++ ){
223+ cpu_part = NULL ;cpu_implementer = NULL ;
224+ while (fgets (buffer , sizeof (buffer ), infile )) {
225+ if ((cpu_part != NULL ) && (cpu_implementer != NULL )) {
226+ break ;
227+ }
228+
229+ if ((cpu_part == NULL ) && !strncmp ("CPU part" , buffer , 8 )) {
230+ cpu_pt = strchr (buffer , ':' ) + 2 ;
231+ cpu_part = strdup (cpu_pt );
232+ cpucores [i ]= strtol (cpu_part ,NULL ,0 );
233+
234+ } else if ((cpu_implementer == NULL ) && !strncmp ("CPU implementer" , buffer , 15 )) {
235+ cpu_impl = strchr (buffer , ':' ) + 2 ;
236+ cpu_implementer = strdup (cpu_impl );
237+ }
238+
239+ }
240+ if (strstr (cpu_implementer , "0x41" )) {
241+ if (cpucores [ii ] >= 0xd4b ) cpuhiperf ++ ;
242+ else
243+ if (cpucores [ii ] >= 0xd07 ) cpumidperf ++ ;
244+ else cpulowperf ++ ;
245+ }
246+ else cpulowperf ++ ;
247+ }
248+ fclose (infile );
249+ break ;
250+ } else {
251+ (void )fgets (buffer , sizeof (buffer ), infile );
252+ midr_el1 = strtoul (buffer ,NULL ,16 );
253+ fclose (infile );
254+ implementer = (midr_el1 >> 24 ) & 0xFF ;
255+ cpucores [i ] = (midr_el1 >> 4 ) & 0xFFF ;
256+ sprintf (buffer ,"/sys/devices/system/cpu/cpu%d/cpu_capacity" ,i );
257+ infile = fopen (buffer ,"r" );
258+ if (!infile ) {
259+ if (implementer == 65 ) {
260+ if (cpucores [i ] >= 0xd4b ) cpuhiperf ++ ;
261+ else
262+ if (cpucores [i ] >= 0xd07 ) cpumidperf ++ ;
263+ else cpulowperf ++ ;
264+ }
265+ else cpulowperf ++ ;
266+ } else {
267+ (void )fgets (buffer , sizeof (buffer ), infile );
268+ sscanf (buffer ,"%d" ,& cpucap [i ]);
269+ if (cpucap [i ] >= 1000 ) cpuhiperf ++ ;
270+ else
271+ if (cpucap [i ] >= 500 ) cpumidperf ++ ;
272+ else cpulowperf ++ ;
273+ fclose (infile );
274+ }
184275 }
276+ sprintf (cpuimpl ,"0x%2x" ,implementer );
277+ cpu_implementer = strdup (cpuimpl );
185278 }
186-
187- fclose (infile );
279+ qsort (cpucores ,1024 ,sizeof (int ),cpusort );
280+ sprintf (cpupart ,"0x%3x" ,cpucores [0 ]);
281+ cpu_part = strdup (cpupart );
188282 if (cpu_part != NULL && cpu_implementer != NULL ) {
189283 // Arm
190284 if (strstr (cpu_implementer , "0x41" )) {
@@ -219,7 +313,7 @@ int detect(void)
219313 else if (strstr (cpu_part , "0xd4f" )) //NVIDIA Grace et al.
220314 return CPU_NEOVERSEV2 ;
221315 else if (strstr (cpu_part , "0xd0b" ))
222- return CPU_CORTEXA76 ;
316+ return CPU_CORTEXA76 ;
223317 }
224318 // Qualcomm
225319 else if (strstr (cpu_implementer , "0x51" ) && strstr (cpu_part , "0xc00" ))
@@ -277,11 +371,20 @@ int detect(void)
277371 }
278372#else
279373#ifdef __APPLE__
374+ sysctlbyname ("hw.ncpu" ,& value64 ,& length64 ,NULL ,0 );
375+ cpulowperf = value64 ;
376+ sysctlbyname ("hw.nperflevels" ,& value64 ,& length64 ,NULL ,0 );
377+ if (value64 > 1 ) {
378+ sysctlbyname ("hw.perflevel0.cpusperl" ,& value64 ,& length64 ,NULL ,0 );
379+ cpuhiperf = value64 ;
380+ sysctlbyname ("hw.perflevel1.cpusperl" ,& value64 ,& length64 ,NULL ,0 );
381+ cpulowperf = value64 ;
382+ }
280383 sysctlbyname ("hw.cpufamily" ,& value64 ,& length64 ,NULL ,0 );
281384 if (value64 == 131287967 || value64 == 458787763 ) return CPU_VORTEX ; //A12/M1
282385 if (value64 == 3660830781 ) return CPU_VORTEX ; //A15/M2
283- if (value64 == 2271604202 ) return CPU_VORTEX ; //A16/M3
284- if (value64 == 1867590060 ) return CPU_VORTEX ; //M4
386+ if (value64 == 2271604202 ) return CPU_VORTEX ; //A16/M3
387+ if (value64 == 1867590060 ) return CPU_VORTEX ; //M4
285388#endif
286389 return CPU_ARMV8 ;
287390#endif
@@ -331,10 +434,22 @@ int n=0;
331434 fclose (infile );
332435
333436 printf ("#define NUM_CORES %d\n" ,n );
437+ if (cpulowperf > 0 )
438+ printf ("#define NUM_CORES_LP %d\n" ,cpulowperf );
439+ if (cpumidperf > 0 )
440+ printf ("#define NUM_CORES_MP %d\n" ,cpumidperf );
441+ if (cpuhiperf > 0 )
442+ printf ("#define NUM_CORES_HP %d\n" ,cpuhiperf );
334443#endif
335444#ifdef __APPLE__
336445 sysctlbyname ("hw.physicalcpu_max" ,& value ,& length ,NULL ,0 );
337446 printf ("#define NUM_CORES %d\n" ,value );
447+ if (cpulowperf > 0 )
448+ printf ("#define NUM_CORES_LP %d\n" ,cpulowperf );
449+ if (cpumidperf > 0 )
450+ printf ("#define NUM_CORES_MP %d\n" ,cpumidperf );
451+ if (cpuhiperf > 0 )
452+ printf ("#define NUM_CORES_HP %d\n" ,cpuhiperf );
338453#endif
339454}
340455
@@ -347,7 +462,6 @@ void get_cpuconfig(void)
347462 printf ("#define ARMV8\n" );
348463 printf ("#define HAVE_NEON\n" ); // This shouldn't be necessary
349464 printf ("#define HAVE_VFPV4\n" ); // This shouldn't be necessary
350-
351465 int d = detect ();
352466 switch (d )
353467 {
@@ -402,8 +516,8 @@ void get_cpuconfig(void)
402516 break ;
403517
404518 case CPU_NEOVERSEV1 :
405- printf ("#define HAVE_SVE 1\n" );
406- case CPU_CORTEXA76 :
519+ printf ("#define HAVE_SVE 1\n" );
520+ case CPU_CORTEXA76 :
407521 printf ("#define %s\n" , cpuname [d ]);
408522 printf ("#define L1_CODE_SIZE 65536\n" );
409523 printf ("#define L1_CODE_LINESIZE 64\n" );
@@ -431,32 +545,32 @@ void get_cpuconfig(void)
431545 printf ("#define L2_ASSOCIATIVE 8\n" );
432546 printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
433547 printf ("#define DTB_SIZE 4096\n" );
434- printf ("#define HAVE_SVE 1\n" );
548+ printf ("#define HAVE_SVE 1\n" );
435549 break ;
436- case CPU_NEOVERSEV2 :
550+ case CPU_NEOVERSEV2 :
437551 printf ("#define ARMV9\n" );
438- printf ("#define HAVE_SVE 1\n" );
439- printf ("#define %s\n" , cpuname [d ]);
440- printf ("#define L1_CODE_SIZE 65536\n" );
441- printf ("#define L1_CODE_LINESIZE 64\n" );
442- printf ("#define L1_CODE_ASSOCIATIVE 4\n" );
443- printf ("#define L1_DATA_SIZE 65536\n" );
444- printf ("#define L1_DATA_LINESIZE 64\n" );
445- printf ("#define L1_DATA_ASSOCIATIVE 4\n" );
446- printf ("#define L2_SIZE 1048576\n" );
447- printf ("#define L2_LINESIZE 64\n" );
448- printf ("#define L2_ASSOCIATIVE 8\n" );
449- // L1 Data TLB = 48 entries
450- // L2 Data TLB = 2048 entries
451- printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
452- printf ("#define DTB_SIZE 4096\n" ); // Set to 4096 for symmetry with other configs.
453- break ;
552+ printf ("#define HAVE_SVE 1\n" );
553+ printf ("#define %s\n" , cpuname [d ]);
554+ printf ("#define L1_CODE_SIZE 65536\n" );
555+ printf ("#define L1_CODE_LINESIZE 64\n" );
556+ printf ("#define L1_CODE_ASSOCIATIVE 4\n" );
557+ printf ("#define L1_DATA_SIZE 65536\n" );
558+ printf ("#define L1_DATA_LINESIZE 64\n" );
559+ printf ("#define L1_DATA_ASSOCIATIVE 4\n" );
560+ printf ("#define L2_SIZE 1048576\n" );
561+ printf ("#define L2_LINESIZE 64\n" );
562+ printf ("#define L2_ASSOCIATIVE 8\n" );
563+ // L1 Data TLB = 48 entries
564+ // L2 Data TLB = 2048 entries
565+ printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
566+ printf ("#define DTB_SIZE 4096\n" ); // Set to 4096 for symmetry with other configs.
567+ break ;
454568 case CPU_CORTEXA510 :
455569 case CPU_CORTEXA710 :
456570 case CPU_CORTEXX1 :
457571 case CPU_CORTEXX2 :
458572 printf ("#define ARMV9\n" );
459- printf ("#define HAVE_SVE 1\n" );
573+ printf ("#define HAVE_SVE 1\n" );
460574 printf ("#define %s\n" , cpuname [d ]);
461575 printf ("#define L1_CODE_SIZE 65536\n" );
462576 printf ("#define L1_CODE_LINESIZE 64\n" );
@@ -559,8 +673,6 @@ void get_cpuconfig(void)
559673 case CPU_VORTEX :
560674 printf ("#define VORTEX \n" );
561675#ifdef __APPLE__
562- sysctlbyname ("hw.cpufamily" ,& value64 ,& length64 ,NULL ,0 );
563- if (value64 == 1867590060 ) printf ("#define HAVE_SME 1\n" );; //M4
564676 sysctlbyname ("hw.l1icachesize" ,& value64 ,& length64 ,NULL ,0 );
565677 printf ("#define L1_CODE_SIZE %lld \n" ,value64 );
566678 sysctlbyname ("hw.cachelinesize" ,& value64 ,& length64 ,NULL ,0 );
@@ -575,7 +687,7 @@ void get_cpuconfig(void)
575687 break ;
576688 case CPU_A64FX :
577689 printf ("#define A64FX\n" );
578- printf ("#define HAVE_SVE 1\n" );
690+ printf ("#define HAVE_SVE 1\n" );
579691 printf ("#define L1_CODE_SIZE 65535\n" );
580692 printf ("#define L1_DATA_SIZE 65535\n" );
581693 printf ("#define L1_DATA_LINESIZE 256\n" );
0 commit comments