@@ -57,9 +57,11 @@ size_t length64=sizeof(value64);
5757#define CPU_CORTEXA57 3
5858#define CPU_CORTEXA72 4
5959#define CPU_CORTEXA73 5
/* Cortex-A76 id; returned by detect() for part 0xd0b and used as a case label in get_cpuconfig().
 * NOTE(review): presumably also the index of "CORTEXA76" in cpuname[]/cpuname_lower[]
 * (get_cpuconfig prints cpuname[d]) -- confirm 23 matches the string's position. */
60+ #define CPU_CORTEXA76 23
6061#define CPU_NEOVERSEN1 11
6162#define CPU_NEOVERSEV1 16
6263#define CPU_NEOVERSEN2 17
/* Neoverse V2 id; returned by detect() for part 0xd4f.
 * NOTE(review): same assumption as above -- confirm 24 matches the position of
 * "NEOVERSEV2" in the name tables. */
64+ #define CPU_NEOVERSEV2 24
6365#define CPU_CORTEXX1 18
6466#define CPU_CORTEXX2 19
6567#define CPU_CORTEXA510 20
@@ -104,7 +106,9 @@ static char *cpuname[] = {
104106 "CORTEXX2" ,
105107 "CORTEXA510" ,
106108 "CORTEXA710" ,
107- "FT2000"
109+ "FT2000" ,
110+ "CORTEXA76" ,
111+ "NEOVERSEV2"
108112};
109113
110114static char * cpuname_lower [] = {
@@ -130,7 +134,9 @@ static char *cpuname_lower[] = {
130134 "cortexx2" ,
131135 "cortexa510" ,
132136 "cortexa710" ,
133- "ft2000"
137+ "ft2000" ,
138+ "cortexa76" ,
139+ "neoversev2"
134140};
135141
136142static int cpulowperf = 0 ;
@@ -140,7 +146,7 @@ static int cpuhiperf=0;
140146int get_feature (char * search )
141147{
142148
143- #ifdef __linux
149+ #if defined( __linux ) || defined( __NetBSD__ )
144150 FILE * infile ;
145151 char buffer [2048 ], * p ,* t ;
146152 p = (char * ) NULL ;
@@ -179,7 +185,7 @@ static int cpusort(const void *model1, const void *model2)
179185int detect (void )
180186{
181187
182- #ifdef __linux
188+ #if defined( __linux ) || defined( __NetBSD__ )
183189 int n ,i ,ii ;
184190 int midr_el1 ;
185191 int implementer ;
@@ -243,8 +249,8 @@ int detect(void)
243249 break ;
244250 } else {
245251 (void )fgets (buffer , sizeof (buffer ), infile );
246- midr_el1 = strtoul (buffer ,NULL ,16 );
247- fclose (infile );
252+ midr_el1 = strtoul (buffer ,NULL ,16 );
253+ fclose (infile );
248254 implementer = (midr_el1 >> 24 ) & 0xFF ;
249255 cpucores [i ] = (midr_el1 >> 4 ) & 0xFFF ;
250256 sprintf (buffer ,"/sys/devices/system/cpu/cpu%d/cpu_capacity" ,i );
@@ -304,6 +310,10 @@ int detect(void)
304310 return CPU_CORTEXX2 ;
305311 else if (strstr (cpu_part , "0xd4e" )) //X3
306312 return CPU_CORTEXX2 ;
313+ else if (strstr (cpu_part , "0xd4f" )) //NVIDIA Grace et al.
314+ return CPU_NEOVERSEV2 ;
315+ else if (strstr (cpu_part , "0xd0b" ))
316+ return CPU_CORTEXA76 ;
307317 }
308318 // Qualcomm
309319 else if (strstr (cpu_implementer , "0x51" ) && strstr (cpu_part , "0xc00" ))
@@ -361,9 +371,20 @@ int detect(void)
361371 }
362372#else
/* Apple platforms: record per-performance-level core counts in cpulowperf/cpuhiperf,
 * then map the hw.cpufamily value to a CPU id. */
363373#ifdef __APPLE__
/* Default: treat every CPU reported by hw.ncpu as low-performance until the
 * perf-level query below says otherwise.
 * NOTE(review): none of the sysctlbyname() return values are checked; if a call
 * fails, value64 presumably still holds the previous query's result -- confirm.
 * NOTE(review): length64 is an in/out argument and is reused across calls; verify
 * it still equals sizeof(value64) before each query. */
374+ sysctlbyname ("hw.ncpu" ,& value64 ,& length64 ,NULL ,0 );
375+ cpulowperf = value64 ;
376+ sysctlbyname ("hw.nperflevels" ,& value64 ,& length64 ,NULL ,0 );
/* More than one performance level: perflevel0 is stored as the high-performance
 * count and perflevel1 overwrites the low-performance count. */
377+ if (value64 > 1 ) {
/* NOTE(review): Apple's sysctl names appear to be hw.perflevelN.cpusperl2 /
 * cpusperl3; confirm that "cpusperl" (no cache-level digit) actually resolves,
 * otherwise cpuhiperf/cpulowperf receive a stale value. */
378+ sysctlbyname ("hw.perflevel0.cpusperl" ,& value64 ,& length64 ,NULL ,0 );
379+ cpuhiperf = value64 ;
380+ sysctlbyname ("hw.perflevel1.cpusperl" ,& value64 ,& length64 ,NULL ,0 );
381+ cpulowperf = value64 ;
382+ }
/* All recognised Apple CPU families below are reported as CPU_VORTEX. */
364383 sysctlbyname ("hw.cpufamily" ,& value64 ,& length64 ,NULL ,0 );
365384 if (value64 == 131287967 || value64 == 458787763 ) return CPU_VORTEX ; //A12/M1
366385 if (value64 == 3660830781 ) return CPU_VORTEX ; //A15/M2
386+ if (value64 == 2271604202 ) return CPU_VORTEX ; //A16/M3
387+ if (value64 == 1867590060 ) return CPU_VORTEX ; //M4
367388#endif
368389 return CPU_ARMV8 ;
369390#endif
@@ -396,7 +417,7 @@ void get_cpucount(void)
396417{
397418int n = 0 ;
398419
399- #ifdef __linux
420+ #if defined( __linux ) || defined( __NetBSD__ )
400421 FILE * infile ;
401422 char buffer [2048 ], * p ,* t ;
402423 p = (char * ) NULL ;
@@ -423,6 +444,12 @@ int n=0;
423444#ifdef __APPLE__
424445 sysctlbyname ("hw.physicalcpu_max" ,& value ,& length ,NULL ,0 );
425446 printf ("#define NUM_CORES %d\n" ,value );
447+ if (cpulowperf > 0 )
448+ printf ("#define NUM_CORES_LP %d\n" ,cpulowperf );
449+ if (cpumidperf > 0 )
450+ printf ("#define NUM_CORES_MP %d\n" ,cpumidperf );
451+ if (cpuhiperf > 0 )
452+ printf ("#define NUM_CORES_HP %d\n" ,cpuhiperf );
426453#endif
427454}
428455
@@ -489,6 +516,8 @@ void get_cpuconfig(void)
489516 break ;
490517
491518 case CPU_NEOVERSEV1 :
519+ printf ("#define HAVE_SVE 1\n" );
/* fallthrough -- intentional: after emitting HAVE_SVE, NEOVERSEV1 shares the
 * defines printed for CORTEXA76 below (there is no break here). */
520+ case CPU_CORTEXA76 :
492521 printf ("#define %s\n" , cpuname [d ]);
493522 printf ("#define L1_CODE_SIZE 65536\n" );
494523 printf ("#define L1_CODE_LINESIZE 64\n" );
@@ -516,12 +545,32 @@ void get_cpuconfig(void)
516545 printf ("#define L2_ASSOCIATIVE 8\n" );
517546 printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
518547 printf ("#define DTB_SIZE 4096\n" );
548+ printf ("#define HAVE_SVE 1\n" );
519549 break ;
550+ case CPU_NEOVERSEV2 :
551+ printf ("#define ARMV9\n" );
552+ printf ("#define HAVE_SVE 1\n" );
553+ printf ("#define %s\n" , cpuname [d ]);
554+ printf ("#define L1_CODE_SIZE 65536\n" );
555+ printf ("#define L1_CODE_LINESIZE 64\n" );
556+ printf ("#define L1_CODE_ASSOCIATIVE 4\n" );
557+ printf ("#define L1_DATA_SIZE 65536\n" );
558+ printf ("#define L1_DATA_LINESIZE 64\n" );
559+ printf ("#define L1_DATA_ASSOCIATIVE 4\n" );
560+ printf ("#define L2_SIZE 1048576\n" );
561+ printf ("#define L2_LINESIZE 64\n" );
562+ printf ("#define L2_ASSOCIATIVE 8\n" );
563+ // L1 Data TLB = 48 entries
564+ // L2 Data TLB = 2048 entries
565+ printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
566+ printf ("#define DTB_SIZE 4096\n" ); // Set to 4096 for symmetry with other configs.
567+ break ;
520568 case CPU_CORTEXA510 :
521569 case CPU_CORTEXA710 :
522570 case CPU_CORTEXX1 :
523571 case CPU_CORTEXX2 :
524572 printf ("#define ARMV9\n" );
573+ printf ("#define HAVE_SVE 1\n" );
525574 printf ("#define %s\n" , cpuname [d ]);
526575 printf ("#define L1_CODE_SIZE 65536\n" );
527576 printf ("#define L1_CODE_LINESIZE 64\n" );
@@ -638,6 +687,7 @@ void get_cpuconfig(void)
638687 break ;
639688 case CPU_A64FX :
640689 printf ("#define A64FX\n" );
690+ printf ("#define HAVE_SVE 1\n" );
641691 printf ("#define L1_CODE_SIZE 65535\n" );
642692 printf ("#define L1_DATA_SIZE 65535\n" );
643693 printf ("#define L1_DATA_LINESIZE 256\n" );
@@ -670,7 +720,7 @@ void get_libname(void)
670720void get_features (void )
671721{
672722
673- #ifdef __linux
723+ #if defined( __linux ) || defined( __NetBSD__ )
674724 FILE * infile ;
675725 char buffer [2048 ], * p ,* t ;
676726 p = (char * ) NULL ;
0 commit comments