Skip to content

Commit a87713f

Browse files
authored
Merge pull request #4597 from martin-frbg/issue4581
Add support for the ARM Cortex-A76 cpu
2 parents 5865973 + 584e876 commit a87713f

File tree

8 files changed

+71
-3
lines changed

8 files changed

+71
-3
lines changed

Makefile.arm64

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
5858
endif
5959
endif
6060

61+
# Cortex-A76 target: append -march=armv8.2-a -mtune=cortex-a76 to the C
# compiler options, and to the Fortran compiler options as well unless
# F_COMPILER is NAG.
ifeq ($(CORE), CORTEXA76)
62+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
63+
ifneq ($(F_COMPILER), NAG)
64+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
65+
endif
66+
endif
67+
6168
ifeq ($(CORE), FT2000)
6269
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
6370
ifneq ($(F_COMPILER), NAG)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
167167
- **Cortex A57**: Optimized Level-3 and Level-2 functions
168168
- **Cortex A72**: same as A57 (different cpu specifications)
169169
- **Cortex A73**: same as A57 (different cpu specifications)
170+
- **Cortex A76**: same as A57 (different cpu specifications)
170171
- **Falkor**: same as A57 (different cpu specifications)
171172
- **ThunderX**: Optimized some Level-1 functions
172173
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ CORTEXA53
9393
CORTEXA57
9494
CORTEXA72
9595
CORTEXA73
96+
CORTEXA76
9697
CORTEXA510
9798
CORTEXA710
9899
CORTEXX1

cmake/prebuild.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,7 @@ endif ()
932932
set(ZGEMM_UNROLL_M 4)
933933
set(ZGEMM_UNROLL_N 4)
934934
set(SYMV_P 16)
935-
elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73")
935+
elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73" OR "${TCORE}" STREQUAL "CORTEXA76")
936936
file(APPEND ${TARGET_CONF_TEMP}
937937
"#define L1_CODE_SIZE\t49152\n"
938938
"#define L1_CODE_LINESIZE\t64\n"

cpuid_arm64.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ size_t length64=sizeof(value64);
4242
#define CPU_CORTEXA57 3
4343
#define CPU_CORTEXA72 4
4444
#define CPU_CORTEXA73 5
45+
#define CPU_CORTEXA76 23
4546
#define CPU_NEOVERSEN1 11
4647
#define CPU_NEOVERSEV1 16
4748
#define CPU_NEOVERSEN2 17
@@ -89,7 +90,8 @@ static char *cpuname[] = {
8990
"CORTEXX2",
9091
"CORTEXA510",
9192
"CORTEXA710",
92-
"FT2000"
93+
"FT2000",
94+
"CORTEXA76"
9395
};
9496

9597
static char *cpuname_lower[] = {
@@ -115,7 +117,8 @@ static char *cpuname_lower[] = {
115117
"cortexx2",
116118
"cortexa510",
117119
"cortexa710",
118-
"ft2000"
120+
"ft2000",
121+
"cortexa76"
119122
};
120123

121124
int get_feature(char *search)
@@ -210,6 +213,8 @@ int detect(void)
210213
return CPU_CORTEXX2;
211214
else if (strstr(cpu_part, "0xd4e")) //X3
212215
return CPU_CORTEXX2;
216+
else if (strstr(cpu_part, "0xd0b"))
217+
return CPU_CORTEXA76;
213218
}
214219
// Qualcomm
215220
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
@@ -391,6 +396,7 @@ void get_cpuconfig(void)
391396
break;
392397

393398
case CPU_NEOVERSEV1:
399+
case CPU_CORTEXA76:
394400
printf("#define %s\n", cpuname[d]);
395401
printf("#define L1_CODE_SIZE 65536\n");
396402
printf("#define L1_CODE_LINESIZE 64\n");

getarch.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,6 +1331,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13311331
#define CORENAME "CORTEXA73"
13321332
#endif
13331333

1334+
/*
 * Forced-target description for Cortex-A76, active when FORCE_CORTEXA76 is
 * defined. Sets the architecture/sub-architecture names, the kernel
 * sub-directory ("arm64"), the library/core names, and the ARCHCONFIG string
 * of -D flags (cache geometry, DTB settings and feature macros).
 *
 * NOTE(review): ARCHCONFIG passes -DL1_CODE_SIZE=49152 here, while the
 * CPU_CORTEXA76 case in cpuid_arm64.c prints "#define L1_CODE_SIZE 65536".
 * Forced and auto-detected builds therefore appear to disagree on this
 * value; confirm which one is intended for Cortex-A76.
 */
#ifdef FORCE_CORTEXA76
1335+
#define FORCE
1336+
#define ARCHITECTURE "ARM64"
1337+
#define SUBARCHITECTURE "CORTEXA76"
1338+
#define SUBDIRNAME "arm64"
1339+
#define ARCHCONFIG "-DCORTEXA76 " \
1340+
"-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
1341+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
1342+
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
1343+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
1344+
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
1345+
#define LIBNAME "cortexa76"
1346+
#define CORENAME "CORTEXA76"
1347+
#endif
1348+
13341349
#ifdef FORCE_CORTEXX1
13351350
#define FORCE
13361351
#define ARCHITECTURE "ARM64"

kernel/arm64/KERNEL.CORTEXA76

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Cortex-A76 defines no kernel settings of its own in this file; it only
# pulls in KERNEL.CORTEXA57 from $(KERNELDIR).
include $(KERNELDIR)/KERNEL.CORTEXA57
2+
3+

param.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3351,6 +3351,41 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
33513351
#define CGEMM_DEFAULT_R 4096
33523352
#define ZGEMM_DEFAULT_R 2048
33533353

3354+
#elif defined(CORTEXA76)
/*
 * GEMM tuning defaults selected when CORTEXA76 is defined: per-precision
 * UNROLL_M/UNROLL_N factors, the P/Q/R defaults, and SWITCH_RATIO (8 when
 * DOUBLE or XDOUBLE is defined, 16 otherwise).
 */
3355+
3356+
#define SGEMM_DEFAULT_UNROLL_M 16
3357+
#define SGEMM_DEFAULT_UNROLL_N 4
3358+
3359+
#define DGEMM_DEFAULT_UNROLL_M 8
3360+
#define DGEMM_DEFAULT_UNROLL_N 4
3361+
3362+
#define CGEMM_DEFAULT_UNROLL_M 8
3363+
#define CGEMM_DEFAULT_UNROLL_N 4
3364+
3365+
#define ZGEMM_DEFAULT_UNROLL_M 4
3366+
#define ZGEMM_DEFAULT_UNROLL_N 4
3367+
3368+
#if defined(XDOUBLE) || defined(DOUBLE)
3369+
#define SWITCH_RATIO 8
3370+
#else
3371+
#define SWITCH_RATIO 16
3372+
#endif
3373+
3374+
#define SGEMM_DEFAULT_P 256
3375+
#define DGEMM_DEFAULT_P 128
3376+
#define CGEMM_DEFAULT_P 128
3377+
#define ZGEMM_DEFAULT_P 64
3378+
3379+
#define SGEMM_DEFAULT_Q 512
3380+
#define DGEMM_DEFAULT_Q 256
3381+
#define CGEMM_DEFAULT_Q 256
3382+
#define ZGEMM_DEFAULT_Q 256
3383+
3384+
#define SGEMM_DEFAULT_R 4096
3385+
#define DGEMM_DEFAULT_R 4096
3386+
#define CGEMM_DEFAULT_R 4096
3387+
#define ZGEMM_DEFAULT_R 4096
3388+
33543389
#elif defined(CORTEXA53) || defined(CORTEXA55)
33553390

33563391
#define SGEMM_DEFAULT_UNROLL_M 8

0 commit comments

Comments
 (0)