Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Makefile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,16 @@ endif
endif
endif

# Ampere AmpereOne (ampere1, ampere1a): compiler flags for CORE=AMPERE1.
# This block does not probe the CPU; it only appends -march options once
# CORE has already been set to AMPERE1.
# The options are added only when GCCVERSIONGTEQ12 or ISCLANG evaluates to 1;
# with any other compiler this block adds nothing.
ifeq ($(CORE), AMPERE1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
# The Fortran options receive the same -march string unless F_COMPILER is NAG
# (presumably because NAG does not accept this option -- TODO confirm).
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
endif
endif
endif

# Use a53 tunings because a55 is only available in GCC>=8.1
ifeq ($(CORE), CORTEXA55)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
Expand Down
2 changes: 2 additions & 0 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
# Each GCCVERSIONGTEQ<N> is set once (:=) from the shell: `cut -f1 -d.` keeps
# the first dot-separated field of `$(CC) -dumpversion`, and `expr ... \>= N`
# prints 1 when that field is >= N and 0 otherwise.
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
GCCVERSIONGTEQ13 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 13)
GCCVERSIONGTEQ14 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 14)
# Note that the behavior of -dumpversion is compile-time-configurable for
# gcc-7.x and newer. Use -dumpfullversion there
ifeq ($(GCCVERSIONGTEQ7),1)
Expand Down
27 changes: 25 additions & 2 deletions cpuid_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ size_t length64=sizeof(value64);
#define CPU_TSV110 9
// Ampere
#define CPU_EMAG8180 10
#define CPU_AMPERE1 25
// Apple
#define CPU_VORTEX 13
// Fujitsu
Expand Down Expand Up @@ -111,7 +112,8 @@ static char *cpuname[] = {
"CORTEXA710",
"FT2000",
"CORTEXA76",
"NEOVERSEV2"
"NEOVERSEV2",
"AMPERE1"
};

static char *cpuname_lower[] = {
Expand Down Expand Up @@ -139,7 +141,9 @@ static char *cpuname_lower[] = {
"cortexa710",
"ft2000",
"cortexa76",
"neoversev2"
"neoversev2",
"ampere1",
"ampere1a"
};

static int cpulowperf=0;
Expand Down Expand Up @@ -334,6 +338,10 @@ int detect(void)
// Ampere
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
return CPU_EMAG8180;
else if (strstr(cpu_implementer, "0xc0")) {
if (strstr(cpu_part, "0xac3") || strstr(cpu_part, "0xac4"))
return CPU_AMPERE1;
}
// Fujitsu
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
return CPU_A64FX;
Expand Down Expand Up @@ -684,6 +692,21 @@ void get_cpuconfig(void)
printf("#define DTB_SIZE 4096\n");
break;

case CPU_AMPERE1:
printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 16384\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 4\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 4\n");
printf("#define L2_SIZE 2097152\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 8\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;

case CPU_THUNDERX3T110:
printf("#define THUNDERX3T110 \n");
printf("#define L1_CODE_SIZE 65536 \n");
Expand Down
18 changes: 17 additions & 1 deletion getarch.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_CSKY */
/* #define FORCE_CK860FV */
/* #define FORCE_GENERIC */
/* #define FORCE_AMPERE1 */

#ifdef FORCE_P2
#define FORCE
Expand Down Expand Up @@ -1590,6 +1591,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "EMAG8180"
#endif

/*
 * Forced target: Ampere AmpereOne (AMPERE1).
 *
 * When FORCE_AMPERE1 is defined, CPU autodetection is bypassed (FORCE is
 * defined) and the target description below is used as-is:
 *   ARCHITECTURE / SUBARCHITECTURE / SUBDIRNAME - target identification strings
 *   ARCHCONFIG - preprocessor defines (cache geometry, feature flags) followed
 *                by the -march option for this core
 *   LIBNAME / CORENAME - lower-case and upper-case core names
 *
 * The cache parameters must describe the same hardware as the CPU_AMPERE1
 * case printed by get_cpuconfig() in cpuid_arm64.c, so that a forced build and
 * an autodetected build see identical values. That case reports
 * L2_ASSOCIATIVE 8, so the L2 associativity here is 8 as well (it previously
 * said 16, which disagreed with the autodetected configuration).
 *
 * NOTE(review): the FORCE_THUNDERX3T110 block that follows also does
 * "#define ARMV8" before "#define FORCE"; confirm whether this block needs
 * the same define.
 */
#ifdef FORCE_AMPERE1
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "AMPERE1"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DAMPERE1 " \
"-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
"-march=armv8.6-a+crypto+crc+fp16+sha3+rng"
#define LIBNAME "ampere1"
#define CORENAME "AMPERE1"
#endif

#ifdef FORCE_THUNDERX3T110
#define ARMV8
#define FORCE
Expand Down Expand Up @@ -1820,7 +1837,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "CK860FV"
#endif


#ifndef FORCE

#ifdef USER_TARGET
Expand Down
1 change: 1 addition & 0 deletions kernel/arm64/KERNEL.AMPERE1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include $(KERNELDIR)/KERNEL.NEOVERSEN1
35 changes: 35 additions & 0 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -3635,6 +3635,41 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#elif defined(AMPERE1)

/*
 * GEMM tuning parameters used when AMPERE1 is defined.
 * NOTE(review): kernel/arm64/KERNEL.AMPERE1 only includes KERNEL.NEOVERSEN1,
 * so the UNROLL_M/UNROLL_N values below presumably have to match the kernels
 * selected there -- confirm before changing them.
 */

/* SWITCH_RATIO is 8 for DOUBLE/XDOUBLE builds and 16 for all others. */
#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#else
#define SWITCH_RATIO 16
#endif

/* Default unroll factors in M and N, one pair per precision (S, D, C, Z). */
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4

#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4

#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4

#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4

/* Default P, Q and R values per precision (presumably the GEMM blocking
   sizes -- confirm against their users). */
#define SGEMM_DEFAULT_P 240
#define DGEMM_DEFAULT_P 240
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128

#define SGEMM_DEFAULT_Q 640
#define DGEMM_DEFAULT_Q 320
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112

/* R is the same (4096) for every precision. */
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096


#elif defined(A64FX) // 512-bit SVE

/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
Expand Down
Loading