Skip to content

Commit 531c6b9

Browse files
authored
Merge pull request #34 from xianyi/develop
rebase
2 parents 75577f9 + ddcbed6 commit 531c6b9

File tree

13 files changed

+151
-27
lines changed

13 files changed

+151
-27
lines changed

Makefile.system

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,7 @@ DYNAMIC_CORE += FALKOR
558558
DYNAMIC_CORE += THUNDERX
559559
DYNAMIC_CORE += THUNDERX2T99
560560
DYNAMIC_CORE += TSV110
561+
DYNAMIC_CORE += EMAG8180
561562
endif
562563

563564
ifeq ($(ARCH), zarch)

benchmark/copy.c

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,10 @@ int main(int argc, char *argv[]){
129129
int step = 1;
130130

131131
struct timeval start, stop;
132-
double time1,timeg;
132+
double time1 = 0.0, timeg = 0.0;
133+
long nanos = 0;
134+
time_t seconds = 0;
135+
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };
133136

134137
argc--;argv++;
135138

@@ -163,35 +166,32 @@ int main(int argc, char *argv[]){
163166
timeg=0;
164167

165168
fprintf(stderr, " %6d : ", (int)m);
169+
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
170+
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
171+
}
166172

173+
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
174+
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
175+
}
167176

168177
for (l=0; l<loops; l++)
169178
{
179+
clock_gettime(CLOCK_REALTIME, &time_start);
180+
COPY (&m, x, &inc_x, y, &inc_y );
181+
clock_gettime(CLOCK_REALTIME, &time_end);
170182

171-
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
172-
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
173-
}
183+
nanos = time_end.tv_nsec - time_start.tv_nsec;
184+
seconds = time_end.tv_sec - time_start.tv_sec;
174185

175-
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
176-
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
177-
}
178-
gettimeofday( &start, (struct timezone *)0);
186+
time1 = seconds + nanos / 1.e9;
187+
timeg += time1;
188+
}
179189

180-
COPY (&m, x, &inc_x, y, &inc_y );
190+
timeg /= loops;
181191

182-
gettimeofday( &stop, (struct timezone *)0);
183-
184-
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
185-
186-
timeg += time1;
187-
188-
}
189-
190-
timeg /= loops;
191-
192-
fprintf(stderr,
193-
" %10.2f MBytes %10.6f sec\n",
194-
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
192+
fprintf(stderr,
193+
" %10.2f MBytes %12.9f sec\n",
194+
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg);
195195

196196
}
197197

cmake/arch.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ endif ()
4545

4646
if (DYNAMIC_ARCH)
4747
if (ARM64)
48-
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110)
48+
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180)
4949
endif ()
5050

5151
if (POWER)

cmake/prebuild.cmake

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,29 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
332332
set(ZGEMM_UNROLL_M 4)
333333
set(ZGEMM_UNROLL_N 4)
334334
set(SYMV_P 16)
335+
elseif ("${TCORE}" STREQUAL "EMAG8180")
336+
file(APPEND ${TARGET_CONF_TEMP}
337+
"#define ARMV8\n"
338+
"#define L1_CODE_SIZE\t32768\n"
339+
"#define L1_CODE_LINESIZE\t64\n"
340+
"#define L1_CODE_ASSOCIATIVE\t4\n"
341+
"#define L1_DATA_SIZE\t32768\n"
342+
"#define L1_DATA_LINESIZE\t64\n"
343+
"#define L1_DATA_ASSOCIATIVE\t4\n"
344+
"#define L2_SIZE\t5262144\n"
345+
"#define L2_LINESIZE\t64\n"
346+
"#define L2_ASSOCIATIVE\t8\n"
347+
"#define DTB_DEFAULT_ENTRIES\t64\n"
348+
"#define DTB_SIZE\t4096\n")
349+
set(SGEMM_UNROLL_M 16)
350+
set(SGEMM_UNROLL_N 4)
351+
set(DGEMM_UNROLL_M 8)
352+
set(DGEMM_UNROLL_N 4)
353+
set(CGEMM_UNROLL_M 8)
354+
set(CGEMM_UNROLL_N 4)
355+
set(ZGEMM_UNROLL_M 4)
356+
set(ZGEMM_UNROLL_N 4)
357+
set(SYMV_P 16)
335358
elseif ("${TCORE}" STREQUAL "POWER6")
336359
file(APPEND ${TARGET_CONF_TEMP}
337360
"#define L1_DATA_SIZE 32768\n"

cpuid_arm64.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
#define CPU_THUNDERX2T99 8
4242
//Hisilicon
4343
#define CPU_TSV110 9
44+
// Ampere
45+
#define CPU_EMAG8180 10
4446

4547
static char *cpuname[] = {
4648
"UNKNOWN",
@@ -52,7 +54,8 @@ static char *cpuname[] = {
5254
"FALKOR",
5355
"THUNDERX",
5456
"THUNDERX2T99",
55-
"TSV110"
57+
"TSV110",
58+
"EMAG8180"
5659
};
5760

5861
static char *cpuname_lower[] = {
@@ -65,7 +68,8 @@ static char *cpuname_lower[] = {
6568
"falkor",
6669
"thunderx",
6770
"thunderx2t99",
68-
"tsv110"
71+
"tsv110",
72+
"emag8180"
6973
};
7074

7175
int get_feature(char *search)
@@ -152,6 +156,9 @@ int detect(void)
152156
// HiSilicon
153157
else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
154158
return CPU_TSV110;
159+
// Ampere
160+
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
161+
return CPU_EMAG8180;
155162
}
156163

157164
p = (char *) NULL ;
@@ -335,6 +342,18 @@ void get_cpuconfig(void)
335342
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
336343
printf("#define DTB_SIZE 4096 \n");
337344
break;
345+
346+
case CPU_EMAG8180:
347+
// Minimum parameters for ARMv8 (based on A53)
348+
printf("#define EMAG8180\n");
349+
printf("#define L1_CODE_SIZE 32768\n");
350+
printf("#define L1_DATA_SIZE 32768\n");
351+
printf("#define L1_DATA_LINESIZE 64\n");
352+
printf("#define L2_SIZE 262144\n");
353+
printf("#define L2_LINESIZE 64\n");
354+
printf("#define DTB_DEFAULT_ENTRIES 64\n");
355+
printf("#define DTB_SIZE 4096\n");
356+
338357
}
339358
get_cpucount();
340359
}

driver/others/dynamic_arm64.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,11 @@ extern gotoblas_t gotoblas_FALKOR;
5151
extern gotoblas_t gotoblas_THUNDERX;
5252
extern gotoblas_t gotoblas_THUNDERX2T99;
5353
extern gotoblas_t gotoblas_TSV110;
54+
extern gotoblas_t gotoblas_EMAG8180;
5455

5556
extern void openblas_warning(int verbose, const char * msg);
5657

57-
#define NUM_CORETYPES 9
58+
#define NUM_CORETYPES 10
5859

5960
/*
6061
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -78,6 +79,7 @@ static char *corename[] = {
7879
"thunderx",
7980
"thunderx2t99",
8081
"tsv110",
82+
"emag8180",
8183
"unknown"
8284
};
8385

@@ -91,6 +93,7 @@ char *gotoblas_corename(void) {
9193
if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
9294
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
9395
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
96+
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
9497
return corename[NUM_CORETYPES];
9598
}
9699

@@ -119,6 +122,7 @@ static gotoblas_t *force_coretype(char *coretype) {
119122
case 6: return (&gotoblas_THUNDERX);
120123
case 7: return (&gotoblas_THUNDERX2T99);
121124
case 8: return (&gotoblas_TSV110);
125+
case 9: return (&gotoblas_EMAG8180);
122126
}
123127
snprintf(message, 128, "Core not found: %s\n", coretype);
124128
openblas_warning(1, message);
@@ -189,6 +193,13 @@ static gotoblas_t *get_coretype(void) {
189193
return &gotoblas_TSV110;
190194
}
191195
break;
196+
case 0x50: // Ampere
197+
switch (part)
198+
{
199+
case 0x000: // Skylark/EMAG8180
200+
return &gotoblas_EMAG8180;
201+
}
202+
break;
192203
case 0x51: // Qualcomm
193204
switch (part)
194205
{

getarch.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
10931093
#else
10941094
#endif
10951095

1096+
#ifdef FORCE_EMAG8180
1097+
#define ARMV8
1098+
#define FORCE
1099+
#define ARCHITECTURE "ARM64"
1100+
#define SUBARCHITECTURE "EMAG8180"
1101+
#define SUBDIRNAME "arm64"
1102+
#define ARCHCONFIG "-DEMAG8180 " \
1103+
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
1104+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
1105+
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
1106+
"-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
1107+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
1108+
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
1109+
#define LIBNAME "emag8180"
1110+
#define CORENAME "EMAG8180"
1111+
#endif
10961112

10971113
#ifdef FORCE_ZARCH_GENERIC
10981114
#define FORCE

kernel/arm64/KERNEL.EMAG8180

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
include $(KERNELDIR)/KERNEL.CORTEXA57
2+
3+

kernel/mips64/KERNEL

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,27 @@ endif
167167

168168
CGEMM3MKERNEL = zgemm3m_kernel.S
169169
ZGEMM3MKERNEL = zgemm3m_kernel.S
170+
171+
ifndef ISMINKERNEL
172+
ISMINKERNEL = imin.S
173+
endif
174+
175+
ifndef IDMINKERNEL
176+
IDMINKERNEL = imin.S
177+
endif
178+
179+
ifndef IQMINKERNEL
180+
IQMINKERNEL = imin.S
181+
endif
182+
183+
ifndef ISMAXKERNEL
184+
ISMAXKERNEL = imax.S
185+
endif
186+
187+
ifndef IDMAXKERNEL
188+
IDMAXKERNEL = imax.S
189+
endif
190+
191+
ifndef IQMAXKERNEL
192+
IQMAXKERNEL = imax.S
193+
endif

kernel/power/KERNEL

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,26 @@ ifndef DSDOTKERNEL
5050
DSDOTKERNEL = ../generic/dot.c
5151
endif
5252

53+
ifndef ISMINKERNEL
54+
ISMINKERNEL = imin.S
55+
endif
56+
57+
ifndef IDMINKERNEL
58+
IDMINKERNEL = imin.S
59+
endif
60+
61+
ifndef IQMINKERNEL
62+
IQMINKERNEL = imin.S
63+
endif
64+
65+
ifndef ISMAXKERNEL
66+
ISMAXKERNEL = imax.S
67+
endif
68+
69+
ifndef IDMAXKERNEL
70+
IDMAXKERNEL = imax.S
71+
endif
72+
73+
ifndef IQMAXKERNEL
74+
IQMAXKERNEL = imax.S
75+
endif

0 commit comments

Comments
 (0)