Skip to content

Commit d5aeff6

Browse files
author
Ashwin Sekhar T K
committed
ARM64: Enable DYNAMIC_ARCH
Enable the DYNAMIC_ARCH feature on ARM64. This patch uses the CPUID feature of the Linux kernel to detect the core type at runtime (https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt). If the kernel lacks this feature, the user should set the OPENBLAS_CORETYPE environment variable to select the desired core type; if no core type can be determined, the library falls back to the generic ARMV8 core.
1 parent af2837c commit d5aeff6

File tree

8 files changed

+350
-64
lines changed

8 files changed

+350
-64
lines changed

Makefile.system

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT)
510510
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
511511
endif
512512

513+
ifeq ($(ARCH), arm64)
514+
DYNAMIC_CORE = ARMV8
515+
DYNAMIC_CORE += CORTEXA57
516+
DYNAMIC_CORE += THUNDERX
517+
DYNAMIC_CORE += THUNDERX2T99
518+
endif
519+
513520
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
514521
ifndef DYNAMIC_CORE
515522
override DYNAMIC_ARCH=

driver/others/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@ endif
1515
# COMMONOBJS += info.$(SUFFIX)
1616

1717
ifeq ($(DYNAMIC_ARCH), 1)
18+
ifeq ($(ARCH),arm64)
19+
COMMONOBJS += dynamic_arm64.$(SUFFIX)
20+
else
1821
COMMONOBJS += dynamic.$(SUFFIX)
22+
endif
1923
else
2024
COMMONOBJS += parameter.$(SUFFIX)
2125
endif
@@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c
7175
endif
7276

7377
ifeq ($(DYNAMIC_ARCH), 1)
78+
ifeq ($(ARCH),arm64)
79+
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX)
80+
else
7481
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
82+
endif
7583
else
7684
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
7785
endif

driver/others/dynamic_arm64.c

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
#include "common.h"
40+
#include <asm/hwcap.h>
41+
#include <sys/auxv.h>
42+
43+
extern gotoblas_t gotoblas_ARMV8;
44+
extern gotoblas_t gotoblas_CORTEXA57;
45+
extern gotoblas_t gotoblas_THUNDERX;
46+
extern gotoblas_t gotoblas_THUNDERX2T99;
47+
48+
extern void openblas_warning(int verbose, const char * msg);
49+
50+
#define NUM_CORETYPES 4
51+
52+
/*
53+
* In case asm/hwcap.h is outdated on the build system, make sure
54+
* that HWCAP_CPUID is defined
55+
*/
56+
#ifndef HWCAP_CPUID
57+
#define HWCAP_CPUID (1 << 11)
58+
#endif
59+
60+
/*
 * get_cpu_ftr(id, var): read the AArch64 system register named by `id`
 * into the lvalue `var` with a single MRS instruction.
 *
 *   id   bare register name (e.g. MIDR_EL1); it is stringified into the
 *        instruction text, so it must not be a quoted string.
 *   var  lvalue that receives the register value.
 *
 * `__asm__` is used rather than `asm` so the macro also compiles when the
 * compiler runs in a strict ISO mode (-std=c99 / -std=c11), where the plain
 * `asm` keyword is not recognised.
 *
 * get_coretype() only invokes this after checking HWCAP_CPUID.
 */
#define get_cpu_ftr(id, var) ({					\
		__asm__("mrs %0, " #id : "=r" (var));		\
	})
63+
64+
/*
 * Lower-case names of the supported cores.
 *
 * Slots 0 .. NUM_CORETYPES-1 are the selectable cores, in the same order
 * used by force_coretype() and gotoblas_corename(). The extra last slot
 * (index NUM_CORETYPES) is the "unknown" name that gotoblas_corename()
 * returns when the active kernel table is none of the known ones.
 */
static char *corename[] = {
  [0] = "armv8",
  [1] = "cortexa57",
  [2] = "thunderx",
  [3] = "thunderx2t99",
  [4] = "unknown"
};
71+
72+
char *gotoblas_corename(void) {
73+
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
74+
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1];
75+
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2];
76+
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3];
77+
return corename[NUM_CORETYPES];
78+
}
79+
80+
static gotoblas_t *force_coretype(char *coretype) {
81+
int i ;
82+
int found = -1;
83+
char message[128];
84+
85+
for ( i=0 ; i < NUM_CORETYPES; i++)
86+
{
87+
if (!strncasecmp(coretype, corename[i], 20))
88+
{
89+
found = i;
90+
break;
91+
}
92+
}
93+
94+
switch (found)
95+
{
96+
case 0: return (&gotoblas_ARMV8);
97+
case 1: return (&gotoblas_CORTEXA57);
98+
case 2: return (&gotoblas_THUNDERX);
99+
case 3: return (&gotoblas_THUNDERX2T99);
100+
}
101+
snprintf(message, 128, "Core not found: %s\n", coretype);
102+
openblas_warning(1, message);
103+
return NULL;
104+
}
105+
106+
static gotoblas_t *get_coretype(void) {
107+
int implementer, variant, part, arch, revision, midr_el1;
108+
109+
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
110+
char coremsg[128];
111+
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
112+
openblas_warning(1, coremsg);
113+
return NULL;
114+
}
115+
116+
get_cpu_ftr(MIDR_EL1, midr_el1);
117+
/*
118+
* MIDR_EL1
119+
*
120+
* 31 24 23 20 19 16 15 4 3 0
121+
* -----------------------------------------------------------------
122+
* | Implementer | Variant | Architecture | Part Number | Revision |
123+
* -----------------------------------------------------------------
124+
*/
125+
implementer = (midr_el1 >> 24) & 0xFF;
126+
part = (midr_el1 >> 4) & 0xFFF;
127+
128+
switch(implementer)
129+
{
130+
case 0x41: // ARM
131+
switch (part)
132+
{
133+
case 0xd07: // Cortex A57
134+
case 0xd08: // Cortex A72
135+
case 0xd03: // Cortex A53
136+
return &gotoblas_CORTEXA57;
137+
}
138+
break;
139+
case 0x42: // Broadcom
140+
switch (part)
141+
{
142+
case 0x516: // Vulcan
143+
return &gotoblas_THUNDERX2T99;
144+
}
145+
break;
146+
case 0x43: // Cavium
147+
switch (part)
148+
{
149+
case 0x0a1: // ThunderX
150+
return &gotoblas_THUNDERX;
151+
case 0x0af: // ThunderX2
152+
return &gotoblas_THUNDERX2T99;
153+
}
154+
break;
155+
}
156+
return NULL;
157+
}
158+
159+
void gotoblas_dynamic_init(void) {
160+
161+
char coremsg[128];
162+
char coren[22];
163+
char *p;
164+
165+
if (gotoblas) return;
166+
167+
p = getenv("OPENBLAS_CORETYPE");
168+
if ( p )
169+
{
170+
gotoblas = force_coretype(p);
171+
}
172+
else
173+
{
174+
gotoblas = get_coretype();
175+
}
176+
177+
if (gotoblas == NULL)
178+
{
179+
snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
180+
openblas_warning(1, coremsg);
181+
gotoblas = &gotoblas_ARMV8;
182+
}
183+
184+
if (gotoblas && gotoblas->init) {
185+
strncpy(coren, gotoblas_corename(), 20);
186+
sprintf(coremsg, "Core: %s\n", coren);
187+
openblas_warning(2, coremsg);
188+
gotoblas -> init();
189+
} else {
190+
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
191+
exit(1);
192+
}
193+
194+
}
195+
196+
/*
 * Forget the selected kernel table. Because gotoblas_dynamic_init() returns
 * early only while `gotoblas` is non-NULL, clearing it here lets a later
 * init call run core selection again.
 */
void gotoblas_dynamic_quit(void)
{
  gotoblas = NULL;
}

kernel/arm64/KERNEL.ARMV8

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
113113
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
114114
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
115115
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
116-
SGEMMINCOPYOBJ = sgemm_incopy.o
117-
SGEMMITCOPYOBJ = sgemm_itcopy.o
116+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
117+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
118118
endif
119119
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
120120
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
121-
SGEMMONCOPYOBJ = sgemm_oncopy.o
122-
SGEMMOTCOPYOBJ = sgemm_otcopy.o
121+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
122+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
123123

124124
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
125125
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
@@ -134,8 +134,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
134134
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
135135
endif
136136

137-
DGEMMINCOPYOBJ = dgemm_incopy.o
138-
DGEMMITCOPYOBJ = dgemm_itcopy.o
137+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
138+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
139139
endif
140140

141141
ifeq ($(DGEMM_UNROLL_N), 4)
@@ -146,34 +146,34 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
146146
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
147147
endif
148148

149-
DGEMMONCOPYOBJ = dgemm_oncopy.o
150-
DGEMMOTCOPYOBJ = dgemm_otcopy.o
149+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
150+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
151151

152152
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
153153
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
154154
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
155155
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
156156
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
157-
CGEMMINCOPYOBJ = cgemm_incopy.o
158-
CGEMMITCOPYOBJ = cgemm_itcopy.o
157+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
158+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
159159
endif
160160
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
161161
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
162-
CGEMMONCOPYOBJ = cgemm_oncopy.o
163-
CGEMMOTCOPYOBJ = cgemm_otcopy.o
162+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
163+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
164164

165165
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
166166
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
167167
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
168168
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
169169
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
170-
ZGEMMINCOPYOBJ = zgemm_incopy.o
171-
ZGEMMITCOPYOBJ = zgemm_itcopy.o
170+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
171+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
172172
endif
173173
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
174174
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
175-
ZGEMMONCOPYOBJ = zgemm_oncopy.o
176-
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
175+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
176+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
177177

178178
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
179179
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
@@ -201,25 +201,25 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
201201
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
202202
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
203203
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
204-
SGEMMONCOPYOBJ = sgemm_oncopy.o
205-
SGEMMOTCOPYOBJ = sgemm_otcopy.o
204+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
205+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
206206

207207
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
208208
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
209209
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
210-
DGEMMONCOPYOBJ = dgemm_oncopy.o
211-
DGEMMOTCOPYOBJ = dgemm_otcopy.o
210+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
211+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
212212

213213
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
214214
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
215215
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
216-
CGEMMONCOPYOBJ = cgemm_oncopy.o
217-
CGEMMOTCOPYOBJ = cgemm_otcopy.o
216+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
217+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
218218

219219
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
220220
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
221221
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
222-
ZGEMMONCOPYOBJ = zgemm_oncopy.o
223-
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
222+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
223+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
224224

225225
endif

0 commit comments

Comments
 (0)