Skip to content

Commit 067e43c

Browse files
authored
Merge pull request #5575 from martin-frbg/woa-neozdot
Make the thunderx2 zdot kernel compatible with LLVM21 in Windows on Arm
2 parents 0ff51a4 + d39b777 commit 067e43c

File tree

2 files changed

+30
-17
lines changed

2 files changed

+30
-17
lines changed

kernel/arm64/KERNEL.NEOVERSEN1

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -102,18 +102,8 @@ ZNRM2KERNEL = znrm2.S
102102

103103
DDOTKERNEL = dot.c
104104
SDOTKERNEL = dot.c
105-
ifeq ($(OSNAME), WINNT)
106-
ifeq ($(C_COMPILER), CLANG)
107-
CDOTKERNEL = zdot.S
108-
ZDOTKERNEL = zdot.S
109-
else
110-
CDOTKERNEL = zdot_thunderx2t99.c
111-
ZDOTKERNEL = zdot_thunderx2t99.c
112-
endif
113-
else
114105
CDOTKERNEL = zdot_thunderx2t99.c
115106
ZDOTKERNEL = zdot_thunderx2t99.c
116-
endif
117107
DSDOTKERNEL = dot.S
118108

119109
DGEMM_BETA = dgemm_beta.S

kernel/arm64/zdot_thunderx2t99.c

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3232
#endif
3333

3434
#include "common.h"
35-
35+
#ifdef _MSC_VER
36+
#include <complex.h>
37+
#endif
3638
#include <arm_neon.h>
3739

3840
#define N "x0" /* vector length */
@@ -197,14 +199,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
197199
#if defined(SMP)
198200
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
199201
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
200-
void *c, BLASLONG ldc, int (*function)(), int nthreads);
202+
void *c, BLASLONG ldc, int (*function)(void), int nthreads);
201203
#endif
202204

203205
static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, OPENBLAS_COMPLEX_FLOAT *result)
204206
{
205-
FLOAT dotr = 0.0, doti = 0.0;
207+
FLOAT dotr = 0.0, doti = 0.0;
208+
209+
#ifdef _MSC_VER
210+
CREAL(*result) = 0.0;
211+
CIMAG(*result) = 0.0;
212+
#else
206213
OPENBLAS_COMPLEX_FLOAT cf = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0);
207-
*result = cf;
214+
*result = cf;
215+
#endif
208216

209217
if ( n < 0 ) return;
210218

@@ -235,8 +243,9 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON
235243
" asr "J", "N", #"N_DIV_SHIFT" \n"
236244
" cmp "J", xzr \n"
237245
" beq 3f //dot_kernel_F1 \n"
238-
246+
#ifndef _MSC_VER
239247
" .align 5 \n"
248+
#endif
240249
"2: //dot_kernel_F: \n"
241250
" "KERNEL_F" \n"
242251
" subs "J", "J", #1 \n"
@@ -297,10 +306,14 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON
297306
"v23", "v24", "v25", "v26", "v27", "v28", "v29",
298307
"v30", "v31"
299308
);
300-
309+
#ifdef _MSC_VER
310+
CREAL(*result) = dotr;
311+
CIMAG(*result) = doti;
312+
#else
301313
cf=OPENBLAS_MAKE_COMPLEX_FLOAT(dotr, doti);
302314
*result = cf;
303-
return;
315+
#endif
316+
return;
304317
}
305318

306319
#if defined(SMP)
@@ -320,7 +333,13 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
320333
int nthreads;
321334
FLOAT dummy_alpha;
322335
#endif
336+
#ifdef _MSC_VER
337+
OPENBLAS_COMPLEX_FLOAT zdot;
338+
CREAL(zdot) = 0.0;
339+
CIMAG(zdot) = 0.0;
340+
#else
323341
OPENBLAS_COMPLEX_FLOAT zdot = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0,0.0);
342+
#endif
324343

325344
#if defined(SMP)
326345
if (inc_x == 0 || inc_y == 0 || n <= 10000)
@@ -347,7 +366,11 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
347366

348367
ptr = (OPENBLAS_COMPLEX_FLOAT *)result;
349368
for (i = 0; i < nthreads; i++) {
369+
#ifdef _MSC_VER
370+
CREAL(zdot)+= CREAL(*ptr);CIMAG(zdot)+=CIMAG(*ptr);
371+
#else
350372
zdot = OPENBLAS_MAKE_COMPLEX_FLOAT (CREAL(zdot) + CREAL(*ptr), CIMAG(zdot) + CIMAG(*ptr));
373+
#endif
351374
ptr = (void *)(((char *)ptr) + sizeof(double) * 2);
352375
}
353376
}

0 commit comments

Comments
 (0)