Commit 6144004

Fix xtheadvector compilation
1 parent 4a94ef5

3 files changed, 64 insertions(+), 70 deletions(-)

common_macro.h

Lines changed: 1 addition & 1 deletion
@@ -2709,7 +2709,7 @@
 #ifndef ASSEMBLER
 #if !defined(DYNAMIC_ARCH) \
   && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
-  || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA))
+  || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA) || defined(ARCH_RISCV64))
 extern BLASLONG gemm_offset_a;
 extern BLASLONG gemm_offset_b;
 extern BLASLONG bgemm_p;
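
The effect is that non-DYNAMIC_ARCH riscv64 builds now compile in these buffer-offset declarations. A reduced, self-contained sketch of the guard's behavior (illustrative only: ARCH_RISCV64 is normally set by the OpenBLAS build system, and BLASLONG here is a stand-in for OpenBLAS's typedef):

/* Reduced model of the guard above, arch list abridged. */
#define ARCH_RISCV64              /* assumption: set by the build for riscv64 */
typedef long BLASLONG;            /* stand-in for OpenBLAS's typedef */

#if !defined(DYNAMIC_ARCH) && defined(ARCH_RISCV64)
extern BLASLONG gemm_offset_a;    /* now visible to riscv64 kernels, so code
                                     referencing it compiles instead of hitting
                                     an undeclared-identifier error */
extern BLASLONG gemm_offset_b;
#endif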

common_riscv64.h

Lines changed: 0 additions & 6 deletions
@@ -93,13 +93,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
 # include <riscv_vector.h>
 #endif
 
-#if defined( __riscv_xtheadc ) && defined( __riscv_v ) && ( __riscv_v <= 7000 )
-// t-head toolchain uses obsolete rvv intrinsics, can't build for C910V without this
-#define RISCV_0p10_INTRINSICS
-#define RISCV_RVV(x) x
-#else
 #define RISCV_RVV(x) __riscv_ ## x
-#endif
 
 #if defined(C910V) || defined(RISCV64_ZVL256B)
 # if !defined(DOUBLE)
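
With the fallback branch removed, RISCV_RVV unconditionally token-pastes the __riscv_ prefix required by the RVV 1.0 intrinsic API, so kernels written against the macro need a toolchain that ships the 1.0 intrinsics even when targeting xtheadvector hardware. A minimal standalone sketch of the expansion (illustrative, not OpenBLAS code; build with a V-extension toolchain such as -march=rv64gcv):

#include <riscv_vector.h>

#define RISCV_RVV(x) __riscv_ ## x   /* as defined after this commit */

size_t chunk_len(size_t n)
{
    /* RISCV_RVV(vsetvl_e64m4)(n) expands to __riscv_vsetvl_e64m4(n);
     * before this commit, a t-head 0.7 toolchain would have expanded
     * it to the bare pre-1.0 name vsetvl_e64m4(n) instead. */
    return RISCV_RVV(vsetvl_e64m4)(n);
}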

kernel/riscv64/dsdot_vector.c

Lines changed: 63 additions & 63 deletions
@@ -37,114 +37,114 @@ double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
     vfloat32m2_t vx, vy;
     unsigned int gvl = 0;
     vfloat64m1_t v_res, v_z0;
-    gvl = vsetvlmax_e64m1();
-    v_res = vfmv_v_f_f64m1(0, gvl);
-    v_z0 = vfmv_v_f_f64m1(0, gvl);
+    gvl = __riscv_vsetvlmax_e64m1();
+    v_res = __riscv_vfmv_v_f_f64m1(0, gvl);
+    v_z0 = __riscv_vfmv_v_f_f64m1(0, gvl);
 
     if(inc_x == 1 && inc_y == 1){
-        gvl = vsetvl_e64m4(n);
-        vr = vfmv_v_f_f64m4(0, gvl);
+        gvl = __riscv_vsetvl_e64m4(n);
+        vr = __riscv_vfmv_v_f_f64m4(0, gvl);
         for(i=0,j=0; i<n/gvl; i++){
-            vx = vle32_v_f32m2(&x[j], gvl);
-            vy = vle32_v_f32m2(&y[j], gvl);
-            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            vx = __riscv_vle32_v_f32m2(&x[j], gvl);
+            vy = __riscv_vle32_v_f32m2(&y[j], gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
             j += gvl;
         }
         if(j > 0){
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
         }
         //tail
         if(j < n){
-            gvl = vsetvl_e64m4(n-j);
-            vx = vle32_v_f32m2(&x[j], gvl);
-            vy = vle32_v_f32m2(&y[j], gvl);
-            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
-            //vr = vfdot_vv_f32m2(vx, vy, gvl);
-            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            gvl = __riscv_vsetvl_e64m4(n-j);
+            vx = __riscv_vle32_v_f32m2(&x[j], gvl);
+            vy = __riscv_vle32_v_f32m2(&y[j], gvl);
+            vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
+            //vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
         }
     }else if(inc_y == 1){
-        gvl = vsetvl_e64m4(n);
-        vr = vfmv_v_f_f64m4(0, gvl);
+        gvl = __riscv_vsetvl_e64m4(n);
+        vr = __riscv_vfmv_v_f_f64m4(0, gvl);
         int stride_x = inc_x * sizeof(FLOAT);
         for(i=0,j=0; i<n/gvl; i++){
-            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
-            vy = vle32_v_f32m2(&y[j], gvl);
-            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = __riscv_vle32_v_f32m2(&y[j], gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
             j += gvl;
         }
         if(j > 0){
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
 
         }
         //tail
         if(j < n){
-            gvl = vsetvl_e64m4(n-j);
-            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
-            vy = vle32_v_f32m2(&y[j], gvl);
-            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
-            //vr = vfdot_vv_f32m2(vx, vy, gvl);
-            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            gvl = __riscv_vsetvl_e64m4(n-j);
+            vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = __riscv_vle32_v_f32m2(&y[j], gvl);
+            vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
+            //vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
 
         }
     }else if(inc_x == 1){
-        gvl = vsetvl_e64m4(n);
-        vr = vfmv_v_f_f64m4(0, gvl);
+        gvl = __riscv_vsetvl_e64m4(n);
+        vr = __riscv_vfmv_v_f_f64m4(0, gvl);
         int stride_y = inc_y * sizeof(FLOAT);
         for(i=0,j=0; i<n/gvl; i++){
-            vx = vle32_v_f32m2(&x[j], gvl);
-            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
-            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            vx = __riscv_vle32_v_f32m2(&x[j], gvl);
+            vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
             j += gvl;
         }
         if(j > 0){
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
 
         }
         //tail
         if(j < n){
-            gvl = vsetvl_e64m4(n-j);
-            vx = vle32_v_f32m2(&x[j], gvl);
-            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
-            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
-            //vr = vfdot_vv_f32m2(vx, vy, gvl);
-            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            gvl = __riscv_vsetvl_e64m4(n-j);
+            vx = __riscv_vle32_v_f32m2(&x[j], gvl);
+            vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
+            //vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
 
         }
     }else{
-        gvl = vsetvl_e64m4(n);
-        vr = vfmv_v_f_f64m4(0, gvl);
+        gvl = __riscv_vsetvl_e64m4(n);
+        vr = __riscv_vfmv_v_f_f64m4(0, gvl);
         int stride_x = inc_x * sizeof(FLOAT);
         int stride_y = inc_y * sizeof(FLOAT);
         for(i=0,j=0; i<n/gvl; i++){
-            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
-            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
-            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
             j += gvl;
         }
         if(j > 0){
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
 
         }
         //tail
         if(j < n){
-            gvl = vsetvl_e64m4(n-j);
-            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
-            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
-            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
-            //vr = vfdot_vv_f32m2(vx, vy, gvl);
-            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
-            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
-            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+            gvl = __riscv_vsetvl_e64m4(n-j);
+            vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
+            //vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
+            vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
+            dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
 
         }
     }
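
Every call site above follows the same migration: the __riscv_ prefix is added, and vfredusum drops its destination operand per the RVV 1.0 intrinsics, taking only (vector, scalar identity, vl) and returning the result. For reference, a self-contained unit-stride sketch using only the 1.0-style intrinsics that appear in this diff (illustrative, not OpenBLAS code; build with a V-extension toolchain, e.g. -march=rv64gcv):

#include <stddef.h>
#include <riscv_vector.h>

/* Single-precision dot product accumulated in double. Each chunk is
 * widened (f32m2 * f32m2 -> f64m4), reduced to a scalar, and added to
 * the running sum; the kernel above instead carries a vector
 * accumulator across iterations, but the intrinsic usage is the same. */
double dsdot_unit_stride(size_t n, const float *x, const float *y)
{
    size_t vlmax = __riscv_vsetvlmax_e64m1();
    /* scalar identity element for the unordered sum reduction */
    vfloat64m1_t v_z0 = __riscv_vfmv_v_f_f64m1(0.0, vlmax);
    double dot = 0.0;

    for (size_t j = 0; j < n; ) {
        size_t vl = __riscv_vsetvl_e64m4(n - j);
        vfloat32m2_t vx = __riscv_vle32_v_f32m2(&x[j], vl);
        vfloat32m2_t vy = __riscv_vle32_v_f32m2(&y[j], vl);
        /* fresh f64 accumulator, then widening multiply-accumulate */
        vfloat64m4_t acc = __riscv_vfmv_v_f_f64m4(0.0, vl);
        acc = __riscv_vfwmacc_vv_f64m4(acc, vx, vy, vl);
        /* 1.0-style reduction: no destination operand, result returned */
        vfloat64m1_t v_res = __riscv_vfredusum_vs_f64m4_f64m1(acc, v_z0, vl);
        dot += __riscv_vfmv_f_s_f64m1_f64(v_res);
        j += vl;
    }
    return dot;
}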

0 commit comments
