Skip to content

Commit ceb44be

Browse files
committed
update the intrinsic api to the official name.
1 parent ed47326 commit ceb44be

39 files changed

+1628
-1479
lines changed

kernel/riscv64/amax_vector.c

Lines changed: 57 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
#include <math.h>
3030

3131
#if !defined(DOUBLE)
32-
#define RVV_EFLOAT RVV_E32
33-
#define RVV_M RVV_M8
34-
#define FLOAT_V_T float32xm8_t
35-
#define VLEV_FLOAT vlev_float32xm8
36-
#define VLSEV_FLOAT vlsev_float32xm8
37-
#define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8
38-
#define MASK_T e32xm8_t
39-
#define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8
40-
#define VFMVVF_FLOAT vfmvvf_float32xm8
41-
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8
42-
#define VFMAXVV_FLOAT vfmaxvv_float32xm8
32+
#define VSETVL(n) vsetvl_e32m8(n)
33+
#define VSETVL_MAX vsetvlmax_e32m1()
34+
#define FLOAT_V_T vfloat32m8_t
35+
#define FLOAT_V_T_M1 vfloat32m1_t
36+
#define VLEV_FLOAT vle_v_f32m8
37+
#define VLSEV_FLOAT vlse_v_f32m8
38+
#define VFREDMAXVS_FLOAT vfredmax_vs_f32m8_f32m1
39+
#define MASK_T vbool4_t
40+
#define VMFLTVF_FLOAT vmflt_vf_f32m8_b4
41+
#define VFMVVF_FLOAT vfmv_v_f_f32m8
42+
#define VFMVVF_FLOAT_M1 vfmv_v_f_f32m1
43+
#define VFRSUBVF_MASK_FLOAT vfrsub_vf_f32m8_m
44+
#define VFMAXVV_FLOAT vfmax_vv_f32m8
4345
#else
44-
#define RVV_EFLOAT RVV_E64
45-
#define RVV_M RVV_M8
46-
#define FLOAT_V_T float64xm8_t
47-
#define VLEV_FLOAT vlev_float64xm8
48-
#define VLSEV_FLOAT vlsev_float64xm8
49-
#define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8
50-
#define MASK_T e64xm8_t
51-
#define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8
52-
#define VFMVVF_FLOAT vfmvvf_float64xm8
53-
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8
54-
#define VFMAXVV_FLOAT vfmaxvv_float64xm8
46+
#define VSETVL(n) vsetvl_e64m8(n)
47+
#define VSETVL_MAX vsetvlmax_e64m1()
48+
#define FLOAT_V_T vfloat64m8_t
49+
#define FLOAT_V_T_M1 vfloat64m1_t
50+
#define VLEV_FLOAT vle_v_f64m8
51+
#define VLSEV_FLOAT vlse_v_f64m8
52+
#define VFREDMAXVS_FLOAT vfredmax_vs_f64m8_f64m1
53+
#define MASK_T vbool8_t
54+
#define VMFLTVF_FLOAT vmflt_vf_f64m8_b8
55+
#define VFMVVF_FLOAT vfmv_v_f_f64m8
56+
#define VFMVVF_FLOAT_M1 vfmv_v_f_f64m1
57+
#define VFRSUBVF_MASK_FLOAT vfrsub_vf_f64m8_m
58+
#define VFMAXVV_FLOAT vfmax_vv_f64m8
5559
#endif
5660

5761
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
@@ -62,19 +66,25 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
6266
if (n <= 0 || inc_x <= 0) return(maxf);
6367
unsigned int gvl = 0;
6468
FLOAT_V_T v0, v1, v_max;
69+
FLOAT_V_T_M1 v_res, v_zero;
70+
gvl = VSETVL_MAX;
71+
v_res = VFMVVF_FLOAT_M1(0, gvl);
72+
v_zero = VFMVVF_FLOAT_M1(0, gvl);
6573

6674
MASK_T mask0, mask1;
6775
FLOAT zero = 0.0;
6876
if(inc_x == 1){
69-
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
77+
gvl = VSETVL(n);
7078
if(gvl <= n/2){
7179
v_max = VFMVVF_FLOAT(0, gvl);
7280
for(i=0,j=0; i<n/(gvl*2); i++){
7381
v0 = VLEV_FLOAT(&x[j], gvl);
82+
v1 = VLEV_FLOAT(&x[j+gvl], gvl);
7483
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
7584
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
7685
#if defined(DOUBLE)
7786
asm volatile(
87+
"vsetvli zero, zero, e8, m1\n\t"
7888
"vor.vv v0, %1, %1\n\t"
7989
"vsetvli x0, %3, e64,m8 \n\t"
8090
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -83,6 +93,7 @@ asm volatile(
8393
:"v0");
8494
#else
8595
asm volatile(
96+
"vsetvli zero, zero, e8, m1\n\t"
8697
"vor.vv v0, %1, %1\n\t"
8798
"vsetvli x0, %3, e32,m8 \n\t"
8899
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -98,6 +109,7 @@ asm volatile(
98109
//v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl);
99110
#if defined(DOUBLE)
100111
asm volatile(
112+
"vsetvli zero, zero, e8, m1\n\t"
101113
"vor.vv v0, %1, %1\n\t"
102114
"vsetvli x0, %3, e64,m8 \n\t"
103115
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -106,6 +118,7 @@ asm volatile(
106118
:"v0");
107119
#else
108120
asm volatile(
121+
"vsetvli zero, zero, e8, m1\n\t"
109122
"vor.vv v0, %1, %1\n\t"
110123
"vsetvli x0, %3, e32,m8 \n\t"
111124
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -117,17 +130,17 @@ asm volatile(
117130
v_max = VFMAXVV_FLOAT(v_max, v1, gvl);
118131
j += gvl*2;
119132
}
120-
v0 = VFMVVF_FLOAT(0, gvl);
121-
v0 = VFREDMAXVS_FLOAT(v_max, v0, gvl);
122-
maxf = v0[0];
133+
v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_zero, gvl);
134+
maxf = v_res[0];
123135
}
124136
for(;j<n;){
125-
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
137+
gvl = VSETVL(n-j);
126138
v0 = VLEV_FLOAT(&x[j], gvl);
127139
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
128140
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
129141
#if defined(DOUBLE)
130142
asm volatile(
143+
"vsetvli zero, zero, e8, m1\n\t"
131144
"vor.vv v0, %1, %1\n\t"
132145
"vsetvli x0, %3, e64,m8 \n\t"
133146
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -136,6 +149,7 @@ asm volatile(
136149
:"v0");
137150
#else
138151
asm volatile(
152+
"vsetvli zero, zero, e8, m1\n\t"
139153
"vor.vv v0, %1, %1\n\t"
140154
"vsetvli x0, %3, e32,m8 \n\t"
141155
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -144,14 +158,13 @@ asm volatile(
144158
:"v0");
145159
#endif
146160

147-
v1 = VFMVVF_FLOAT(0, gvl);
148-
v0 = VFREDMAXVS_FLOAT(v0, v1, gvl);
149-
if(v0[0] > maxf)
150-
maxf = v0[0];
161+
v_res = VFREDMAXVS_FLOAT(v_res, v0, v_zero, gvl);
162+
if(v_res[0] > maxf)
163+
maxf = v_res[0];
151164
j += gvl;
152165
}
153166
}else{
154-
gvl = vsetvli(n, RVV_EFLOAT, RVV_M);
167+
gvl = VSETVL(n);
155168
BLASLONG stride_x = inc_x * sizeof(FLOAT);
156169
if(gvl <= n/2){
157170
BLASLONG inc_xv = inc_x * gvl;
@@ -162,6 +175,7 @@ asm volatile(
162175
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
163176
#if defined(DOUBLE)
164177
asm volatile(
178+
"vsetvli zero, zero, e8, m1\n\t"
165179
"vor.vv v0, %1, %1\n\t"
166180
"vsetvli x0, %3, e64,m8 \n\t"
167181
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -170,6 +184,7 @@ asm volatile(
170184
:"v0");
171185
#else
172186
asm volatile(
187+
"vsetvli zero, zero, e8, m1\n\t"
173188
"vor.vv v0, %1, %1\n\t"
174189
"vsetvli x0, %3, e32,m8 \n\t"
175190
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -185,6 +200,7 @@ asm volatile(
185200
//v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl);
186201
#if defined(DOUBLE)
187202
asm volatile(
203+
"vsetvli zero, zero, e8, m1\n\t"
188204
"vor.vv v0, %1, %1\n\t"
189205
"vsetvli x0, %3, e64,m8 \n\t"
190206
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -193,6 +209,7 @@ asm volatile(
193209
:"v0");
194210
#else
195211
asm volatile(
212+
"vsetvli zero, zero, e8, m1\n\t"
196213
"vor.vv v0, %1, %1\n\t"
197214
"vsetvli x0, %3, e32,m8 \n\t"
198215
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -205,17 +222,17 @@ asm volatile(
205222
j += gvl*2;
206223
ix += inc_xv*2;
207224
}
208-
v0 = VFMVVF_FLOAT(0, gvl);
209-
v0 = VFREDMAXVS_FLOAT(v_max, v0, gvl);
210-
maxf = v0[0];
225+
v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_zero, gvl);
226+
maxf = v_res[0];
211227
}
212228
for(;j<n;){
213-
gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M);
229+
gvl = VSETVL(n-j);
214230
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
215231
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
216232
//v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl);
217233
#if defined(DOUBLE)
218234
asm volatile(
235+
"vsetvli zero, zero, e8, m1\n\t"
219236
"vor.vv v0, %1, %1\n\t"
220237
"vsetvli x0, %3, e64,m8 \n\t"
221238
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -224,6 +241,7 @@ asm volatile(
224241
:"v0");
225242
#else
226243
asm volatile(
244+
"vsetvli zero, zero, e8, m1\n\t"
227245
"vor.vv v0, %1, %1\n\t"
228246
"vsetvli x0, %3, e32,m8 \n\t"
229247
"vfrsub.vf %0, %0, %2, v0.t \n\t"
@@ -232,10 +250,9 @@ asm volatile(
232250
:"v0");
233251
#endif
234252

235-
v1 = VFMVVF_FLOAT(0, gvl);
236-
v0 = VFREDMAXVS_FLOAT(v0, v1, gvl);
237-
if(v0[0] > maxf)
238-
maxf = v0[0];
253+
v_res = VFREDMAXVS_FLOAT(v_res, v0, v_zero, gvl);
254+
if(v_res[0] > maxf)
255+
maxf = v_res[0];
239256
j += gvl;
240257
}
241258
}

0 commit comments

Comments
 (0)