Skip to content

Commit b4a1153

Browse files
authored
Merge pull request #4566 from XiWeiGu/fix_loongarch_lsx
LoongArch: Fixed LSX optimization kernels
2 parents 56d114b + 50869f6 commit b4a1153

File tree

6 files changed

+121
-31
lines changed

6 files changed

+121
-31
lines changed

kernel/loongarch64/amin_lsx.S

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
146146
add.d X, X, INCX
147147
vinsgr2vr.d VX1, t3, 0
148148
vinsgr2vr.d VX1, t4, 1
149-
vfmaxa.d VM1, VX0, VX1
149+
vfmina.d VM1, VX0, VX1
150150
ld.d t1, X, 0 * SIZE
151151
add.d X, X, INCX
152152
ld.d t2, X, 0 * SIZE
@@ -159,9 +159,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
159159
add.d X, X, INCX
160160
vinsgr2vr.d VX1, t3, 0
161161
vinsgr2vr.d VX1, t4, 1
162-
vfmaxa.d VM2, VX0, VX1
163-
vfmaxa.d VM1, VM1, VM2
164-
vfmaxa.d VM0, VM0, VM1
162+
vfmina.d VM2, VX0, VX1
163+
vfmina.d VM1, VM1, VM2
164+
vfmina.d VM0, VM0, VM1
165165
#else
166166
ld.w t1, X, 0
167167
add.d X, X, INCX
@@ -187,8 +187,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
187187
vinsgr2vr.w VX1, t2, 1
188188
vinsgr2vr.w VX1, t3, 2
189189
vinsgr2vr.w VX1, t4, 3
190-
vfmaxa.s VM1, VX0, VX1
191-
vfmaxa.s VM0, VM0, VM1
190+
vfmina.s VM1, VX0, VX1
191+
vfmina.s VM0, VM0, VM1
192192
#endif
193193
addi.d I, I, -1
194194
blt $r0, I, .L21

kernel/loongarch64/axpby_lsx.S

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
990990
#endif
991991
add.d YY, YY, INCY
992992
blt $r0, I, .L222
993+
move Y, YY
993994
b .L997
994995
.align 3
995996

kernel/loongarch64/camax_lsx.S

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
177177
FABS t4, t4
178178
ADD t1, t1, t2
179179
ADD t3, t3, t4
180-
FMAX s1, t1, t3
180+
FMAX s2, t1, t3
181181
LD t1, X, 0 * SIZE
182182
LD t2, X, 1 * SIZE
183183
add.d X, X, INCX
@@ -205,13 +205,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
205205
ADD t1, t1, t2
206206
ADD t3, t3, t4
207207
FMAX s4, t1, t3
208+
209+
FMAX s1, s1, s2
210+
FMAX s3, s3, s4
211+
FMAX a0, a0, s3
212+
FMAX a0, a0, s1
208213
blt $r0, I, .L21
209214
.align 3
210215

211216
.L22:
212-
FMAX s1, s1, s2
213-
FMAX s3, s3, s4
214-
FMAX s1, s1, s3
217+
MOV s1, a0
215218
.align 3
216219

217220
.L23: //N<8

kernel/loongarch64/camin_lsx.S

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
186186
FABS t4, t4
187187
ADD t1, t1, t2
188188
ADD t3, t3, t4
189-
FMIN s1, t1, t3
189+
FMIN s2, t1, t3
190190
LD t1, X, 0 * SIZE
191191
LD t2, X, 1 * SIZE
192192
add.d X, X, INCX
@@ -214,13 +214,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
214214
ADD t1, t1, t2
215215
ADD t3, t3, t4
216216
FMIN s4, t1, t3
217+
218+
FMIN s1, s1, s2
219+
FMIN s3, s3, s4
220+
FMIN a0, a0, s3
221+
FMIN a0, a0, s1
217222
blt $r0, I, .L21
218223
.align 3
219224

220225
.L22:
221-
FMIN s1, s1, s2
222-
FMIN s3, s3, s4
223-
FMIN s1, s1, s3
226+
MOV s1, a0
224227
.align 3
225228

226229
.L23: //N<8

kernel/loongarch64/crot_lsx.S

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
vreplgr2vr.d VXC, t1
8383
vreplgr2vr.d VXS, t2
8484
vreplgr2vr.d VXZ, t3
85+
srai.d I, N, 1
8586
#else
8687
vreplgr2vr.w VXC, t1
8788
vreplgr2vr.w VXS, t2

kernel/loongarch64/icamin_lsx.S

Lines changed: 99 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7070
LD a1, X, 1 * SIZE
7171
FABS a0, a0
7272
FABS a1, a1
73-
ADD s1, a1, a0
74-
vreplvei.w VM0, VM0, 0
73+
ADD s1, a1, a0 // Initialization value
7574
vxor.v VI3, VI3, VI3 // 0
7675
#ifdef DOUBLE
7776
li.d I, -1
7877
vreplgr2vr.d VI4, I
7978
vffint.d.l VI4, VI4 // -1
80-
bne INCX, TEMP, .L20
79+
bne INCX, TEMP, .L20 // incx != 1
80+
81+
// Init Index
8182
addi.d i0, i0, 1
82-
srai.d I, N, 2
83-
bge $r0, I, .L21
84-
slli.d i0, i0, 1 //2
83+
slli.d i0, i0, 1 // 2
8584
vreplgr2vr.d VINC4, i0
8685
addi.d i0, i0, -3
8786
vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -91,14 +90,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
9190
vinsgr2vr.d VI0, i0, 0 //1
9291
addi.d i0, i0, 1
9392
vinsgr2vr.d VI0, i0, 1 //2
93+
94+
srai.d I, N, 2
95+
bge $r0, I, .L21
96+
97+
// Init VM0
98+
vld VX0, X, 0 * SIZE
99+
vld VX1, X, 2 * SIZE
100+
vpickev.d x1, VX1, VX0
101+
vpickod.d x2, VX1, VX0
102+
vfmul.d x3, VI4, x1
103+
vfmul.d x4, VI4, x2
104+
vfcmp.clt.d VT0, x1, VI3
105+
vfcmp.clt.d VINC8, x2, VI3
106+
vbitsel.v x1, x1, x3, VT0
107+
vbitsel.v x2, x2, x4, VINC8
108+
vfadd.d VM0, x1, x2
94109
#else
95110
li.w I, -1
96111
vreplgr2vr.w VI4, I
97112
vffint.s.w VI4, VI4 // -1
98-
bne INCX, TEMP, .L20
113+
bne INCX, TEMP, .L20 // incx != 1
114+
115+
// Init Index
99116
addi.w i0, i0, 1
100-
srai.d I, N, 2
101-
bge $r0, I, .L21
102117
slli.w i0, i0, 2 //4
103118
vreplgr2vr.w VINC4, i0
104119
addi.w i0, i0, -7
@@ -117,6 +132,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
117132
vinsgr2vr.w VI0, i0, 2 //3
118133
addi.w i0, i0, 1
119134
vinsgr2vr.w VI0, i0, 3 //4
135+
136+
srai.d I, N, 2
137+
bge $r0, I, .L21
138+
139+
// Init VM0
140+
vld VX0, X, 0 * SIZE
141+
vld VX1, X, 4 * SIZE
142+
vpickev.w x1, VX1, VX0
143+
vpickod.w x2, VX1, VX0
144+
vfmul.s x3, VI4, x1
145+
vfmul.s x4, VI4, x2
146+
vfcmp.clt.s VT0, x1, VI3
147+
vfcmp.clt.s VINC8, x2, VI3
148+
vbitsel.v x1, x1, x3, VT0
149+
vbitsel.v x2, x2, x4, VINC8
150+
vfadd.s VM0, x1, x2
120151
#endif
121152
.align 3
122153

@@ -139,6 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
139170
vfcmp.ceq.d VT0, x3, VM0
140171
vbitsel.v VM0, x3, VM0, VT0
141172
vbitsel.v VI0, VI1, VI0, VT0
173+
142174
vld VX0, X, 4 * SIZE
143175
vadd.d VI1, VI1, VINC4
144176
vld VX1, X, 6 * SIZE
@@ -206,9 +238,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
206238
.L20: // INCX!=1
207239
#ifdef DOUBLE
208240
addi.d i0, i0, 1
209-
srai.d I, N, 2
210-
bge $r0, I, .L21
211-
slli.d i0, i0, 1 //2
241+
// Init index
242+
slli.d i0, i0, 1 //2
212243
vreplgr2vr.d VINC4, i0
213244
addi.d i0, i0, -3
214245
vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -218,10 +249,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
218249
vinsgr2vr.d VI0, i0, 0 //1
219250
addi.d i0, i0, 1
220251
vinsgr2vr.d VI0, i0, 1 //2
252+
253+
srai.d I, N, 2
254+
bge $r0, I, .L21 // N < 4
255+
256+
// Init VM0
257+
ld.d t1, X, 0 * SIZE
258+
ld.d t2, X, 1 * SIZE
259+
add.d i1, X, INCX
260+
ld.d t3, i1, 0 * SIZE
261+
ld.d t4, i1, 1 * SIZE
262+
add.d i1, i1, INCX
263+
vinsgr2vr.d x1, t1, 0
264+
vinsgr2vr.d x2, t2, 0
265+
vinsgr2vr.d x1, t3, 1
266+
vinsgr2vr.d x2, t4, 1
267+
vfmul.d x3, VI4, x1
268+
vfmul.d x4, VI4, x2
269+
vfcmp.clt.d VT0, x1, VI3
270+
vfcmp.clt.d VINC8, x2, VI3
271+
vbitsel.v x1, x1, x3, VT0
272+
vbitsel.v x2, x2, x4, VINC8
273+
vfadd.d VM0, x1, x2
221274
#else
222275
addi.w i0, i0, 1
223-
srai.d I, N, 2
224-
bge $r0, I, .L21
276+
277+
// Init index
225278
slli.w i0, i0, 2 //4
226279
vreplgr2vr.w VINC4, i0
227280
addi.w i0, i0, -7
@@ -240,6 +293,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
240293
vinsgr2vr.w VI0, i0, 2 //3
241294
addi.w i0, i0, 1
242295
vinsgr2vr.w VI0, i0, 3 //4
296+
297+
srai.d I, N, 2
298+
bge $r0, I, .L21 // N < 4
299+
300+
// Init VM0
301+
ld.w t1, X, 0 * SIZE
302+
ld.w t2, X, 1 * SIZE
303+
add.d i1, X, INCX
304+
ld.w t3, i1, 0 * SIZE
305+
ld.w t4, i1, 1 * SIZE
306+
add.d i1, i1, INCX
307+
vinsgr2vr.w x1, t1, 0
308+
vinsgr2vr.w x2, t2, 0
309+
vinsgr2vr.w x1, t3, 1
310+
vinsgr2vr.w x2, t4, 1
311+
ld.w t1, i1, 0 * SIZE
312+
ld.w t2, i1, 1 * SIZE
313+
add.d i1, i1, INCX
314+
ld.w t3, i1, 0 * SIZE
315+
ld.w t4, i1, 1 * SIZE
316+
add.d i1, i1, INCX
317+
vinsgr2vr.w x1, t1, 2
318+
vinsgr2vr.w x2, t2, 2
319+
vinsgr2vr.w x1, t3, 3
320+
vinsgr2vr.w x2, t4, 3
321+
vfcmp.clt.s VT0, x1, VI3
322+
vfcmp.clt.s VINC8, x2, VI3
323+
vbitsel.v x1, x1, x3, VT0
324+
vbitsel.v x2, x2, x4, VINC8
325+
vfadd.s VM0, x1, x2
243326
#endif
244327
.align 3
245328

@@ -300,8 +383,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
300383
vinsgr2vr.w x2, t2, 2
301384
vinsgr2vr.w x1, t3, 3
302385
vinsgr2vr.w x2, t4, 3
303-
vpickev.w x1, VX1, VX0
304-
vpickod.w x2, VX1, VX0
305386
#endif
306387
addi.d I, I, -1
307388
VFMUL x3, VI4, x1
@@ -358,12 +439,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
358439
#ifdef DOUBLE
359440
vfmina.d VM0, x1, x2
360441
vfcmp.ceq.d VT0, x1, VM0
442+
vbitsel.v VI0, VI2, VI1, VT0
361443
#else
362444
fcmp.ceq.d $fcc0, $f15, $f10
363445
bceqz $fcc0, .L27
364446
vfcmp.clt.s VT0, VI2, VI0
365-
#endif
366447
vbitsel.v VI0, VI0, VI2, VT0
448+
#endif
367449
.align 3
368450

369451
.L27:

0 commit comments

Comments (0)