Skip to content

Commit efa5063

Browse files
authored
[LoongArch] Optimize inserting an element into the high part of a 256-bit vector (#146816)
1 parent 4797a6c commit efa5063

File tree

3 files changed

+69
-96
lines changed

3 files changed

+69
-96
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6000,10 +6000,9 @@ emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
60006000
Register ScratchReg1 = XSrc;
60016001
if (Idx >= HalfSize) {
60026002
ScratchReg1 = MRI.createVirtualRegister(RC);
6003-
BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
6003+
BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_D), ScratchReg1)
60046004
.addReg(XSrc)
6005-
.addReg(XSrc)
6006-
.addImm(1);
6005+
.addImm(14);
60076006
}
60086007

60096008
Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);

llvm/test/CodeGen/LoongArch/lasx/build-vector.ll

Lines changed: 65 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -250,84 +250,68 @@ define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
250250
; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 14
251251
; CHECK-NEXT: ld.b $a1, $sp, 72
252252
; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 15
253-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
254-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
253+
; CHECK-NEXT: ld.b $a2, $sp, 80
254+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
255255
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0
256-
; CHECK-NEXT: ld.b $a1, $sp, 80
257-
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
258-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
259-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
260-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
261256
; CHECK-NEXT: ld.b $a1, $sp, 88
262257
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
263-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
264-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
265-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2
266-
; CHECK-NEXT: ld.b $a1, $sp, 96
258+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
259+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 1
260+
; CHECK-NEXT: ld.b $a2, $sp, 96
267261
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
268-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
269-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
270-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3
262+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
263+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2
271264
; CHECK-NEXT: ld.b $a1, $sp, 104
272265
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
273-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
274-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
275-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4
276-
; CHECK-NEXT: ld.b $a1, $sp, 112
266+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
267+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 3
268+
; CHECK-NEXT: ld.b $a2, $sp, 112
277269
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
278-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
279-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
280-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5
270+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
271+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4
281272
; CHECK-NEXT: ld.b $a1, $sp, 120
282273
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
283-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
284-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
285-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6
286-
; CHECK-NEXT: ld.b $a1, $sp, 128
274+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
275+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 5
276+
; CHECK-NEXT: ld.b $a2, $sp, 128
287277
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
288-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
289-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
290-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7
278+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
279+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6
291280
; CHECK-NEXT: ld.b $a1, $sp, 136
292281
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
293-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
294-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
295-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8
296-
; CHECK-NEXT: ld.b $a1, $sp, 144
282+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
283+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 7
284+
; CHECK-NEXT: ld.b $a2, $sp, 144
297285
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
298-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
299-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
300-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9
286+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
287+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8
301288
; CHECK-NEXT: ld.b $a1, $sp, 152
302289
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
303-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
304-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
305-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10
306-
; CHECK-NEXT: ld.b $a1, $sp, 160
290+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
291+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 9
292+
; CHECK-NEXT: ld.b $a2, $sp, 160
307293
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
308-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
309-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
310-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11
294+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
295+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10
311296
; CHECK-NEXT: ld.b $a1, $sp, 168
312297
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
313-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
314-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
315-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12
316-
; CHECK-NEXT: ld.b $a1, $sp, 176
298+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
299+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 11
300+
; CHECK-NEXT: ld.b $a2, $sp, 176
317301
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
318-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
319-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
320-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13
302+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
303+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12
321304
; CHECK-NEXT: ld.b $a1, $sp, 184
322305
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
323-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
324-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
306+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
307+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 13
308+
; CHECK-NEXT: ld.b $a2, $sp, 192
309+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
310+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
325311
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14
326-
; CHECK-NEXT: ld.b $a1, $sp, 192
327312
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
328-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
329-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
330-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15
313+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
314+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 15
331315
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
332316
; CHECK-NEXT: xvst $xr0, $a0, 0
333317
; CHECK-NEXT: ret
@@ -371,54 +355,46 @@ entry:
371355
define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
372356
; CHECK-LABEL: buildvector_v16i16:
373357
; CHECK: # %bb.0: # %entry
374-
; CHECK-NEXT: ld.h $t0, $sp, 8
375-
; CHECK-NEXT: ld.h $t1, $sp, 0
358+
; CHECK-NEXT: ld.h $t0, $sp, 64
359+
; CHECK-NEXT: ld.h $t1, $sp, 56
360+
; CHECK-NEXT: ld.h $t2, $sp, 48
361+
; CHECK-NEXT: ld.h $t3, $sp, 40
362+
; CHECK-NEXT: ld.h $t4, $sp, 32
363+
; CHECK-NEXT: ld.h $t5, $sp, 24
364+
; CHECK-NEXT: ld.h $t6, $sp, 16
365+
; CHECK-NEXT: ld.h $t7, $sp, 8
366+
; CHECK-NEXT: ld.h $t8, $sp, 0
376367
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
377368
; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
378369
; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2
379370
; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3
380371
; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4
381372
; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5
382373
; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6
383-
; CHECK-NEXT: vinsgr2vr.h $vr0, $t1, 7
384-
; CHECK-NEXT: ld.h $a1, $sp, 16
385-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
386-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
387-
; CHECK-NEXT: vinsgr2vr.h $vr1, $t0, 0
374+
; CHECK-NEXT: vinsgr2vr.h $vr0, $t8, 7
375+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
376+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t7, 0
388377
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
389-
; CHECK-NEXT: ld.h $a2, $sp, 24
390-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
391-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
392-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
378+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
379+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t6, 1
393380
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
394-
; CHECK-NEXT: ld.h $a1, $sp, 32
395-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
396-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
397-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 2
381+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
382+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t5, 2
398383
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
399-
; CHECK-NEXT: ld.h $a2, $sp, 40
400-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
401-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
402-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
384+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
385+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t4, 3
403386
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
404-
; CHECK-NEXT: ld.h $a1, $sp, 48
405-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
406-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
407-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 4
387+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
388+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t3, 4
408389
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
409-
; CHECK-NEXT: ld.h $a2, $sp, 56
410-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
411-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
412-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
390+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
391+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t2, 5
413392
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
414-
; CHECK-NEXT: ld.h $a1, $sp, 64
415-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
416-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
417-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 6
393+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
394+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t1, 6
418395
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
419-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
420-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
421-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
396+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
397+
; CHECK-NEXT: vinsgr2vr.h $vr1, $t0, 7
422398
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
423399
; CHECK-NEXT: xvst $xr0, $a0, 0
424400
; CHECK-NEXT: ret

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind {
1818
; CHECK-LABEL: insert_32xi8_upper:
1919
; CHECK: # %bb.0:
2020
; CHECK-NEXT: xvld $xr0, $a0, 0
21-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
22-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
21+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
2322
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0
2423
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
2524
; CHECK-NEXT: xvst $xr0, $a1, 0
@@ -47,8 +46,7 @@ define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind {
4746
; CHECK-LABEL: insert_16xi16_upper:
4847
; CHECK: # %bb.0:
4948
; CHECK-NEXT: xvld $xr0, $a0, 0
50-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
51-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
49+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
5250
; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0
5351
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
5452
; CHECK-NEXT: xvst $xr0, $a1, 0

0 commit comments

Comments
 (0)