@@ -291,54 +291,61 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs
 ; CHECK-LABEL: test_vp_splice_nxv16i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a5, a4, 1
+; CHECK-NEXT: addi a5, a5, -1
 ; CHECK-NEXT: slli a1, a4, 3
-; CHECK-NEXT: add a5, a0, a1
-; CHECK-NEXT: vl8re64.v v24, (a5)
-; CHECK-NEXT: mv a5, a2
-; CHECK-NEXT: bltu a2, a4, .LBB21_2
+; CHECK-NEXT: mv a7, a2
+; CHECK-NEXT: bltu a2, a5, .LBB21_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: mv a7, a5
 ; CHECK-NEXT: .LBB21_2:
 ; CHECK-NEXT: addi sp, sp, -80
 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 5
-; CHECK-NEXT: sub sp, sp, a6
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 5
+; CHECK-NEXT: sub sp, sp, a5
 ; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: addi a0, sp, 64
-; CHECK-NEXT: sub a6, a2, a4
-; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: sltu a5, a2, a6
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a5, a5, a6
-; CHECK-NEXT: add a6, a0, a1
-; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v16, (a6)
-; CHECK-NEXT: sub a5, a3, a4
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: sltu a6, a3, a5
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: addi a0, a6, -1
-; CHECK-NEXT: add a6, a2, a1
-; CHECK-NEXT: and a0, a0, a5
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v24, (a6)
-; CHECK-NEXT: bltu a3, a4, .LBB21_4
+; CHECK-NEXT: add a5, a0, a1
+; CHECK-NEXT: slli a7, a7, 3
+; CHECK-NEXT: addi a6, sp, 64
+; CHECK-NEXT: mv t0, a2
+; CHECK-NEXT: bltu a2, a4, .LBB21_4
 ; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: mv t0, a4
 ; CHECK-NEXT: .LBB21_4:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v0, (a2)
-; CHECK-NEXT: addi a2, sp, 104
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: vl8re64.v v24, (a5)
+; CHECK-NEXT: add a5, a6, a7
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a6)
+; CHECK-NEXT: sub a0, a2, a4
+; CHECK-NEXT: sltu a2, a2, a0
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: add a6, a6, a1
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v16, (a6)
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: bltu a3, a4, .LBB21_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: .LBB21_6:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v0, (a5)
+; CHECK-NEXT: sub a2, a3, a4
+; CHECK-NEXT: add a5, a5, a1
+; CHECK-NEXT: sltu a3, a3, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: addi a3, sp, 104
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v24, (a5)
 ; CHECK-NEXT: vle64.v v16, (a1)
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a2)
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a3)
 ; CHECK-NEXT: addi sp, s0, -80
 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -351,14 +358,14 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs
 define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16 x i64> %va, <vscale x 16 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) nounwind {
 ; CHECK-LABEL: test_vp_splice_nxv16i64_negative_offset:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a1, a4, 3
-; CHECK-NEXT: add a5, a0, a1
-; CHECK-NEXT: vl8re64.v v0, (a5)
-; CHECK-NEXT: mv a5, a2
-; CHECK-NEXT: bltu a2, a4, .LBB22_2
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a6, a5, 1
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: slli a1, a5, 3
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bltu a2, a6, .LBB22_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: mv a4, a6
 ; CHECK-NEXT: .LBB22_2:
 ; CHECK-NEXT: addi sp, sp, -80
 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
@@ -368,43 +375,50 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16
 ; CHECK-NEXT: slli a6, a6, 5
 ; CHECK-NEXT: sub sp, sp, a6
 ; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 64
-; CHECK-NEXT: sub a6, a2, a4
-; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: sltu a5, a2, a6
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a5, a5, a6
 ; CHECK-NEXT: add a6, a0, a1
-; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v16, (a6)
-; CHECK-NEXT: sub a6, a3, a4
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: sltu a7, a3, a6
-; CHECK-NEXT: add a5, a0, a2
-; CHECK-NEXT: addi a7, a7, -1
-; CHECK-NEXT: and a0, a7, a6
-; CHECK-NEXT: add a6, a5, a1
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v0, (a6)
-; CHECK-NEXT: bltu a3, a4, .LBB22_4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: addi a7, sp, 64
+; CHECK-NEXT: mv t0, a2
+; CHECK-NEXT: bltu a2, a5, .LBB22_4
 ; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: mv t0, a5
 ; CHECK-NEXT: .LBB22_4:
-; CHECK-NEXT: li a4, 8
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v24, (a5)
-; CHECK-NEXT: bltu a2, a4, .LBB22_6
+; CHECK-NEXT: vl8re64.v v24, (a6)
+; CHECK-NEXT: add a6, a7, a4
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a7)
+; CHECK-NEXT: sub a0, a2, a5
+; CHECK-NEXT: sltu a2, a2, a0
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: add a7, a7, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v16, (a7)
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: bltu a3, a5, .LBB22_6
 ; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: li a2, 8
+; CHECK-NEXT: mv a0, a5
 ; CHECK-NEXT: .LBB22_6:
-; CHECK-NEXT: sub a5, a5, a2
-; CHECK-NEXT: add a1, a5, a1
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v0, (a6)
+; CHECK-NEXT: sub a2, a3, a5
+; CHECK-NEXT: add a5, a6, a1
+; CHECK-NEXT: sltu a3, a3, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: li a3, 8
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v24, (a5)
+; CHECK-NEXT: bltu a4, a3, .LBB22_8
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: li a4, 8
+; CHECK-NEXT: .LBB22_8:
+; CHECK-NEXT: sub a2, a6, a4
+; CHECK-NEXT: add a1, a2, a1
 ; CHECK-NEXT: vle64.v v16, (a1)
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a5)
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a3)
 ; CHECK-NEXT: addi sp, s0, -80
 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload