@@ -18,13 +18,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
1818; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
1919; RV32-NEXT: blez a6, .LBB0_17
2020; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
21- ; RV32-NEXT: addi t0 , a7, -1
21+ ; RV32-NEXT: addi t3 , a7, -1
2222; RV32-NEXT: csrr t2, vlenb
23- ; RV32-NEXT: mul t3, a1, t0
24- ; RV32-NEXT: mul t4, a3, t0
25- ; RV32-NEXT: mul t5, a5, t0
2623; RV32-NEXT: slli t1, t2, 1
27- ; RV32-NEXT: li t6 , 32
24+ ; RV32-NEXT: li t4 , 32
2825; RV32-NEXT: mv t0, t1
2926; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
3027; RV32-NEXT: li t0, 32
@@ -34,27 +31,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
3431; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3532; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3633; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
34+ ; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill
3735; RV32-NEXT: .cfi_offset s0, -4
3836; RV32-NEXT: .cfi_offset s1, -8
3937; RV32-NEXT: .cfi_offset s2, -12
38+ ; RV32-NEXT: .cfi_offset s3, -16
4039; RV32-NEXT: .cfi_remember_state
41- ; RV32-NEXT: add t3, a0, t3
42- ; RV32-NEXT: add t4, a2, t4
43- ; RV32-NEXT: add s0, a4, t5
44- ; RV32-NEXT: bltu t6, t1, .LBB0_6
40+ ; RV32-NEXT: mul t5, a1, t3
41+ ; RV32-NEXT: add s0, a0, a6
42+ ; RV32-NEXT: mul t6, a3, t3
43+ ; RV32-NEXT: add s2, a2, a6
44+ ; RV32-NEXT: mul s1, a5, t3
45+ ; RV32-NEXT: add s3, a4, a6
46+ ; RV32-NEXT: bltu t4, t1, .LBB0_6
4547; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader
4648; RV32-NEXT: li t1, 32
4749; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader
48- ; RV32-NEXT: add t3, t3, a6
49- ; RV32-NEXT: add t5, t4, a6
50- ; RV32-NEXT: add t4, s0, a6
50+ ; RV32-NEXT: add t3, s0, t5
51+ ; RV32-NEXT: add t6, s2, t6
52+ ; RV32-NEXT: add t4, s3, s1
5153; RV32-NEXT: j .LBB0_8
5254; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader
5355; RV32-NEXT: mv t1, t0
5456; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader
5557; RV32-NEXT: .cfi_restore_state
5658; RV32-NEXT: li t0, 0
57- ; RV32-NEXT: sltu t5, a0, t5
59+ ; RV32-NEXT: sltu t5, a0, t6
5860; RV32-NEXT: sltu t6, a2, t3
5961; RV32-NEXT: and t5, t5, t6
6062; RV32-NEXT: sltu t4, a0, t4
@@ -140,9 +142,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
140142; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
141143; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
142144; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
145+ ; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
143146; RV32-NEXT: .cfi_restore s0
144147; RV32-NEXT: .cfi_restore s1
145148; RV32-NEXT: .cfi_restore s2
149+ ; RV32-NEXT: .cfi_restore s3
146150; RV32-NEXT: addi sp, sp, 16
147151; RV32-NEXT: .cfi_def_cfa_offset 0
148152; RV32-NEXT: .LBB0_17: # %for.cond.cleanup
@@ -190,7 +194,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
190194; RV64P670-NEXT: or t6, s0, s1
191195; RV64P670-NEXT: sltu s1, a0, t5
192196; RV64P670-NEXT: sltu s0, a4, t4
193- ; RV64P670-NEXT: mv t5 , a0
197+ ; RV64P670-NEXT: add t4 , a0, a6
194198; RV64P670-NEXT: and s0, s0, s1
195199; RV64P670-NEXT: or s1, a1, a5
196200; RV64P670-NEXT: srli s1, s1, 63
@@ -200,11 +204,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
200204; RV64P670-NEXT: or s0, t6, s0
201205; RV64P670-NEXT: sltu s1, a6, s1
202206; RV64P670-NEXT: or s0, s0, s1
203- ; RV64P670-NEXT: andi t4 , s0, 1
207+ ; RV64P670-NEXT: andi t5 , s0, 1
204208; RV64P670-NEXT: j .LBB0_4
205209; RV64P670-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
206210; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
207- ; RV64P670-NEXT: add t5, t5 , a1
211+ ; RV64P670-NEXT: add a0, a0 , a1
208212; RV64P670-NEXT: add a2, a2, a3
209213; RV64P670-NEXT: add a4, a4, a5
210214; RV64P670-NEXT: addiw t1, t1, 1
@@ -214,7 +218,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
214218; RV64P670-NEXT: # =>This Loop Header: Depth=1
215219; RV64P670-NEXT: # Child Loop BB0_7 Depth 2
216220; RV64P670-NEXT: # Child Loop BB0_10 Depth 2
217- ; RV64P670-NEXT: beqz t4 , .LBB0_6
221+ ; RV64P670-NEXT: beqz t5 , .LBB0_6
218222; RV64P670-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
219223; RV64P670-NEXT: li t6, 0
220224; RV64P670-NEXT: j .LBB0_9
@@ -223,7 +227,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
223227; RV64P670-NEXT: slli s1, t2, 28
224228; RV64P670-NEXT: mv s2, a2
225229; RV64P670-NEXT: mv s3, a4
226- ; RV64P670-NEXT: mv s4, t5
230+ ; RV64P670-NEXT: mv s4, a0
227231; RV64P670-NEXT: sub s1, s1, t3
228232; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma
229233; RV64P670-NEXT: and t6, s1, a6
@@ -246,11 +250,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
246250; RV64P670-NEXT: .LBB0_9: # %for.body4.us.preheader
247251; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
248252; RV64P670-NEXT: mul s2, a1, t0
249- ; RV64P670-NEXT: add s0, a0, a6
250- ; RV64P670-NEXT: add s1, t5, t6
253+ ; RV64P670-NEXT: add s1, a0, t6
251254; RV64P670-NEXT: add s4, a4, t6
252255; RV64P670-NEXT: add t6, t6, a2
253- ; RV64P670-NEXT: add s2, s2, s0
256+ ; RV64P670-NEXT: add s2, s2, t4
254257; RV64P670-NEXT: .LBB0_10: # %for.body4.us
255258; RV64P670-NEXT: # Parent Loop BB0_4 Depth=1
256259; RV64P670-NEXT: # => This Inner Loop Header: Depth=2
@@ -332,12 +335,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
332335; RV64X60-NEXT: or s0, t4, s0
333336; RV64X60-NEXT: sltu s1, a6, s1
334337; RV64X60-NEXT: or s0, s0, s1
335- ; RV64X60-NEXT: andi t4, s0, 1
336- ; RV64X60-NEXT: mv t5, a0
338+ ; RV64X60-NEXT: add t4, a0, a6
339+ ; RV64X60-NEXT: andi t5, s0, 1
337340; RV64X60-NEXT: j .LBB0_4
338341; RV64X60-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
339342; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
340- ; RV64X60-NEXT: add t5, t5 , a1
343+ ; RV64X60-NEXT: add a0, a0 , a1
341344; RV64X60-NEXT: add a2, a2, a3
342345; RV64X60-NEXT: addiw t1, t1, 1
343346; RV64X60-NEXT: add a4, a4, a5
@@ -347,7 +350,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
347350; RV64X60-NEXT: # =>This Loop Header: Depth=1
348351; RV64X60-NEXT: # Child Loop BB0_7 Depth 2
349352; RV64X60-NEXT: # Child Loop BB0_10 Depth 2
350- ; RV64X60-NEXT: beqz t4 , .LBB0_6
353+ ; RV64X60-NEXT: beqz t5 , .LBB0_6
351354; RV64X60-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
352355; RV64X60-NEXT: li t6, 0
353356; RV64X60-NEXT: j .LBB0_9
@@ -358,7 +361,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
358361; RV64X60-NEXT: and t6, s1, a6
359362; RV64X60-NEXT: mv s2, a2
360363; RV64X60-NEXT: mv s3, a4
361- ; RV64X60-NEXT: mv s4, t5
364+ ; RV64X60-NEXT: mv s4, a0
362365; RV64X60-NEXT: mv s1, t6
363366; RV64X60-NEXT: vsetvli s0, zero, e8, m2, ta, ma
364367; RV64X60-NEXT: .LBB0_7: # %vector.body
@@ -379,9 +382,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
379382; RV64X60-NEXT: .LBB0_9: # %for.body4.us.preheader
380383; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
381384; RV64X60-NEXT: mul s2, a1, t0
382- ; RV64X60-NEXT: add s1, a0, a6
383- ; RV64X60-NEXT: add s0, t5, t6
384- ; RV64X60-NEXT: add s2, s2, s1
385+ ; RV64X60-NEXT: add s0, a0, t6
386+ ; RV64X60-NEXT: add s2, s2, t4
385387; RV64X60-NEXT: add s4, a4, t6
386388; RV64X60-NEXT: add t6, t6, a2
387389; RV64X60-NEXT: .LBB0_10: # %for.body4.us
@@ -466,16 +468,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
466468; RV64-NEXT: or s0, a1, a5
467469; RV64-NEXT: srli s0, s0, 63
468470; RV64-NEXT: or t5, t5, s0
471+ ; RV64-NEXT: sltu s0, a6, t4
469472; RV64-NEXT: or t5, t6, t5
470- ; RV64-NEXT: sltu t4, a6, t4
471- ; RV64-NEXT: or t4, t4, t5
472- ; RV64-NEXT: andi t4, t4, 1
473- ; RV64-NEXT: mv t5, a0
473+ ; RV64-NEXT: add t4, a0, a6
474+ ; RV64-NEXT: or t5, s0, t5
475+ ; RV64-NEXT: andi t5, t5, 1
474476; RV64-NEXT: csrwi vxrm, 0
475477; RV64-NEXT: j .LBB0_6
476478; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
477479; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
478- ; RV64-NEXT: add t5, t5 , a1
480+ ; RV64-NEXT: add a0, a0 , a1
479481; RV64-NEXT: add a2, a2, a3
480482; RV64-NEXT: add a4, a4, a5
481483; RV64-NEXT: addiw t3, t3, 1
@@ -485,7 +487,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
485487; RV64-NEXT: # =>This Loop Header: Depth=1
486488; RV64-NEXT: # Child Loop BB0_9 Depth 2
487489; RV64-NEXT: # Child Loop BB0_12 Depth 2
488- ; RV64-NEXT: beqz t4 , .LBB0_8
490+ ; RV64-NEXT: beqz t5 , .LBB0_8
489491; RV64-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1
490492; RV64-NEXT: li t6, 0
491493; RV64-NEXT: j .LBB0_11
@@ -496,7 +498,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
496498; RV64-NEXT: and t6, t6, a6
497499; RV64-NEXT: mv s0, a2
498500; RV64-NEXT: mv s1, a4
499- ; RV64-NEXT: mv s2, t5
501+ ; RV64-NEXT: mv s2, a0
500502; RV64-NEXT: mv s3, t6
501503; RV64-NEXT: vsetvli s4, zero, e8, m2, ta, ma
502504; RV64-NEXT: .LBB0_9: # %vector.body
@@ -516,25 +518,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
516518; RV64-NEXT: beq t6, a6, .LBB0_5
517519; RV64-NEXT: .LBB0_11: # %for.body4.us.preheader
518520; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
519- ; RV64-NEXT: mul s1, a1, t2
520- ; RV64-NEXT: add s2, a0, a6
521- ; RV64-NEXT: add s0, t5, t6
522- ; RV64-NEXT: add s1, s2, s1
523- ; RV64-NEXT: add s2, a4, t6
521+ ; RV64-NEXT: mul s2, a1, t2
522+ ; RV64-NEXT: add s0, a0, t6
523+ ; RV64-NEXT: add s1, a4, t6
524+ ; RV64-NEXT: add s2, t4, s2
524525; RV64-NEXT: add t6, a2, t6
525526; RV64-NEXT: .LBB0_12: # %for.body4.us
526527; RV64-NEXT: # Parent Loop BB0_6 Depth=1
527528; RV64-NEXT: # => This Inner Loop Header: Depth=2
528529; RV64-NEXT: lbu s3, 0(t6)
529- ; RV64-NEXT: lbu s4, 0(s2 )
530+ ; RV64-NEXT: lbu s4, 0(s1 )
530531; RV64-NEXT: add s3, s3, s4
531532; RV64-NEXT: addi s3, s3, 1
532533; RV64-NEXT: srli s3, s3, 1
533534; RV64-NEXT: sb s3, 0(s0)
534535; RV64-NEXT: addi s0, s0, 1
535- ; RV64-NEXT: addi s2, s2 , 1
536+ ; RV64-NEXT: addi s1, s1 , 1
536537; RV64-NEXT: addi t6, t6, 1
537- ; RV64-NEXT: bne s0, s1 , .LBB0_12
538+ ; RV64-NEXT: bne s0, s2 , .LBB0_12
538539; RV64-NEXT: j .LBB0_5
539540; RV64-NEXT: .LBB0_13:
540541; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
0 commit comments