@@ -18,13 +18,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
18
18
; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
19
19
; RV32-NEXT: blez a6, .LBB0_17
20
20
; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
21
- ; RV32-NEXT: addi t0 , a7, -1
21
+ ; RV32-NEXT: addi t3 , a7, -1
22
22
; RV32-NEXT: csrr t2, vlenb
23
- ; RV32-NEXT: mul t3, a1, t0
24
- ; RV32-NEXT: mul t4, a3, t0
25
- ; RV32-NEXT: mul t5, a5, t0
26
23
; RV32-NEXT: slli t1, t2, 1
27
- ; RV32-NEXT: li t6 , 32
24
+ ; RV32-NEXT: li t4 , 32
28
25
; RV32-NEXT: mv t0, t1
29
26
; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
30
27
; RV32-NEXT: li t0, 32
@@ -34,27 +31,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
34
31
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
35
32
; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
36
33
; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
34
+ ; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill
37
35
; RV32-NEXT: .cfi_offset s0, -4
38
36
; RV32-NEXT: .cfi_offset s1, -8
39
37
; RV32-NEXT: .cfi_offset s2, -12
38
+ ; RV32-NEXT: .cfi_offset s3, -16
40
39
; RV32-NEXT: .cfi_remember_state
41
- ; RV32-NEXT: add t3, a0, t3
42
- ; RV32-NEXT: add t4, a2, t4
43
- ; RV32-NEXT: add s0, a4, t5
44
- ; RV32-NEXT: bltu t6, t1, .LBB0_6
40
+ ; RV32-NEXT: mul t5, a1, t3
41
+ ; RV32-NEXT: add s0, a0, a6
42
+ ; RV32-NEXT: mul t6, a3, t3
43
+ ; RV32-NEXT: add s2, a2, a6
44
+ ; RV32-NEXT: mul s1, a5, t3
45
+ ; RV32-NEXT: add s3, a4, a6
46
+ ; RV32-NEXT: bltu t4, t1, .LBB0_6
45
47
; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader
46
48
; RV32-NEXT: li t1, 32
47
49
; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader
48
- ; RV32-NEXT: add t3, t3, a6
49
- ; RV32-NEXT: add t5, t4, a6
50
- ; RV32-NEXT: add t4, s0, a6
50
+ ; RV32-NEXT: add t3, s0, t5
51
+ ; RV32-NEXT: add t6, s2, t6
52
+ ; RV32-NEXT: add t4, s3, s1
51
53
; RV32-NEXT: j .LBB0_8
52
54
; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader
53
55
; RV32-NEXT: mv t1, t0
54
56
; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader
55
57
; RV32-NEXT: .cfi_restore_state
56
58
; RV32-NEXT: li t0, 0
57
- ; RV32-NEXT: sltu t5, a0, t5
59
+ ; RV32-NEXT: sltu t5, a0, t6
58
60
; RV32-NEXT: sltu t6, a2, t3
59
61
; RV32-NEXT: and t5, t5, t6
60
62
; RV32-NEXT: sltu t4, a0, t4
@@ -140,9 +142,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
140
142
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
141
143
; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
142
144
; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
145
+ ; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
143
146
; RV32-NEXT: .cfi_restore s0
144
147
; RV32-NEXT: .cfi_restore s1
145
148
; RV32-NEXT: .cfi_restore s2
149
+ ; RV32-NEXT: .cfi_restore s3
146
150
; RV32-NEXT: addi sp, sp, 16
147
151
; RV32-NEXT: .cfi_def_cfa_offset 0
148
152
; RV32-NEXT: .LBB0_17: # %for.cond.cleanup
@@ -190,7 +194,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
190
194
; RV64P670-NEXT: or t6, s0, s1
191
195
; RV64P670-NEXT: sltu s1, a0, t5
192
196
; RV64P670-NEXT: sltu s0, a4, t4
193
- ; RV64P670-NEXT: mv t5 , a0
197
+ ; RV64P670-NEXT: add t4 , a0, a6
194
198
; RV64P670-NEXT: and s0, s0, s1
195
199
; RV64P670-NEXT: or s1, a1, a5
196
200
; RV64P670-NEXT: srli s1, s1, 63
@@ -200,11 +204,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
200
204
; RV64P670-NEXT: or s0, t6, s0
201
205
; RV64P670-NEXT: sltu s1, a6, s1
202
206
; RV64P670-NEXT: or s0, s0, s1
203
- ; RV64P670-NEXT: andi t4 , s0, 1
207
+ ; RV64P670-NEXT: andi t5 , s0, 1
204
208
; RV64P670-NEXT: j .LBB0_4
205
209
; RV64P670-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
206
210
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
207
- ; RV64P670-NEXT: add t5, t5 , a1
211
+ ; RV64P670-NEXT: add a0, a0 , a1
208
212
; RV64P670-NEXT: add a2, a2, a3
209
213
; RV64P670-NEXT: add a4, a4, a5
210
214
; RV64P670-NEXT: addiw t1, t1, 1
@@ -214,7 +218,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
214
218
; RV64P670-NEXT: # =>This Loop Header: Depth=1
215
219
; RV64P670-NEXT: # Child Loop BB0_7 Depth 2
216
220
; RV64P670-NEXT: # Child Loop BB0_10 Depth 2
217
- ; RV64P670-NEXT: beqz t4 , .LBB0_6
221
+ ; RV64P670-NEXT: beqz t5 , .LBB0_6
218
222
; RV64P670-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
219
223
; RV64P670-NEXT: li t6, 0
220
224
; RV64P670-NEXT: j .LBB0_9
@@ -223,7 +227,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
223
227
; RV64P670-NEXT: slli s1, t2, 28
224
228
; RV64P670-NEXT: mv s2, a2
225
229
; RV64P670-NEXT: mv s3, a4
226
- ; RV64P670-NEXT: mv s4, t5
230
+ ; RV64P670-NEXT: mv s4, a0
227
231
; RV64P670-NEXT: sub s1, s1, t3
228
232
; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma
229
233
; RV64P670-NEXT: and t6, s1, a6
@@ -246,11 +250,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
246
250
; RV64P670-NEXT: .LBB0_9: # %for.body4.us.preheader
247
251
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
248
252
; RV64P670-NEXT: mul s2, a1, t0
249
- ; RV64P670-NEXT: add s0, a0, a6
250
- ; RV64P670-NEXT: add s1, t5, t6
253
+ ; RV64P670-NEXT: add s1, a0, t6
251
254
; RV64P670-NEXT: add s4, a4, t6
252
255
; RV64P670-NEXT: add t6, t6, a2
253
- ; RV64P670-NEXT: add s2, s2, s0
256
+ ; RV64P670-NEXT: add s2, s2, t4
254
257
; RV64P670-NEXT: .LBB0_10: # %for.body4.us
255
258
; RV64P670-NEXT: # Parent Loop BB0_4 Depth=1
256
259
; RV64P670-NEXT: # => This Inner Loop Header: Depth=2
@@ -332,12 +335,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
332
335
; RV64X60-NEXT: or s0, t4, s0
333
336
; RV64X60-NEXT: sltu s1, a6, s1
334
337
; RV64X60-NEXT: or s0, s0, s1
335
- ; RV64X60-NEXT: andi t4, s0, 1
336
- ; RV64X60-NEXT: mv t5, a0
338
+ ; RV64X60-NEXT: add t4, a0, a6
339
+ ; RV64X60-NEXT: andi t5, s0, 1
337
340
; RV64X60-NEXT: j .LBB0_4
338
341
; RV64X60-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
339
342
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
340
- ; RV64X60-NEXT: add t5, t5 , a1
343
+ ; RV64X60-NEXT: add a0, a0 , a1
341
344
; RV64X60-NEXT: add a2, a2, a3
342
345
; RV64X60-NEXT: addiw t1, t1, 1
343
346
; RV64X60-NEXT: add a4, a4, a5
@@ -347,7 +350,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
347
350
; RV64X60-NEXT: # =>This Loop Header: Depth=1
348
351
; RV64X60-NEXT: # Child Loop BB0_7 Depth 2
349
352
; RV64X60-NEXT: # Child Loop BB0_10 Depth 2
350
- ; RV64X60-NEXT: beqz t4 , .LBB0_6
353
+ ; RV64X60-NEXT: beqz t5 , .LBB0_6
351
354
; RV64X60-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
352
355
; RV64X60-NEXT: li t6, 0
353
356
; RV64X60-NEXT: j .LBB0_9
@@ -358,7 +361,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
358
361
; RV64X60-NEXT: and t6, s1, a6
359
362
; RV64X60-NEXT: mv s2, a2
360
363
; RV64X60-NEXT: mv s3, a4
361
- ; RV64X60-NEXT: mv s4, t5
364
+ ; RV64X60-NEXT: mv s4, a0
362
365
; RV64X60-NEXT: mv s1, t6
363
366
; RV64X60-NEXT: vsetvli s0, zero, e8, m2, ta, ma
364
367
; RV64X60-NEXT: .LBB0_7: # %vector.body
@@ -379,9 +382,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
379
382
; RV64X60-NEXT: .LBB0_9: # %for.body4.us.preheader
380
383
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
381
384
; RV64X60-NEXT: mul s2, a1, t0
382
- ; RV64X60-NEXT: add s1, a0, a6
383
- ; RV64X60-NEXT: add s0, t5, t6
384
- ; RV64X60-NEXT: add s2, s2, s1
385
+ ; RV64X60-NEXT: add s0, a0, t6
386
+ ; RV64X60-NEXT: add s2, s2, t4
385
387
; RV64X60-NEXT: add s4, a4, t6
386
388
; RV64X60-NEXT: add t6, t6, a2
387
389
; RV64X60-NEXT: .LBB0_10: # %for.body4.us
@@ -466,16 +468,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
466
468
; RV64-NEXT: or s0, a1, a5
467
469
; RV64-NEXT: srli s0, s0, 63
468
470
; RV64-NEXT: or t5, t5, s0
471
+ ; RV64-NEXT: sltu s0, a6, t4
469
472
; RV64-NEXT: or t5, t6, t5
470
- ; RV64-NEXT: sltu t4, a6, t4
471
- ; RV64-NEXT: or t4, t4, t5
472
- ; RV64-NEXT: andi t4, t4, 1
473
- ; RV64-NEXT: mv t5, a0
473
+ ; RV64-NEXT: add t4, a0, a6
474
+ ; RV64-NEXT: or t5, s0, t5
475
+ ; RV64-NEXT: andi t5, t5, 1
474
476
; RV64-NEXT: csrwi vxrm, 0
475
477
; RV64-NEXT: j .LBB0_6
476
478
; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
477
479
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
478
- ; RV64-NEXT: add t5, t5 , a1
480
+ ; RV64-NEXT: add a0, a0 , a1
479
481
; RV64-NEXT: add a2, a2, a3
480
482
; RV64-NEXT: add a4, a4, a5
481
483
; RV64-NEXT: addiw t3, t3, 1
@@ -485,7 +487,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
485
487
; RV64-NEXT: # =>This Loop Header: Depth=1
486
488
; RV64-NEXT: # Child Loop BB0_9 Depth 2
487
489
; RV64-NEXT: # Child Loop BB0_12 Depth 2
488
- ; RV64-NEXT: beqz t4 , .LBB0_8
490
+ ; RV64-NEXT: beqz t5 , .LBB0_8
489
491
; RV64-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1
490
492
; RV64-NEXT: li t6, 0
491
493
; RV64-NEXT: j .LBB0_11
@@ -496,7 +498,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
496
498
; RV64-NEXT: and t6, t6, a6
497
499
; RV64-NEXT: mv s0, a2
498
500
; RV64-NEXT: mv s1, a4
499
- ; RV64-NEXT: mv s2, t5
501
+ ; RV64-NEXT: mv s2, a0
500
502
; RV64-NEXT: mv s3, t6
501
503
; RV64-NEXT: vsetvli s4, zero, e8, m2, ta, ma
502
504
; RV64-NEXT: .LBB0_9: # %vector.body
@@ -516,25 +518,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
516
518
; RV64-NEXT: beq t6, a6, .LBB0_5
517
519
; RV64-NEXT: .LBB0_11: # %for.body4.us.preheader
518
520
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
519
- ; RV64-NEXT: mul s1, a1, t2
520
- ; RV64-NEXT: add s2, a0, a6
521
- ; RV64-NEXT: add s0, t5, t6
522
- ; RV64-NEXT: add s1, s2, s1
523
- ; RV64-NEXT: add s2, a4, t6
521
+ ; RV64-NEXT: mul s2, a1, t2
522
+ ; RV64-NEXT: add s0, a0, t6
523
+ ; RV64-NEXT: add s1, a4, t6
524
+ ; RV64-NEXT: add s2, t4, s2
524
525
; RV64-NEXT: add t6, a2, t6
525
526
; RV64-NEXT: .LBB0_12: # %for.body4.us
526
527
; RV64-NEXT: # Parent Loop BB0_6 Depth=1
527
528
; RV64-NEXT: # => This Inner Loop Header: Depth=2
528
529
; RV64-NEXT: lbu s3, 0(t6)
529
- ; RV64-NEXT: lbu s4, 0(s2 )
530
+ ; RV64-NEXT: lbu s4, 0(s1 )
530
531
; RV64-NEXT: add s3, s3, s4
531
532
; RV64-NEXT: addi s3, s3, 1
532
533
; RV64-NEXT: srli s3, s3, 1
533
534
; RV64-NEXT: sb s3, 0(s0)
534
535
; RV64-NEXT: addi s0, s0, 1
535
- ; RV64-NEXT: addi s2, s2 , 1
536
+ ; RV64-NEXT: addi s1, s1 , 1
536
537
; RV64-NEXT: addi t6, t6, 1
537
- ; RV64-NEXT: bne s0, s1 , .LBB0_12
538
+ ; RV64-NEXT: bne s0, s2 , .LBB0_12
538
539
; RV64-NEXT: j .LBB0_5
539
540
; RV64-NEXT: .LBB0_13:
540
541
; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
0 commit comments