Skip to content

Commit 3a2c8f7

Browse files
authored
[RISCV] Move MachineCombiner to addILPOpts() (#158071)
Move the pass into `addILPOpts()` so that it runs before `MachineCSE` and other passes. Fixes #158063.
1 parent 76aba5d commit 3a2c8f7

File tree

7 files changed

+87
-92
lines changed

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -395,6 +395,7 @@ class RISCVPassConfig : public TargetPassConfig {
395395
void addPreRegAlloc() override;
396396
void addPostRegAlloc() override;
397397
void addFastRegAlloc() override;
398+
bool addILPOpts() override;
398399

399400
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
400401
};
@@ -580,9 +581,6 @@ void RISCVPassConfig::addMachineSSAOptimization() {
580581

581582
TargetPassConfig::addMachineSSAOptimization();
582583

583-
if (EnableMachineCombiner)
584-
addPass(&MachineCombinerID);
585-
586584
if (TM->getTargetTriple().isRISCV64()) {
587585
addPass(createRISCVOptWInstrsPass());
588586
}
@@ -617,6 +615,13 @@ void RISCVPassConfig::addPostRegAlloc() {
617615
addPass(createRISCVRedundantCopyEliminationPass());
618616
}
619617

618+
bool RISCVPassConfig::addILPOpts() {
619+
if (EnableMachineCombiner)
620+
addPass(&MachineCombinerID);
621+
622+
return true;
623+
}
624+
620625
void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
621626
PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
622627
OptimizationLevel Level) {

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,9 @@
107107
; CHECK-NEXT: Remove dead machine instructions
108108
; CHECK-NEXT: MachineDominator Tree Construction
109109
; CHECK-NEXT: Machine Natural Loop Construction
110+
; CHECK-NEXT: Machine Trace Metrics
111+
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
112+
; CHECK-NEXT: Machine InstCombiner
110113
; CHECK-NEXT: Machine Block Frequency Analysis
111114
; CHECK-NEXT: Early Machine Loop Invariant Code Motion
112115
; CHECK-NEXT: MachineDominator Tree Construction
@@ -117,9 +120,6 @@
117120
; CHECK-NEXT: Machine code sinking
118121
; CHECK-NEXT: Peephole Optimizations
119122
; CHECK-NEXT: Remove dead machine instructions
120-
; CHECK-NEXT: Machine Trace Metrics
121-
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
122-
; CHECK-NEXT: Machine InstCombiner
123123
; RV64-NEXT: RISC-V Optimize W Instructions
124124
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
125125
; CHECK-NEXT: RISC-V Merge Base Offset

llvm/test/CodeGen/RISCV/machine-combiner.ll

Lines changed: 16 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1094,33 +1094,19 @@ declare float @llvm.maxnum.f32(float, float)
10941094
declare double @llvm.maxnum.f64(double, double)
10951095

10961096
define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) {
1097-
; CHECK_LOCAL-LABEL: test_fmadd_strategy:
1098-
; CHECK_LOCAL: # %bb.0: # %entry
1099-
; CHECK_LOCAL-NEXT: fsub.d fa4, fa0, fa1
1100-
; CHECK_LOCAL-NEXT: andi a0, a0, 1
1101-
; CHECK_LOCAL-NEXT: fmv.d fa5, fa0
1102-
; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2
1103-
; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2
1104-
; CHECK_LOCAL-NEXT: # %bb.1: # %entry
1105-
; CHECK_LOCAL-NEXT: fmul.d fa4, fa5, fa1
1106-
; CHECK_LOCAL-NEXT: fmadd.d fa5, fa5, fa1, fa0
1107-
; CHECK_LOCAL-NEXT: fsub.d fa0, fa5, fa4
1108-
; CHECK_LOCAL-NEXT: .LBB76_2: # %entry
1109-
; CHECK_LOCAL-NEXT: ret
1110-
;
1111-
; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
1112-
; CHECK_GLOBAL: # %bb.0: # %entry
1113-
; CHECK_GLOBAL-NEXT: fsub.d fa4, fa0, fa1
1114-
; CHECK_GLOBAL-NEXT: andi a0, a0, 1
1115-
; CHECK_GLOBAL-NEXT: fmv.d fa5, fa0
1116-
; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2
1117-
; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2
1118-
; CHECK_GLOBAL-NEXT: # %bb.1: # %entry
1119-
; CHECK_GLOBAL-NEXT: fmul.d fa5, fa5, fa1
1120-
; CHECK_GLOBAL-NEXT: fadd.d fa4, fa5, fa0
1121-
; CHECK_GLOBAL-NEXT: fsub.d fa0, fa4, fa5
1122-
; CHECK_GLOBAL-NEXT: .LBB76_2: # %entry
1123-
; CHECK_GLOBAL-NEXT: ret
1097+
; CHECK-LABEL: test_fmadd_strategy:
1098+
; CHECK: # %bb.0: # %entry
1099+
; CHECK-NEXT: fsub.d fa5, fa0, fa1
1100+
; CHECK-NEXT: andi a0, a0, 1
1101+
; CHECK-NEXT: beqz a0, .LBB76_2
1102+
; CHECK-NEXT: # %bb.1: # %entry
1103+
; CHECK-NEXT: fmul.d fa4, fa0, fa1
1104+
; CHECK-NEXT: fmadd.d fa5, fa5, fa2, fa4
1105+
; CHECK-NEXT: fsub.d fa0, fa5, fa4
1106+
; CHECK-NEXT: ret
1107+
; CHECK-NEXT: .LBB76_2:
1108+
; CHECK-NEXT: fmul.d fa0, fa5, fa2
1109+
; CHECK-NEXT: ret
11241110
entry:
11251111
%sub = fsub contract double %a0, %a1
11261112
%mul = fmul contract double %sub, %a2
@@ -1132,3 +1118,6 @@ entry:
11321118
%retval.0 = select i1 %tobool.not, double %mul, double %sub3
11331119
ret double %retval.0
11341120
}
1121+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1122+
; CHECK_GLOBAL: {{.*}}
1123+
; CHECK_LOCAL: {{.*}}

llvm/test/CodeGen/RISCV/neg-abs.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -208,14 +208,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
208208
; RV32I-NEXT: sub a1, a1, a3
209209
; RV32I-NEXT: neg a0, a0
210210
; RV32I-NEXT: .LBB5_2:
211-
; RV32I-NEXT: snez a3, a0
212-
; RV32I-NEXT: neg a4, a1
213-
; RV32I-NEXT: sub a3, a4, a3
214-
; RV32I-NEXT: neg a4, a0
211+
; RV32I-NEXT: snez a4, a0
212+
; RV32I-NEXT: neg a3, a0
213+
; RV32I-NEXT: add a4, a1, a4
214+
; RV32I-NEXT: neg a4, a4
215215
; RV32I-NEXT: sw a0, 0(a2)
216216
; RV32I-NEXT: sw a1, 4(a2)
217-
; RV32I-NEXT: mv a0, a4
218-
; RV32I-NEXT: mv a1, a3
217+
; RV32I-NEXT: mv a0, a3
218+
; RV32I-NEXT: mv a1, a4
219219
; RV32I-NEXT: ret
220220
;
221221
; RV32ZBB-LABEL: neg_abs64_multiuse:
@@ -227,14 +227,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
227227
; RV32ZBB-NEXT: sub a1, a1, a3
228228
; RV32ZBB-NEXT: neg a0, a0
229229
; RV32ZBB-NEXT: .LBB5_2:
230-
; RV32ZBB-NEXT: snez a3, a0
231-
; RV32ZBB-NEXT: neg a4, a1
232-
; RV32ZBB-NEXT: sub a3, a4, a3
233-
; RV32ZBB-NEXT: neg a4, a0
230+
; RV32ZBB-NEXT: snez a4, a0
231+
; RV32ZBB-NEXT: neg a3, a0
232+
; RV32ZBB-NEXT: add a4, a1, a4
233+
; RV32ZBB-NEXT: neg a4, a4
234234
; RV32ZBB-NEXT: sw a0, 0(a2)
235235
; RV32ZBB-NEXT: sw a1, 4(a2)
236-
; RV32ZBB-NEXT: mv a0, a4
237-
; RV32ZBB-NEXT: mv a1, a3
236+
; RV32ZBB-NEXT: mv a0, a3
237+
; RV32ZBB-NEXT: mv a1, a4
238238
; RV32ZBB-NEXT: ret
239239
;
240240
; RV64I-LABEL: neg_abs64_multiuse:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -934,7 +934,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
934934
; CHECK-NEXT: add a1, a1, a5
935935
; CHECK-NEXT: slli a3, a3, 32
936936
; CHECK-NEXT: srli a3, a3, 32
937-
; CHECK-NEXT: add a0, a4, a0
937+
; CHECK-NEXT: add a0, a0, a4
938938
; CHECK-NEXT: add a0, a0, a3
939939
; CHECK-NEXT: addi a0, a0, 1
940940
; CHECK-NEXT: .LBB14_6: # %bb35

llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll

Lines changed: 45 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
1818
; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
1919
; RV32-NEXT: blez a6, .LBB0_17
2020
; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
21-
; RV32-NEXT: addi t0, a7, -1
21+
; RV32-NEXT: addi t3, a7, -1
2222
; RV32-NEXT: csrr t2, vlenb
23-
; RV32-NEXT: mul t3, a1, t0
24-
; RV32-NEXT: mul t4, a3, t0
25-
; RV32-NEXT: mul t5, a5, t0
2623
; RV32-NEXT: slli t1, t2, 1
27-
; RV32-NEXT: li t6, 32
24+
; RV32-NEXT: li t4, 32
2825
; RV32-NEXT: mv t0, t1
2926
; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
3027
; RV32-NEXT: li t0, 32
@@ -34,27 +31,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
3431
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3532
; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3633
; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
34+
; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill
3735
; RV32-NEXT: .cfi_offset s0, -4
3836
; RV32-NEXT: .cfi_offset s1, -8
3937
; RV32-NEXT: .cfi_offset s2, -12
38+
; RV32-NEXT: .cfi_offset s3, -16
4039
; RV32-NEXT: .cfi_remember_state
41-
; RV32-NEXT: add t3, a0, t3
42-
; RV32-NEXT: add t4, a2, t4
43-
; RV32-NEXT: add s0, a4, t5
44-
; RV32-NEXT: bltu t6, t1, .LBB0_6
40+
; RV32-NEXT: mul t5, a1, t3
41+
; RV32-NEXT: add s0, a0, a6
42+
; RV32-NEXT: mul t6, a3, t3
43+
; RV32-NEXT: add s2, a2, a6
44+
; RV32-NEXT: mul s1, a5, t3
45+
; RV32-NEXT: add s3, a4, a6
46+
; RV32-NEXT: bltu t4, t1, .LBB0_6
4547
; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader
4648
; RV32-NEXT: li t1, 32
4749
; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader
48-
; RV32-NEXT: add t3, t3, a6
49-
; RV32-NEXT: add t5, t4, a6
50-
; RV32-NEXT: add t4, s0, a6
50+
; RV32-NEXT: add t3, s0, t5
51+
; RV32-NEXT: add t6, s2, t6
52+
; RV32-NEXT: add t4, s3, s1
5153
; RV32-NEXT: j .LBB0_8
5254
; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader
5355
; RV32-NEXT: mv t1, t0
5456
; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader
5557
; RV32-NEXT: .cfi_restore_state
5658
; RV32-NEXT: li t0, 0
57-
; RV32-NEXT: sltu t5, a0, t5
59+
; RV32-NEXT: sltu t5, a0, t6
5860
; RV32-NEXT: sltu t6, a2, t3
5961
; RV32-NEXT: and t5, t5, t6
6062
; RV32-NEXT: sltu t4, a0, t4
@@ -140,9 +142,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
140142
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
141143
; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
142144
; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
145+
; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
143146
; RV32-NEXT: .cfi_restore s0
144147
; RV32-NEXT: .cfi_restore s1
145148
; RV32-NEXT: .cfi_restore s2
149+
; RV32-NEXT: .cfi_restore s3
146150
; RV32-NEXT: addi sp, sp, 16
147151
; RV32-NEXT: .cfi_def_cfa_offset 0
148152
; RV32-NEXT: .LBB0_17: # %for.cond.cleanup
@@ -190,7 +194,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
190194
; RV64P670-NEXT: or t6, s0, s1
191195
; RV64P670-NEXT: sltu s1, a0, t5
192196
; RV64P670-NEXT: sltu s0, a4, t4
193-
; RV64P670-NEXT: mv t5, a0
197+
; RV64P670-NEXT: add t4, a0, a6
194198
; RV64P670-NEXT: and s0, s0, s1
195199
; RV64P670-NEXT: or s1, a1, a5
196200
; RV64P670-NEXT: srli s1, s1, 63
@@ -200,11 +204,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
200204
; RV64P670-NEXT: or s0, t6, s0
201205
; RV64P670-NEXT: sltu s1, a6, s1
202206
; RV64P670-NEXT: or s0, s0, s1
203-
; RV64P670-NEXT: andi t4, s0, 1
207+
; RV64P670-NEXT: andi t5, s0, 1
204208
; RV64P670-NEXT: j .LBB0_4
205209
; RV64P670-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
206210
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
207-
; RV64P670-NEXT: add t5, t5, a1
211+
; RV64P670-NEXT: add a0, a0, a1
208212
; RV64P670-NEXT: add a2, a2, a3
209213
; RV64P670-NEXT: add a4, a4, a5
210214
; RV64P670-NEXT: addiw t1, t1, 1
@@ -214,7 +218,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
214218
; RV64P670-NEXT: # =>This Loop Header: Depth=1
215219
; RV64P670-NEXT: # Child Loop BB0_7 Depth 2
216220
; RV64P670-NEXT: # Child Loop BB0_10 Depth 2
217-
; RV64P670-NEXT: beqz t4, .LBB0_6
221+
; RV64P670-NEXT: beqz t5, .LBB0_6
218222
; RV64P670-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
219223
; RV64P670-NEXT: li t6, 0
220224
; RV64P670-NEXT: j .LBB0_9
@@ -223,7 +227,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
223227
; RV64P670-NEXT: slli s1, t2, 28
224228
; RV64P670-NEXT: mv s2, a2
225229
; RV64P670-NEXT: mv s3, a4
226-
; RV64P670-NEXT: mv s4, t5
230+
; RV64P670-NEXT: mv s4, a0
227231
; RV64P670-NEXT: sub s1, s1, t3
228232
; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma
229233
; RV64P670-NEXT: and t6, s1, a6
@@ -246,11 +250,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
246250
; RV64P670-NEXT: .LBB0_9: # %for.body4.us.preheader
247251
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
248252
; RV64P670-NEXT: mul s2, a1, t0
249-
; RV64P670-NEXT: add s0, a0, a6
250-
; RV64P670-NEXT: add s1, t5, t6
253+
; RV64P670-NEXT: add s1, a0, t6
251254
; RV64P670-NEXT: add s4, a4, t6
252255
; RV64P670-NEXT: add t6, t6, a2
253-
; RV64P670-NEXT: add s2, s2, s0
256+
; RV64P670-NEXT: add s2, s2, t4
254257
; RV64P670-NEXT: .LBB0_10: # %for.body4.us
255258
; RV64P670-NEXT: # Parent Loop BB0_4 Depth=1
256259
; RV64P670-NEXT: # => This Inner Loop Header: Depth=2
@@ -332,12 +335,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
332335
; RV64X60-NEXT: or s0, t4, s0
333336
; RV64X60-NEXT: sltu s1, a6, s1
334337
; RV64X60-NEXT: or s0, s0, s1
335-
; RV64X60-NEXT: andi t4, s0, 1
336-
; RV64X60-NEXT: mv t5, a0
338+
; RV64X60-NEXT: add t4, a0, a6
339+
; RV64X60-NEXT: andi t5, s0, 1
337340
; RV64X60-NEXT: j .LBB0_4
338341
; RV64X60-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
339342
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
340-
; RV64X60-NEXT: add t5, t5, a1
343+
; RV64X60-NEXT: add a0, a0, a1
341344
; RV64X60-NEXT: add a2, a2, a3
342345
; RV64X60-NEXT: addiw t1, t1, 1
343346
; RV64X60-NEXT: add a4, a4, a5
@@ -347,7 +350,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
347350
; RV64X60-NEXT: # =>This Loop Header: Depth=1
348351
; RV64X60-NEXT: # Child Loop BB0_7 Depth 2
349352
; RV64X60-NEXT: # Child Loop BB0_10 Depth 2
350-
; RV64X60-NEXT: beqz t4, .LBB0_6
353+
; RV64X60-NEXT: beqz t5, .LBB0_6
351354
; RV64X60-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
352355
; RV64X60-NEXT: li t6, 0
353356
; RV64X60-NEXT: j .LBB0_9
@@ -358,7 +361,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
358361
; RV64X60-NEXT: and t6, s1, a6
359362
; RV64X60-NEXT: mv s2, a2
360363
; RV64X60-NEXT: mv s3, a4
361-
; RV64X60-NEXT: mv s4, t5
364+
; RV64X60-NEXT: mv s4, a0
362365
; RV64X60-NEXT: mv s1, t6
363366
; RV64X60-NEXT: vsetvli s0, zero, e8, m2, ta, ma
364367
; RV64X60-NEXT: .LBB0_7: # %vector.body
@@ -379,9 +382,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
379382
; RV64X60-NEXT: .LBB0_9: # %for.body4.us.preheader
380383
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
381384
; RV64X60-NEXT: mul s2, a1, t0
382-
; RV64X60-NEXT: add s1, a0, a6
383-
; RV64X60-NEXT: add s0, t5, t6
384-
; RV64X60-NEXT: add s2, s2, s1
385+
; RV64X60-NEXT: add s0, a0, t6
386+
; RV64X60-NEXT: add s2, s2, t4
385387
; RV64X60-NEXT: add s4, a4, t6
386388
; RV64X60-NEXT: add t6, t6, a2
387389
; RV64X60-NEXT: .LBB0_10: # %for.body4.us
@@ -466,16 +468,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
466468
; RV64-NEXT: or s0, a1, a5
467469
; RV64-NEXT: srli s0, s0, 63
468470
; RV64-NEXT: or t5, t5, s0
471+
; RV64-NEXT: sltu s0, a6, t4
469472
; RV64-NEXT: or t5, t6, t5
470-
; RV64-NEXT: sltu t4, a6, t4
471-
; RV64-NEXT: or t4, t4, t5
472-
; RV64-NEXT: andi t4, t4, 1
473-
; RV64-NEXT: mv t5, a0
473+
; RV64-NEXT: add t4, a0, a6
474+
; RV64-NEXT: or t5, s0, t5
475+
; RV64-NEXT: andi t5, t5, 1
474476
; RV64-NEXT: csrwi vxrm, 0
475477
; RV64-NEXT: j .LBB0_6
476478
; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
477479
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
478-
; RV64-NEXT: add t5, t5, a1
480+
; RV64-NEXT: add a0, a0, a1
479481
; RV64-NEXT: add a2, a2, a3
480482
; RV64-NEXT: add a4, a4, a5
481483
; RV64-NEXT: addiw t3, t3, 1
@@ -485,7 +487,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
485487
; RV64-NEXT: # =>This Loop Header: Depth=1
486488
; RV64-NEXT: # Child Loop BB0_9 Depth 2
487489
; RV64-NEXT: # Child Loop BB0_12 Depth 2
488-
; RV64-NEXT: beqz t4, .LBB0_8
490+
; RV64-NEXT: beqz t5, .LBB0_8
489491
; RV64-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1
490492
; RV64-NEXT: li t6, 0
491493
; RV64-NEXT: j .LBB0_11
@@ -496,7 +498,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
496498
; RV64-NEXT: and t6, t6, a6
497499
; RV64-NEXT: mv s0, a2
498500
; RV64-NEXT: mv s1, a4
499-
; RV64-NEXT: mv s2, t5
501+
; RV64-NEXT: mv s2, a0
500502
; RV64-NEXT: mv s3, t6
501503
; RV64-NEXT: vsetvli s4, zero, e8, m2, ta, ma
502504
; RV64-NEXT: .LBB0_9: # %vector.body
@@ -516,25 +518,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
516518
; RV64-NEXT: beq t6, a6, .LBB0_5
517519
; RV64-NEXT: .LBB0_11: # %for.body4.us.preheader
518520
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
519-
; RV64-NEXT: mul s1, a1, t2
520-
; RV64-NEXT: add s2, a0, a6
521-
; RV64-NEXT: add s0, t5, t6
522-
; RV64-NEXT: add s1, s2, s1
523-
; RV64-NEXT: add s2, a4, t6
521+
; RV64-NEXT: mul s2, a1, t2
522+
; RV64-NEXT: add s0, a0, t6
523+
; RV64-NEXT: add s1, a4, t6
524+
; RV64-NEXT: add s2, t4, s2
524525
; RV64-NEXT: add t6, a2, t6
525526
; RV64-NEXT: .LBB0_12: # %for.body4.us
526527
; RV64-NEXT: # Parent Loop BB0_6 Depth=1
527528
; RV64-NEXT: # => This Inner Loop Header: Depth=2
528529
; RV64-NEXT: lbu s3, 0(t6)
529-
; RV64-NEXT: lbu s4, 0(s2)
530+
; RV64-NEXT: lbu s4, 0(s1)
530531
; RV64-NEXT: add s3, s3, s4
531532
; RV64-NEXT: addi s3, s3, 1
532533
; RV64-NEXT: srli s3, s3, 1
533534
; RV64-NEXT: sb s3, 0(s0)
534535
; RV64-NEXT: addi s0, s0, 1
535-
; RV64-NEXT: addi s2, s2, 1
536+
; RV64-NEXT: addi s1, s1, 1
536537
; RV64-NEXT: addi t6, t6, 1
537-
; RV64-NEXT: bne s0, s1, .LBB0_12
538+
; RV64-NEXT: bne s0, s2, .LBB0_12
538539
; RV64-NEXT: j .LBB0_5
539540
; RV64-NEXT: .LBB0_13:
540541
; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload

0 commit comments

Comments
 (0)