Skip to content

Commit 1e7bf66

Browse files
vladimirradosavljevicakiramenai
authored andcommitted
[EraVM] Enable CSE between sub and cmp
In order to enable this, we need to do the following: 1. Generate _v instead of _s variants of the sub instructions, following the same patterns as for the cmp instruction. 2. Remove Flags from the reserved registers. 3. Convert _v variants to _s variants in the Peephole Optimizer if the flags register is not used. 4. Remove identical cmp instructions. This is the same design that AArch64 uses: generate fake cmp instructions from sub to leverage MachineCSE, and convert the fake cmp instructions back to subs in the PeepholeOptimizer. If we wanted to use a different approach, we would probably need to teach MachineCSE that flag-setting counterparts have the same semantics. This could be problematic, since MachineCSE uses a lookup table to identify identical instructions, so we would always need to generate the MI counterparts to check the lookup table and to worry about deallocating those MIs (a lot of local changes, and we would need to be very careful not to introduce new issues). PR: #612. Signed-off-by: Vladimir Radosavljevic <[email protected]>
1 parent 17c0d5a commit 1e7bf66

13 files changed

+172
-71
lines changed

llvm/lib/Target/EraVM/EraVMInstrFormats.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ class IBinary<EraVMOpcode opcode,
435435
let Defs = !if(!eq(set_flags, PreserveFlags), [], [Flags]);
436436
let mayLoad = !or(isStackIn<src>.Value, !eq(src, SrcCodeAddr));
437437
let mayStore = isStackOut<dst>.Value;
438+
let isCompare = !and(!eq(opcode, OpSub), !eq(set_flags, SetFlags));
438439

439440
let Opcode = ArithOpcEncoder<opcode.Encoding, opcode.BaseOpcode,
440441
src, dst,

llvm/lib/Target/EraVM/EraVMInstrInfo.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,69 @@ unsigned EraVMInstrInfo::insertBranch(
417417
return Count;
418418
}
419419

420+
// Return true if MI is a compare instruction, i.e. a sub instruction that
// sets flags. SrcReg, SrcReg2, CmpMask and CmpValue are never written here.
bool EraVMInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
421+
Register &SrcReg2, int64_t &CmpMask,
422+
int64_t &CmpValue) const {
423+
return isSub(MI) && isFlagSettingInstruction(MI);
424+
}
425+
426+
// Try to simplify or remove the compare instruction CmpInstr. Returns true if
// CmpInstr was converted to its non-flag-setting opcode or erased, and false
// if it was left untouched. SrcReg, SrcReg2, CmpMask and CmpValue are not
// used by this implementation.
bool EraVMInstrInfo::optimizeCompareInstr(
427+
MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
428+
int64_t CmpValue, const MachineRegisterInfo *MRI) const {
429+
assert(CmpInstr.getParent() && "CmpInstr must be in a basic block");
430+
assert(MRI && "MachineRegisterInfo is required");
431+
432+
// If the Flags definition is dead, convert this compare instruction (a sub
433+
// instruction that sets flags) into a sub that doesn't set flags.
434+
int DeadFlagsIdx = CmpInstr.findRegisterDefOperandIdx(EraVM::Flags, true);
435+
if (DeadFlagsIdx != -1) {
436+
int NewOpc = EraVM::getNonFlagSettingOpcode(CmpInstr.getOpcode());
437+
assert(NewOpc != -1 && "Invalid opcode for compare instruction");
438+
CmpInstr.setDesc(get(NewOpc));
439+
CmpInstr.removeOperand(DeadFlagsIdx);
440+
return true;
441+
}
442+
443+
// TODO: #621 Support removing a compare instruction whose output is used if
444+
// the nearest flag setting instruction is identical.
445+
if (!EraVM::hasRROutAddressingMode(CmpInstr) ||
446+
!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
447+
return false;
448+
449+
// In some cases, we can have the following sequence:
450+
// Out1 = SUB_v In1, In2, CondCC1, implicit-def $flags
451+
// ...
452+
// Out2 = SUB_v In3, In4, CondCC2, implicit-def dead $flags
453+
// ...
454+
// Out3 = SUB_v In1, In2, CondCC1, implicit-def $flags
455+
//
456+
// In this case, `Out2 = SUB_v` prevents MachineCSE from removing
457+
// `Out3 = SUB_v` in favor of `Out1 = SUB_v`, just because of
458+
// `implicit-def dead $flags`.
459+
// Since this function converts `Out2 = SUB_v` to `Out2 = SUB_s`
460+
// and thereby drops the `implicit-def dead $flags`, we can try to
461+
// find an identical compare instruction and remove this one here.
462+
//
463+
// Search backwards in this basic block for the nearest instruction that
464+
// implicitly defines Flags; only that instruction is checked below.
465+
auto From = std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
466+
auto FlagSettingInst = std::find_if(
467+
From, CmpInstr.getParent()->rend(), [](const MachineInstr &MI) {
468+
return any_of(MI.implicit_operands(), [](const MachineOperand &MO) {
469+
return MO.isReg() && MO.isDef() && MO.getReg() == EraVM::Flags;
470+
});
471+
});
472+
473+
// Bail out if we didn't find an identical compare instruction.
474+
if (FlagSettingInst == CmpInstr.getParent()->rend() ||
475+
!isFlagSettingInstruction(*FlagSettingInst) ||
476+
!FlagSettingInst->isIdenticalTo(CmpInstr, MachineInstr::IgnoreVRegDefs))
477+
return false;
478+
479+
// The nearest preceding Flags definition is an identical instruction and
// this instruction's output has no non-debug uses, so erase it.
CmpInstr.eraseFromParent();
480+
return true;
481+
}
482+
420483
void EraVMInstrInfo::storeRegToStackSlot(
421484
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
422485
bool isKill, int FrameIndex, const TargetRegisterClass *RC,

llvm/lib/Target/EraVM/EraVMInstrInfo.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,19 @@ class EraVMInstrInfo : public EraVMGenInstrInfo {
313313
const DebugLoc &DL,
314314
int *BytesAdded = nullptr) const override;
315315

316+
// Return true if MI is a compare instruction (a sub instruction that
317+
// sets flags).
318+
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
319+
Register &SrcReg2, int64_t &CmpMask,
320+
int64_t &CmpValue) const override;
321+
322+
// If the flags register is not used, convert the compare instruction (a sub
323+
// instruction that sets flags) into a sub instruction that does not set flags.
324+
// Also, try to remove a redundant compare instruction. Returns true on change.
325+
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
326+
Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
327+
const MachineRegisterInfo *MRI) const override;
328+
316329
int64_t getFramePoppedByCallee(const MachineInstr &I) const { return 0; }
317330

318331
unsigned int getTailDuplicateSize(CodeGenOpt::Level OptLevel) const override;

llvm/lib/Target/EraVM/EraVMInstrInfo.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ def constant_pool : SDNodeXForm<imm, [{
124124
return CurDAG->getTargetConstantPool(N->getConstantIntValue(), PtrVT);
125125
}]>;
126126

127+
// Negate the immediate and return the result as a target constant-pool entry
// typed with the target's pointer type.
def negate_constant_pool : SDNodeXForm<imm, [{
128+
APInt Val = -N->getAPIntValue();
129+
const Constant *C = ConstantInt::get(*CurDAG->getContext(), Val);
130+
MVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout());
131+
return CurDAG->getTargetConstantPool(C, PtrVT);
132+
}]>;
133+
127134
def default_far_return : SDNodeXForm<imm, [{
128135
(void)N;
129136
MVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout());
@@ -690,6 +697,32 @@ def : Pat<(store_stack GRPTR:$src, stackaddr:$dst), (PTR_ADDrrs_s GRPTR:$src, R0
690697

691698
def : Pat<(EraVMcopy_from_ptrreg GRPTR:$src), (PTR_ADDrrr_s GRPTR:$src, R0, 0)>;
692699

700+
// In order to support CSE between sub and cmp, we need to generate _v variants
701+
// of the sub instructions. These patterns mirror the cmp patterns below, so
702+
// that CSE can match them. AddedComplexity is raised so that the _v variants
703+
// are selected in preference to the _s variants.
704+
// After MachineCSE, the PeepholeOptimizer converts _v back to _s variants if
705+
// the definition of the flags register is dead. Whether that definition is
706+
// dead is determined when the MI instructions are generated.
707+
// The PeepholeOptimizer is not invoked for OptNone functions, preventing the
708+
// conversion of _v to _s variants. As a result, these patterns are disabled
709+
// for OptNone functions.
710+
def Optimize : Predicate<"!MF->getFunction().hasOptNone()">;
711+
let Predicates = [Optimize], AddedComplexity = 1 in {
712+
def : Pat<(sub GR256:$lhs, GR256:$rhs), (SUBrrr_v GR256:$lhs, GR256:$rhs, 0)>;
713+
def : Pat<(sub GR256:$lhs, imm16:$rhs), (SUBxrr_v imm:$rhs, GR256:$lhs, 0)>;
714+
def : Pat<(sub GR256:$lhs, large_imm:$rhs), (SUByrr_v (constant_pool imm:$rhs), 0, GR256:$lhs, 0)>;
715+
716+
def : Pat<(sub (load_code memaddr:$lhs), GR256:$rhs), (SUBcrr_v memaddr:$lhs, GR256:$rhs, 0)>;
717+
def : Pat<(sub GR256:$lhs, (load_code memaddr:$rhs)), (SUByrr_v memaddr:$rhs, GR256:$lhs, 0)>;
718+
def : Pat<(sub (load_stack stackaddr:$lhs), GR256:$rhs), (SUBsrr_v stackaddr:$lhs, GR256:$rhs, 0)>;
719+
def : Pat<(sub GR256:$lhs, (load_stack stackaddr:$rhs)), (SUBzrr_v stackaddr:$rhs, GR256:$lhs, 0)>;
720+
721+
// Select sub of the negated immediate for add, so that these instructions can be CSE'd with cmp.
722+
def : Pat<(add GR256:$rs0, neg_imm16:$imm), (SUBxrr_v (negate_imm imm:$imm), GR256:$rs0, 0)>;
723+
def : Pat<(add GR256:$lhs, large_imm:$rhs), (SUByrr_v (negate_constant_pool imm:$rhs), 0, GR256:$lhs, 0)>;
724+
}
725+
693726
// SelecCC, BR_CC supplement
694727
def : Pat<(EraVMcmp GR256:$lhs, GR256:$rhs), (SUBrrr_v GR256:$lhs, GR256:$rhs, 0)>;
695728
// r0 is more profitable than imm 0 because it makes sub! x, r0 combinable with x = load y.

llvm/lib/Target/EraVM/EraVMRegisterInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ EraVMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
4141
BitVector EraVMRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
4242
BitVector Reserved(getNumRegs());
4343
Reserved.set(EraVM::SP);
44-
Reserved.set(EraVM::Flags);
4544
Reserved.set(EraVM::R0);
4645
return Reserved;
4746
}

llvm/test/CodeGen/EraVM/cse-sub-cmp.ll

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@ declare void @use(i256)
99
define i256 @test_small_imm(i256 %a) {
1010
; CHECK-LABEL: test_small_imm:
1111
; CHECK: ; %bb.0:
12-
; CHECK-NEXT: sub.s 10, r1, r3
13-
; CHECK-NEXT: mul 10, r1, r2, r4
14-
; CHECK-NEXT: sub.s! 10, r1, r1
15-
; CHECK-NEXT: add.ge r3, r0, r2
16-
; CHECK-NEXT: add r2, r0, r1
12+
; CHECK-NEXT: sub.s! 10, r1, r2
13+
; CHECK-NEXT: mul 10, r1, r1, r3
14+
; CHECK-NEXT: add.ge r2, r0, r1
1715
; CHECK-NEXT: ret
1816
%sub = sub i256 %a, 10
1917
%mul = mul i256 %a, 10
@@ -25,11 +23,9 @@ define i256 @test_small_imm(i256 %a) {
2523
define i256 @test_large_imm(i256 %a) {
2624
; CHECK-LABEL: test_large_imm:
2725
; CHECK: ; %bb.0:
28-
; CHECK-NEXT: add @CPI1_0[0], r1, r3
29-
; CHECK-NEXT: mul @CPI1_1[0], r1, r2, r4
30-
; CHECK-NEXT: sub.s! @CPI1_1[0], r1, r1
31-
; CHECK-NEXT: add.ge r3, r0, r2
32-
; CHECK-NEXT: add r2, r0, r1
26+
; CHECK-NEXT: sub.s! @CPI1_0[0], r1, r2
27+
; CHECK-NEXT: mul @CPI1_0[0], r1, r1, r3
28+
; CHECK-NEXT: add.ge r2, r0, r1
3329
; CHECK-NEXT: ret
3430
%sub = sub i256 %a, 123456789
3531
%mul = mul i256 %a, 123456789
@@ -41,11 +37,9 @@ define i256 @test_large_imm(i256 %a) {
4137
define i256 @test_reg(i256 %a, i256 %b) {
4238
; CHECK-LABEL: test_reg:
4339
; CHECK: ; %bb.0:
44-
; CHECK-NEXT: sub r1, r2, r4
45-
; CHECK-NEXT: mul r1, r2, r3, r5
46-
; CHECK-NEXT: sub! r1, r2, r1
47-
; CHECK-NEXT: add.ge r4, r0, r3
48-
; CHECK-NEXT: add r3, r0, r1
40+
; CHECK-NEXT: sub! r1, r2, r3
41+
; CHECK-NEXT: mul r1, r2, r1, r2
42+
; CHECK-NEXT: add.ge r3, r0, r1
4943
; CHECK-NEXT: ret
5044
%sub = sub i256 %a, %b
5145
%mul = mul i256 %a, %b
@@ -57,9 +51,9 @@ define i256 @test_reg(i256 %a, i256 %b) {
5751
define i256 @test_in_different_bb(i256 %a, i256 %b) {
5852
; CHECK-LABEL: test_in_different_bb:
5953
; CHECK: ; %bb.0:
60-
; CHECK-NEXT: sub! r1, r2, r3
61-
; CHECK-NEXT: sub.ge r1, r2, r1
54+
; CHECK-NEXT: sub! r1, r2, r1
6255
; CHECK-NEXT: add.lt r0, r0, r1
56+
; CHECK-NEXT: ; %bb.1: ; %bb2
6357
; CHECK-NEXT: ret
6458
%cmp = icmp ult i256 %a, %b
6559
br i1 %cmp, label %bb1, label %bb2
@@ -75,15 +69,12 @@ bb2:
7569
define i256 @test_with_call(i256 %a, i256 %b) {
7670
; CHECK-LABEL: test_with_call:
7771
; CHECK: ; %bb.0:
78-
; CHECK-NEXT: nop stack+=[2 + r0]
79-
; CHECK-NEXT: add r2, r0, stack-[1] ; 32-byte Folded Spill
80-
; CHECK-NEXT: add r1, r0, stack-[2] ; 32-byte Folded Spill
81-
; CHECK-NEXT: sub! r1, r2, r3
72+
; CHECK-NEXT: nop stack+=[1 + r0]
73+
; CHECK-NEXT: sub! r1, r2, stack-[1]
8274
; CHECK-NEXT: add 10, r0, r1
8375
; CHECK-NEXT: add.lt 15, r0, r1
8476
; CHECK-NEXT: near_call r0, @use, @DEFAULT_UNWIND
85-
; CHECK-NEXT: add stack-[1], r0, r2 ; 32-byte Folded Reload
86-
; CHECK-NEXT: sub stack-[2], r2, r1
77+
; CHECK-NEXT: add stack-[1], r0, r1 ; 32-byte Folded Reload
8778
; CHECK-NEXT: ret
8879
%cmp = icmp ult i256 %a, %b
8980
%select = select i1 %cmp, i256 15, i256 10

llvm/test/CodeGen/EraVM/fold-add-to-select.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define i256 @test_large_imm1(i256 %a) {
1212
; CHECK-LABEL: test_large_imm1:
1313
; CHECK: ; %bb.0:
1414
; CHECK-NEXT: sub.s! @CPI0_1[0], r1, r2
15-
; CHECK-NEXT: add.lt @CPI0_0[0], r1, r1
15+
; CHECK-NEXT: sub.s.lt @CPI0_0[0], r1, r1
1616
; CHECK-NEXT: ret
1717
%add = add i256 %a, 26959946660873538059280334323183841250350249843923952699046031785980
1818
%cmp = icmp ult i256 %a, -26959946660873538059280334323183841250350249843923952699046031785985
@@ -24,7 +24,7 @@ define i256 @test_large_imm2(i256 %a) {
2424
; CHECK-LABEL: test_large_imm2:
2525
; CHECK: ; %bb.0:
2626
; CHECK-NEXT: sub.s! @CPI1_1[0], r1, r2
27-
; CHECK-NEXT: add.lt @CPI1_0[0], r1, r1
27+
; CHECK-NEXT: sub.s.lt @CPI1_0[0], r1, r1
2828
; CHECK-NEXT: ret
2929
%add = add i256 %a, -26959946660873538059280334323183841250350249843923952699046031785985
3030
%cmp = icmp ult i256 %a, 26959946660873538059280334323183841250350249843923952699046031785980
@@ -36,7 +36,7 @@ define i256 @test_large_imm3(i256 %a) {
3636
; CHECK-LABEL: test_large_imm3:
3737
; CHECK: ; %bb.0:
3838
; CHECK-NEXT: sub.s! @CPI2_1[0], r1, r2
39-
; CHECK-NEXT: add.ge @CPI2_0[0], r1, r1
39+
; CHECK-NEXT: sub.s.ge @CPI2_0[0], r1, r1
4040
; CHECK-NEXT: ret
4141
%add = add i256 %a, 26959946660873538059280334323183841250350249843923952699046031785980
4242
%cmp = icmp ult i256 %a, -26959946660873538059280334323183841250350249843923952699046031785985
@@ -48,7 +48,7 @@ define i256 @test_large_imm4(i256 %a) {
4848
; CHECK-LABEL: test_large_imm4:
4949
; CHECK: ; %bb.0:
5050
; CHECK-NEXT: sub.s! @CPI3_1[0], r1, r2
51-
; CHECK-NEXT: add.ge @CPI3_0[0], r1, r1
51+
; CHECK-NEXT: sub.s.ge @CPI3_0[0], r1, r1
5252
; CHECK-NEXT: ret
5353
%add = add i256 %a, -26959946660873538059280334323183841250350249843923952699046031785985
5454
%cmp = icmp ult i256 %a, 26959946660873538059280334323183841250350249843923952699046031785980
@@ -253,7 +253,7 @@ define i256 @test_use_in_other_bb(i256 %a, i1 %cond) {
253253
; CHECK-NEXT: ret
254254
; CHECK-NEXT: .BB18_2: ; %else
255255
; CHECK-NEXT: sub.s! @CPI18_1[0], r1, r2
256-
; CHECK-NEXT: add.lt @CPI18_0[0], r1, r1
256+
; CHECK-NEXT: sub.s.lt @CPI18_0[0], r1, r1
257257
; CHECK-NEXT: ret
258258
%add = add i256 %a, 26959946660873538059280334323183841250350249843923952699046031785980
259259
br i1 %cond, label %then, label %else

llvm/test/CodeGen/EraVM/fold-similar-instructions.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "eravm"
77

88
define i256 @test_large_imm_no_fold1(i256 %a, i1 %cond) {
99
; CHECK-LABEL: test_large_imm_no_fold1
10-
; CHECK: add @CPI0_0[0], r1, r3
10+
; CHECK: sub.s @CPI0_0[0], r1, r3
1111
; CHECK-NEXT: sub.s! @CPI0_1[0], r1, r4
1212
; CHECK-NEXT: add.lt r3, r0, r1
1313
; CHECK-NEXT: sub! r2, r0, r2
@@ -22,7 +22,7 @@ define i256 @test_large_imm_no_fold1(i256 %a, i1 %cond) {
2222

2323
define i256 @test_large_imm_no_fold2(i256 %a, i1 %cond) {
2424
; CHECK-LABEL: test_large_imm_no_fold2
25-
; CHECK: add @CPI1_0[0], r1, r3
25+
; CHECK: sub.s @CPI1_0[0], r1, r3
2626
; CHECK-NEXT: sub.s! @CPI1_1[0], r1, r4
2727
; CHECK-NEXT: add.lt r3, r0, r1
2828
; CHECK-NEXT: sub! r2, r0, r2
@@ -52,7 +52,7 @@ define i256 @test_small_imm_no_fold(i256 %a, i1 %cond) {
5252

5353
define i256 @test_large_imm_ult1(i256 %a, i1 %cond) {
5454
; CHECK-LABEL: test_large_imm_ult1
55-
; CHECK: sub.s! @CPI3_1[0], r1, r3
55+
; CHECK: sub.s! @CPI3_0[0], r1, r3
5656
; CHECK-NEXT: add.lt r3, r0, r1
5757
; CHECK-NEXT: sub! r2, r0, r2
5858
; CHECK-NEXT: add.eq r3, r0, r1
@@ -66,7 +66,7 @@ define i256 @test_large_imm_ult1(i256 %a, i1 %cond) {
6666

6767
define i256 @test_large_imm_ult2(i256 %a, i1 %cond) {
6868
; CHECK-LABEL: test_large_imm_ult2
69-
; CHECK: sub.s! @CPI4_1[0], r1, r3
69+
; CHECK: sub.s! @CPI4_0[0], r1, r3
7070
; CHECK-NEXT: add.lt r3, r0, r1
7171
; CHECK-NEXT: sub! r2, r0, r2
7272
; CHECK-NEXT: add.eq r3, r0, r1
@@ -81,7 +81,7 @@ define i256 @test_large_imm_ult2(i256 %a, i1 %cond) {
8181
; TODO: CPR-1543 This can be folded.
8282
define i256 @test_large_imm_ule1(i256 %a, i1 %cond) {
8383
; CHECK-LABEL: test_large_imm_ule1
84-
; CHECK: add @CPI5_0[0], r1, r3
84+
; CHECK: sub.s @CPI5_0[0], r1, r3
8585
; CHECK-NEXT: sub.s! @CPI5_1[0], r1, r4
8686
; CHECK-NEXT: add.lt r3, r0, r1
8787
; CHECK-NEXT: sub! r2, r0, r2
@@ -97,7 +97,7 @@ define i256 @test_large_imm_ule1(i256 %a, i1 %cond) {
9797
; TODO: CPR-1543 This can be folded.
9898
define i256 @test_large_imm_ule2(i256 %a, i1 %cond) {
9999
; CHECK-LABEL: test_large_imm_ule2
100-
; CHECK: add @CPI6_0[0], r1, r3
100+
; CHECK: sub.s @CPI6_0[0], r1, r3
101101
; CHECK-NEXT: sub.s! @CPI6_1[0], r1, r4
102102
; CHECK-NEXT: add.lt r3, r0, r1
103103
; CHECK-NEXT: sub! r2, r0, r2
@@ -113,7 +113,7 @@ define i256 @test_large_imm_ule2(i256 %a, i1 %cond) {
113113
; TODO: CPR-1543 This can be folded.
114114
define i256 @test_large_imm_uge1(i256 %a, i1 %cond) {
115115
; CHECK-LABEL: test_large_imm_uge1
116-
; CHECK: add @CPI7_0[0], r1, r3
116+
; CHECK: sub.s @CPI7_0[0], r1, r3
117117
; CHECK-NEXT: sub.s! @CPI7_1[0], r1, r4
118118
; CHECK-NEXT: add.gt r3, r0, r1
119119
; CHECK-NEXT: sub! r2, r0, r2
@@ -129,7 +129,7 @@ define i256 @test_large_imm_uge1(i256 %a, i1 %cond) {
129129
; TODO: CPR-1543 This can be folded.
130130
define i256 @test_large_imm_uge2(i256 %a, i1 %cond) {
131131
; CHECK-LABEL: test_large_imm_uge2
132-
; CHECK: add @CPI8_0[0], r1, r3
132+
; CHECK: sub.s @CPI8_0[0], r1, r3
133133
; CHECK-NEXT: sub.s! @CPI8_1[0], r1, r4
134134
; CHECK-NEXT: add.gt r3, r0, r1
135135
; CHECK-NEXT: sub! r2, r0, r2

llvm/test/CodeGen/EraVM/fold-similar-instructions.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ frameInfo:
170170
machineFunctionInfo: {}
171171
body: |
172172
bb.0:
173-
liveins: $r1, $r2
173+
liveins: $r1, $r2, $flags
174174
175175
%0:gr256 = COPY killed $r1
176176
%1:gr256 = COPY killed $r2
@@ -305,7 +305,7 @@ frameInfo:
305305
machineFunctionInfo: {}
306306
body: |
307307
bb.0:
308-
liveins: $r1, $r2
308+
liveins: $r1, $r2, $flags
309309
310310
%0:gr256 = COPY killed $r1
311311
%1:gr256 = COPY killed $r2

llvm/test/CodeGen/EraVM/intrinsic.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: opt -O3 < %s | llc | FileCheck %s
1+
; RUN: opt -O3 < %s | llc --verify-regalloc=false | FileCheck %s
2+
; Don't verify regalloc, because there is no definition of flags register in flags intrinsics functions (e.g. ifeqrr).
23

34
target datalayout = "E-p:256:256-i256:256:256-S32-a:256:256"
45
target triple = "eravm"

0 commit comments

Comments
 (0)