Skip to content

Commit cf11c11

Browse files
committed
[RISCV] Add a pass to eliminate special copies in order to facilitate shrink-wrap optimization
Some data types that require extension have redundant copy instructions. This pass removes specific copies to help shrink-wrap optimization.
1 parent d013556 commit cf11c11

File tree

8 files changed

+183
-24
lines changed

8 files changed

+183
-24
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ add_llvm_target(RISCVCodeGen
3535
RISCVCodeGenPrepare.cpp
3636
RISCVConstantPoolValue.cpp
3737
RISCVDeadRegisterDefinitions.cpp
38+
RISCVCopyCombine.cpp
3839
RISCVExpandAtomicPseudoInsts.cpp
3940
RISCVExpandPseudoInsts.cpp
4041
RISCVFoldMemOffset.cpp

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ void initializeRISCVCodeGenPreparePass(PassRegistry &);
3030

3131
FunctionPass *createRISCVDeadRegisterDefinitionsPass();
3232
void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &);
33+
FunctionPass *createRISCVCopyCombinePass();
34+
void initializeRISCVCopyCombinePass(PassRegistry &);
3335

3436
FunctionPass *createRISCVIndirectBranchTrackingPass();
3537
void initializeRISCVIndirectBranchTrackingPass(PassRegistry &);
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
//===- RISCVCopyCombine.cpp - Remove special copy for RISC-V --===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// This pass removes a redundant COPY that feeds a conditional branch so that
10+
// shrink-wrapping is not blocked. It is effective when data types require extension.
11+
//
12+
// After finalize-isel:
13+
// bb0:
14+
// liveins: $x10, $x11
15+
// %1:gpr = COPY $x11 ---- will be deleted by this pass
16+
// %0:gpr = COPY $x10
17+
// %2:gpr = COPY %1:gpr ---- without this pass, sunk to bb1 by machine-sink,
18+
// then deleted at regalloc
19+
// BEQ %0:gpr, killed %3:gpr, %bb.3
// PseudoBR %bb1
20+
//
21+
// bb1:
22+
// bb2:
23+
// BNE %2:gpr, killed %5:gpr, %bb.2
24+
// ...
25+
// After regalloc
26+
// bb0:
27+
// liveins: $x10, $x11
28+
// renamable $x8 = COPY $x11
29+
// renamable $x11 = ADDI $x0, 57 --- defines x11, so the COPY cannot be sunk
30+
// BEQ killed renamable $x10, killed renamable $x11, %bb.4
31+
// PseudoBR %bb.1
32+
//
33+
// bb1:
34+
// bb2:
35+
// BEQ killed renamable $x8, killed renamable $x10, %bb.4
36+
//
37+
// ----->
38+
//
39+
// After this pass:
40+
// bb0:
41+
// liveins: $x10, $x11
42+
// %0:gpr = COPY $x10
43+
// %2:gpr = COPY $x11
44+
// BEQ %0:gpr, killed %3:gpr, %bb.3
45+
// PseudoBR %bb1
46+
//
47+
// bb1:
48+
// bb2:
49+
// BNE %2:gpr, killed %5:gpr, %bb.2
50+
// ...
51+
// After regalloc
52+
// bb0:
53+
// liveins: $x10, $x11
54+
// renamable $x12 = ADDI $x0, 57
55+
// renamable $x8 = COPY $x11
56+
// BEQ killed renamable $x10, killed renamable $x12, %bb.4
57+
// PseudoBR %bb.1
58+
//
59+
// bb1:
60+
// bb2:
61+
// BEQ killed renamable $x8, killed renamable $x10, %bb.4
62+
//===---------------------------------------------------------------------===//
63+
64+
#include "RISCV.h"
65+
#include "RISCVSubtarget.h"
66+
#include "llvm/ADT/Statistic.h"
67+
#include "llvm/CodeGen/MachineFunctionPass.h"
68+
69+
using namespace llvm;
70+
#define DEBUG_TYPE "riscv-copy-combine"
71+
#define RISCV_COPY_COMBINE "RISC-V Copy Combine"
72+
73+
STATISTIC(NumCopyDeleted, "Number of copy deleted");
74+
75+
namespace {
76+
class RISCVCopyCombine : public MachineFunctionPass {
77+
public:
78+
static char ID;
79+
const TargetInstrInfo *TII;
80+
MachineRegisterInfo *MRI;
81+
const TargetRegisterInfo *TRI;
82+
const RISCVSubtarget *ST;
83+
84+
RISCVCopyCombine() : MachineFunctionPass(ID) {}
85+
bool runOnMachineFunction(MachineFunction &MF) override;
86+
MachineFunctionProperties getRequiredProperties() const override {
87+
return MachineFunctionProperties().set(
88+
MachineFunctionProperties::Property::IsSSA);
89+
}
90+
91+
StringRef getPassName() const override { return RISCV_COPY_COMBINE; }
92+
93+
private:
94+
bool optimizeBlock(MachineBasicBlock &MBB);
95+
};
96+
} // end anonymous namespace
97+
98+
char RISCVCopyCombine::ID = 0;
99+
INITIALIZE_PASS(RISCVCopyCombine, DEBUG_TYPE, RISCV_COPY_COMBINE, false, false)
100+
101+
/// Try to erase one redundant COPY that feeds the conditional branch
/// terminating \p MBB.
///
/// The pattern handled is
///   %mid = COPY <src>      ; "Src", <src> is a virtual or physical register
///   %dst = COPY %mid       ; "MI", the only non-debug user of %mid
///   Bcc  %dst, ...         ; %dst is the first register of the condition
/// with both copies in the same basic block. %mid is rewritten to <src> and
/// the first COPY is erased.
///
/// \returns true if a COPY was erased, false if nothing was changed.
bool RISCVCopyCombine::optimizeBlock(MachineBasicBlock &MBB) {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 3> Cond;
  if (TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;

  // Only a conditional branch described by exactly three condition operands
  // is handled; Cond[1] is read below as the first compared operand.
  if (!TBB || Cond.size() != 3)
    return false;

  // The compared operand may be a physical register (e.g. $x0) or not a
  // register at all; getVRegDef is only valid for virtual registers.
  const MachineOperand &BranchOp = Cond[1];
  if (!BranchOp.isReg() || !BranchOp.getReg().isVirtual())
    return false;

  MachineInstr *MI = MRI->getVRegDef(BranchOp.getReg());
  if (!MI || !MI->isCopy())
    return false;

  const MachineOperand &OuterDst = MI->getOperand(0);
  const MachineOperand &OuterSrc = MI->getOperand(1);
  // Forwarding through a sub-register copy would change which bits are read.
  if (OuterDst.getSubReg() || OuterSrc.getSubReg())
    return false;

  Register MidReg = OuterSrc.getReg();
  if (!OuterDst.getReg().isVirtual() || !MidReg.isVirtual())
    return false;
  // Ignore debug users so that the decision does not depend on the presence
  // of debug info.
  if (!MRI->hasOneNonDBGUse(MidReg))
    return false;

  MachineInstr *Src = MRI->getVRegDef(MidReg);
  if (!Src || !Src->isCopy() || Src->getParent() != MI->getParent())
    return false;
  if (Src->getOperand(0).getSubReg() || Src->getOperand(1).getSubReg())
    return false;

  Register SrcReg = Src->getOperand(1).getReg();
  if (SrcReg.isPhysical()) {
    // Erasing Src moves the read of the physical register down to MI. That
    // is only sound if nothing in between redefines it (including call
    // register masks) or marks it as killed.
    MachineBasicBlock::iterator I = std::next(MachineBasicBlock::iterator(Src));
    MachineBasicBlock::iterator E(MI);
    for (; I != E; ++I)
      if (I->modifiesRegister(SrcReg, TRI) || I->killsRegister(SrcReg, TRI))
        return false;
    // Debug users of %mid may sit after a later clobber of the physical
    // register, so drop their location instead of pointing them at it.
    MRI->markUsesInDebugValueAsUndef(MidReg);
  }

  MRI->replaceRegWith(MidReg, SrcReg);
  LLVM_DEBUG(dbgs() << "Deleting this copy instruction ";
             Src->print(dbgs()));
  ++NumCopyDeleted;
  Src->eraseFromParent();

  // The forwarded virtual register is now live up to MI, so kill flags on its
  // earlier uses may be stale.
  if (SrcReg.isVirtual())
    MRI->clearKillFlags(SrcReg);
  return true;
}
131+
132+
/// Entry point of the pass: caches the target hooks for \p MF and applies
/// optimizeBlock to each of its basic blocks.
/// \returns true if at least one block was modified.
bool RISCVCopyCombine::runOnMachineFunction(MachineFunction &MF) {
  // Honour opt-bisect / optnone.
  if (skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TRI = MRI->getTargetRegisterInfo();
  TII = MF.getSubtarget().getInstrInfo();

  bool Modified = false;
  for (MachineBasicBlock &Block : MF) {
    // Every block is visited, whether or not an earlier one changed.
    if (optimizeBlock(Block))
      Modified = true;
  }
  return Modified;
}
147+
148+
/// Factory for the RISC-V copy-combine pass. Returns a newly allocated pass
/// object.
FunctionPass *llvm::createRISCVCopyCombinePass() {
  FunctionPass *Pass = new RISCVCopyCombine();
  return Pass;
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
128128
initializeRISCVPostLegalizerCombinerPass(*PR);
129129
initializeKCFIPass(*PR);
130130
initializeRISCVDeadRegisterDefinitionsPass(*PR);
131+
initializeRISCVCopyCombinePass(*PR);
131132
initializeRISCVLateBranchOptPass(*PR);
132133
initializeRISCVMakeCompressibleOptPass(*PR);
133134
initializeRISCVGatherScatterLoweringPass(*PR);
@@ -455,6 +456,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
455456
if (TM->getOptLevel() != CodeGenOptLevel::None &&
456457
EnableRISCVDeadRegisterElimination)
457458
addPass(createRISCVDeadRegisterDefinitionsPass());
459+
458460
return TargetPassConfig::addRegAssignAndRewriteFast();
459461
}
460462

@@ -465,6 +467,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
465467
if (TM->getOptLevel() != CodeGenOptLevel::None &&
466468
EnableRISCVDeadRegisterElimination)
467469
addPass(createRISCVDeadRegisterDefinitionsPass());
470+
468471
return TargetPassConfig::addRegAssignAndRewriteOptimized();
469472
}
470473

@@ -598,6 +601,7 @@ void RISCVPassConfig::addPreEmitPass2() {
598601
}
599602

600603
void RISCVPassConfig::addMachineSSAOptimization() {
604+
addPass(createRISCVCopyCombinePass());
601605
addPass(createRISCVVectorPeepholePass());
602606
addPass(createRISCVFoldMemOffsetPass());
603607

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
; CHECK-NEXT: Lazy Block Frequency Analysis
9797
; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
9898
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
99+
; CHECK-NEXT: RISC-V Copy Combine
99100
; CHECK-NEXT: RISC-V Vector Peephole Optimization
100101
; CHECK-NEXT: RISC-V Fold Memory Offset
101102
; CHECK-NEXT: Lazy Machine Block Frequency Analysis

llvm/test/CodeGen/RISCV/overflow-intrinsics.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,33 +1080,33 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
10801080
; RV32-NEXT: .cfi_offset s5, -28
10811081
; RV32-NEXT: .cfi_offset s6, -32
10821082
; RV32-NEXT: mv s5, a5
1083-
; RV32-NEXT: mv s3, a1
1084-
; RV32-NEXT: andi a1, a5, 1
1085-
; RV32-NEXT: beqz a1, .LBB32_8
1083+
; RV32-NEXT: mv s2, a0
1084+
; RV32-NEXT: andi a0, a5, 1
1085+
; RV32-NEXT: beqz a0, .LBB32_8
10861086
; RV32-NEXT: # %bb.1: # %t
10871087
; RV32-NEXT: mv s0, a4
1088-
; RV32-NEXT: mv s2, a3
1088+
; RV32-NEXT: mv s3, a3
10891089
; RV32-NEXT: mv s1, a2
1090-
; RV32-NEXT: mv s4, a0
1091-
; RV32-NEXT: beq s3, a3, .LBB32_3
1090+
; RV32-NEXT: mv s4, a1
1091+
; RV32-NEXT: beq a1, a3, .LBB32_3
10921092
; RV32-NEXT: # %bb.2: # %t
1093-
; RV32-NEXT: sltu s6, s3, s2
1093+
; RV32-NEXT: sltu s6, s4, s3
10941094
; RV32-NEXT: j .LBB32_4
10951095
; RV32-NEXT: .LBB32_3:
1096-
; RV32-NEXT: sltu s6, s4, s1
1096+
; RV32-NEXT: sltu s6, s2, s1
10971097
; RV32-NEXT: .LBB32_4: # %t
10981098
; RV32-NEXT: mv a0, s6
10991099
; RV32-NEXT: call call
11001100
; RV32-NEXT: beqz s6, .LBB32_8
11011101
; RV32-NEXT: # %bb.5: # %end
1102-
; RV32-NEXT: sltu a1, s4, s1
1102+
; RV32-NEXT: sltu a1, s2, s1
11031103
; RV32-NEXT: mv a0, a1
1104-
; RV32-NEXT: beq s3, s2, .LBB32_7
1104+
; RV32-NEXT: beq s4, s3, .LBB32_7
11051105
; RV32-NEXT: # %bb.6: # %end
1106-
; RV32-NEXT: sltu a0, s3, s2
1106+
; RV32-NEXT: sltu a0, s4, s3
11071107
; RV32-NEXT: .LBB32_7: # %end
1108-
; RV32-NEXT: sub a2, s3, s2
1109-
; RV32-NEXT: sub a3, s4, s1
1108+
; RV32-NEXT: sub a2, s4, s3
1109+
; RV32-NEXT: sub a3, s2, s1
11101110
; RV32-NEXT: sub a2, a2, a1
11111111
; RV32-NEXT: sw a3, 0(s0)
11121112
; RV32-NEXT: sw a2, 4(s0)

llvm/test/CodeGen/RISCV/rv64-double-convert.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,14 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
6969
; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
7070
; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
7171
; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
72-
; RV64I-NEXT: mv s0, a0
72+
; RV64I-NEXT: mv s1, a0
7373
; RV64I-NEXT: li a1, -449
7474
; RV64I-NEXT: slli a1, a1, 53
7575
; RV64I-NEXT: call __gedf2
7676
; RV64I-NEXT: mv s2, a0
77-
; RV64I-NEXT: mv a0, s0
77+
; RV64I-NEXT: mv a0, s1
7878
; RV64I-NEXT: call __fixdfti
79-
; RV64I-NEXT: mv s1, a0
79+
; RV64I-NEXT: mv s0, a0
8080
; RV64I-NEXT: mv s3, a1
8181
; RV64I-NEXT: li s5, -1
8282
; RV64I-NEXT: bgez s2, .LBB4_2
@@ -86,23 +86,23 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
8686
; RV64I-NEXT: li a0, 575
8787
; RV64I-NEXT: slli a0, a0, 53
8888
; RV64I-NEXT: addi a1, a0, -1
89-
; RV64I-NEXT: mv a0, s0
89+
; RV64I-NEXT: mv a0, s1
9090
; RV64I-NEXT: call __gtdf2
9191
; RV64I-NEXT: mv s4, a0
9292
; RV64I-NEXT: blez a0, .LBB4_4
9393
; RV64I-NEXT: # %bb.3:
9494
; RV64I-NEXT: srli s3, s5, 1
9595
; RV64I-NEXT: .LBB4_4:
96-
; RV64I-NEXT: mv a0, s0
97-
; RV64I-NEXT: mv a1, s0
96+
; RV64I-NEXT: mv a0, s1
97+
; RV64I-NEXT: mv a1, s1
9898
; RV64I-NEXT: call __unorddf2
9999
; RV64I-NEXT: snez a0, a0
100100
; RV64I-NEXT: slti a1, s2, 0
101101
; RV64I-NEXT: sgtz a2, s4
102102
; RV64I-NEXT: addi a0, a0, -1
103103
; RV64I-NEXT: addi a3, a1, -1
104104
; RV64I-NEXT: and a1, a0, s3
105-
; RV64I-NEXT: and a3, a3, s1
105+
; RV64I-NEXT: and a3, a3, s0
106106
; RV64I-NEXT: neg a2, a2
107107
; RV64I-NEXT: or a2, a2, a3
108108
; RV64I-NEXT: and a0, a0, a2

llvm/test/CodeGen/RISCV/shrinkwrap.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,28 +361,29 @@ define void @li_straightline_b(i32 zeroext %a, i32 zeroext %b) {
361361
;
362362
; RV64I-SW-LABEL: li_straightline_b:
363363
; RV64I-SW: # %bb.0:
364+
; RV64I-SW-NEXT: li a2, 57
365+
; RV64I-SW-NEXT: beq a0, a2, .LBB3_4
366+
; RV64I-SW-NEXT: # %bb.1: # %do_call
364367
; RV64I-SW-NEXT: addi sp, sp, -16
365368
; RV64I-SW-NEXT: .cfi_def_cfa_offset 16
366369
; RV64I-SW-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
367370
; RV64I-SW-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
368371
; RV64I-SW-NEXT: .cfi_offset ra, -8
369372
; RV64I-SW-NEXT: .cfi_offset s0, -16
370373
; RV64I-SW-NEXT: mv s0, a1
371-
; RV64I-SW-NEXT: li a1, 57
372-
; RV64I-SW-NEXT: beq a0, a1, .LBB3_3
373-
; RV64I-SW-NEXT: # %bb.1: # %do_call
374374
; RV64I-SW-NEXT: call foo
375375
; RV64I-SW-NEXT: li a0, 57
376376
; RV64I-SW-NEXT: beq s0, a0, .LBB3_3
377377
; RV64I-SW-NEXT: # %bb.2: # %do_call2
378378
; RV64I-SW-NEXT: call foo
379-
; RV64I-SW-NEXT: .LBB3_3: # %exit
379+
; RV64I-SW-NEXT: .LBB3_3:
380380
; RV64I-SW-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
381381
; RV64I-SW-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
382382
; RV64I-SW-NEXT: .cfi_restore ra
383383
; RV64I-SW-NEXT: .cfi_restore s0
384384
; RV64I-SW-NEXT: addi sp, sp, 16
385385
; RV64I-SW-NEXT: .cfi_def_cfa_offset 0
386+
; RV64I-SW-NEXT: .LBB3_4: # %exit
386387
; RV64I-SW-NEXT: ret
387388
%cmp0 = icmp eq i32 %a, 57
388389
br i1 %cmp0, label %exit, label %do_call

0 commit comments

Comments
 (0)