Skip to content

Commit f8f6682

Browse files
committed
[SOL] Remove expand in order option (#135)
1 parent 70017d4 commit f8f6682

File tree

10 files changed

+15
-409
lines changed

10 files changed

+15
-409
lines changed

llvm/lib/Target/SBF/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ add_llvm_target(SBFCodeGen
2727
SBFMCInstLower.cpp
2828
SBFPreserveDIType.cpp
2929
SBFRegisterInfo.cpp
30-
SBFSelectionDAGInfo.cpp
3130
SBFSubtarget.cpp
3231
SBFTargetMachine.cpp
3332
SBFMIPeephole.cpp

llvm/lib/Target/SBF/SBFISelLowering.cpp

Lines changed: 15 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ using namespace llvm;
2929

3030
#define DEBUG_TYPE "sbf-lower"
3131

32-
static cl::opt<bool> SBFExpandMemcpyInOrder("sbf-expand-memcpy-in-order",
33-
cl::Hidden, cl::init(false),
34-
cl::desc("Expand memcpy into load/store pairs in order"));
35-
3632
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
3733
MachineFunction &MF = DAG.getMachineFunction();
3834
DAG.getContext()->diagnose(
@@ -153,38 +149,20 @@ SBFTargetLowering::SBFTargetLowering(const TargetMachine &TM,
153149
setMinFunctionAlignment(Align(8));
154150
setPrefFunctionAlignment(Align(8));
155151

156-
if (SBFExpandMemcpyInOrder) {
157-
// LLVM generic code will try to expand memcpy into load/store pairs at this
158-
// stage which is before quite a few IR optimization passes, therefore the
159-
// loads and stores could potentially be moved apart from each other which
160-
// will cause trouble for the memcpy pattern matcher inside kernel eBPF JIT
161-
// compilers.
162-
//
163-
// When -sbf-expand-memcpy-in-order is specified, we defer the expansion
164-
// of memcpy to a later stage in the IR optimization pipeline so those load/store
165-
// pairs won't be touched and could be kept in order. Hence, we set
166-
// MaxStoresPerMem* to zero to disable the generic getMemcpyLoadsAndStores
167-
// code path, and ask LLVM to use target expander EmitTargetCodeForMemcpy.
168-
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
169-
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
170-
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
171-
MaxLoadsPerMemcmp = 0;
172-
} else {
173-
// A syscall consumes at least 10 CUs, so we should only invoke it when
174-
// the number of instructions is at least 10.
175-
176-
// Memset translates to stdw or stdxw, so the maximum should be 10.
177-
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 10;
178-
// Each store in memcpy follows a load, so the maximum is 5.
179-
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 5;
180-
// Each store in memmove follows a load, so the maximum is 5.
181-
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 5;
182-
// Memcmp expands to three instructions for each load:
183-
// 1. One load for each pointer being compared.
184-
// 2. One jne for each load.
185-
// The limit here should be three, since 3*3 = 9;
186-
MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = 3;
187-
}
152+
// A syscall consumes at least 10 CUs, so we should only invoke it when
153+
// the number of instructions is at least 10.
154+
155+
// Memset translates to stdw or stdxw, so the maximum should be 10.
156+
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 10;
157+
// Each store in memcpy follows a load, so the maximum is 5.
158+
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 5;
159+
// Each store in memmove follows a load, so the maximum is 5.
160+
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 5;
161+
// Memcmp expands to three instructions for each load:
162+
// 1. One load for each pointer being compared.
163+
// 2. One jne for each load.
164+
// The limit here should be three, since 3*3 = 9.
165+
MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = 3;
188166

189167
// CPU/Feature control
190168
HasAlu32 = STI.getHasAlu32();
@@ -915,8 +893,6 @@ const char *SBFTargetLowering::getTargetNodeName(unsigned Opcode) const {
915893
return "SBFISD::BR_CC";
916894
case SBFISD::Wrapper:
917895
return "SBFISD::Wrapper";
918-
case SBFISD::MEMCPY:
919-
return "SBFISD::MEMCPY";
920896
}
921897
return nullptr;
922898
}
@@ -966,37 +942,6 @@ SBFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
966942
return PromotedReg2;
967943
}
968944

969-
MachineBasicBlock *
970-
SBFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
971-
MachineBasicBlock *BB)
972-
const {
973-
MachineFunction *MF = MI.getParent()->getParent();
974-
MachineRegisterInfo &MRI = MF->getRegInfo();
975-
MachineInstrBuilder MIB(*MF, MI);
976-
unsigned ScratchReg;
977-
978-
// This function does custom insertion during lowering SBFISD::MEMCPY which
979-
// only has two register operands from memcpy semantics, the copy source
980-
// address and the copy destination address.
981-
//
982-
// Because we will expand SBFISD::MEMCPY into load/store pairs, we will need
983-
// a third scratch register to serve as the destination register of load and
984-
// source register of store.
985-
//
986-
// The scratch register here is with the Define | Dead | EarlyClobber flags.
987-
// The EarlyClobber flag has the semantic property that the operand it is
988-
// attached to is clobbered before the rest of the inputs are read. Hence it
989-
// must be unique among the operands to the instruction. The Define flag is
990-
// needed to convince the machine verifier that an Undef value isn't a problem
991-
// as we are loading memory into it anyway. The Dead flag is needed as the
992-
// value in scratch isn't supposed to be used by any other instruction.
993-
ScratchReg = MRI.createVirtualRegister(&SBF::GPRRegClass);
994-
MIB.addReg(ScratchReg,
995-
RegState::Define | RegState::Dead | RegState::EarlyClobber);
996-
997-
return BB;
998-
}
999-
1000945
MachineBasicBlock *
1001946
SBFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1002947
MachineBasicBlock *BB) const {
@@ -1008,7 +953,6 @@ SBFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1008953
Opc == SBF::Select_32 ||
1009954
Opc == SBF::Select_32_64);
1010955

1011-
bool isMemcpyOp = Opc == SBF::MEMCPY;
1012956
bool isAtomicFence = Opc == SBF::ATOMIC_FENCE;
1013957

1014958
#ifndef NDEBUG
@@ -1018,12 +962,10 @@ SBFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1018962
Opc == SBF::Select_Ri_32_64);
1019963

1020964

1021-
assert((isSelectRROp || isSelectRIOp || isMemcpyOp || isAtomicFence) &&
965+
assert((isSelectRROp || isSelectRIOp || isAtomicFence) &&
1022966
"Unexpected instr type to insert");
1023967
#endif
1024968

1025-
if (isMemcpyOp)
1026-
return EmitInstrWithCustomInserterMemcpy(MI, BB);
1027969

1028970
if (isAtomicFence) {
1029971
// this is currently a nop

llvm/lib/Target/SBF/SBFISelLowering.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ enum NodeType : unsigned {
2828
SELECT_CC,
2929
BR_CC,
3030
Wrapper,
31-
MEMCPY,
3231
};
3332
}
3433

@@ -159,10 +158,6 @@ class SBFTargetLowering : public TargetLowering {
159158
unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg,
160159
bool isSigned) const;
161160

162-
MachineBasicBlock * EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
163-
MachineBasicBlock *BB)
164-
const;
165-
166161
};
167162
}
168163

llvm/lib/Target/SBF/SBFInstrInfo.cpp

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -51,95 +51,6 @@ void SBFInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5151
llvm_unreachable("Impossible reg-to-reg copy");
5252
}
5353

54-
void SBFInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
55-
Register DstReg = MI->getOperand(0).getReg();
56-
Register SrcReg = MI->getOperand(1).getReg();
57-
uint64_t CopyLen = MI->getOperand(2).getImm();
58-
uint64_t Alignment = MI->getOperand(3).getImm();
59-
Register ScratchReg = MI->getOperand(4).getReg();
60-
MachineBasicBlock *BB = MI->getParent();
61-
DebugLoc dl = MI->getDebugLoc();
62-
unsigned LdOpc, StOpc;
63-
64-
#define MEM_SWITCH(X) \
65-
LdOpc = NewMemEncoding ? SBF::LD##X##_V2 : SBF::LD##X##_V1; \
66-
StOpc = NewMemEncoding ? SBF::ST##X##_V2 : SBF::ST##X##_V1;
67-
68-
unsigned BytesPerOp = std::min(static_cast<unsigned>(Alignment), 8u);
69-
switch (Alignment) {
70-
case 1:
71-
MEM_SWITCH(B)
72-
break;
73-
case 2:
74-
MEM_SWITCH(H)
75-
break;
76-
case 4:
77-
MEM_SWITCH(W)
78-
break;
79-
case 8:
80-
case 16:
81-
MEM_SWITCH(D)
82-
break;
83-
default:
84-
llvm_unreachable("unsupported memcpy alignment");
85-
}
86-
87-
unsigned IterationNum = (CopyLen >> Log2_64(BytesPerOp));
88-
for (unsigned I = 0; I < IterationNum; ++I) {
89-
BuildMI(*BB, MI, dl, get(LdOpc))
90-
.addReg(ScratchReg, RegState::Define)
91-
.addReg(SrcReg)
92-
.addImm(I * BytesPerOp);
93-
BuildMI(*BB, MI, dl, get(StOpc))
94-
.addReg(ScratchReg, RegState::Kill)
95-
.addReg(DstReg)
96-
.addImm(I * BytesPerOp);
97-
}
98-
99-
unsigned BytesLeft = CopyLen - IterationNum * BytesPerOp;
100-
unsigned Offset;
101-
if (BytesLeft == 0) {
102-
BB->erase(MI);
103-
return;
104-
}
105-
106-
if (BytesLeft < 2) {
107-
Offset = CopyLen - 1;
108-
MEM_SWITCH(B)
109-
} else if (BytesLeft <= 2) {
110-
Offset = CopyLen - 2;
111-
MEM_SWITCH(H)
112-
} else if (BytesLeft <= 4) {
113-
Offset = CopyLen - 4;
114-
MEM_SWITCH(W)
115-
} else if (BytesLeft <= 8) {
116-
Offset = CopyLen - 8;
117-
MEM_SWITCH(D)
118-
} else {
119-
llvm_unreachable("There cannot be more than 8 bytes left");
120-
}
121-
122-
BuildMI(*BB, MI, dl, get(LdOpc))
123-
.addReg(ScratchReg, RegState::Define)
124-
.addReg(SrcReg)
125-
.addImm(Offset);
126-
BuildMI(*BB, MI, dl, get(StOpc))
127-
.addReg(ScratchReg, RegState::Kill)
128-
.addReg(DstReg)
129-
.addImm(Offset);
130-
131-
BB->erase(MI);
132-
}
133-
134-
bool SBFInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
135-
if (MI.getOpcode() == SBF::MEMCPY) {
136-
expandMEMCPY(MI);
137-
return true;
138-
}
139-
140-
return false;
141-
}
142-
14354
void SBFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
14455
MachineBasicBlock::iterator I,
14556
Register SrcReg, bool IsKill, int FI,

llvm/lib/Target/SBF/SBFInstrInfo.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ class SBFInstrInfo : public SBFGenInstrInfo {
3333
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
3434
bool KillSrc) const override;
3535

36-
bool expandPostRAPseudo(MachineInstr &MI) const override;
37-
3836
void storeRegToStackSlot(MachineBasicBlock &MBB,
3937
MachineBasicBlock::iterator MBBI, Register SrcReg,
4038
bool isKill, int FrameIndex,
@@ -61,7 +59,6 @@ class SBFInstrInfo : public SBFGenInstrInfo {
6159
void initializeTargetFeatures(bool HasExplicitSext, bool NewMemEncoding);
6260

6361
private:
64-
void expandMEMCPY(MachineBasicBlock::iterator) const;
6562
bool HasExplicitSignExt;
6663
bool NewMemEncoding;
6764
};

llvm/lib/Target/SBF/SBFInstrInfo.td

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,6 @@ def SDT_SBFBrCC : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
2828
SDTCisVT<3, OtherVT>]>;
2929
def SDT_SBFWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
3030
SDTCisPtrTy<0>]>;
31-
def SDT_SBFMEMCPY : SDTypeProfile<0, 4, [SDTCisVT<0, i64>,
32-
SDTCisVT<1, i64>,
33-
SDTCisVT<2, i64>,
34-
SDTCisVT<3, i64>]>;
3531

3632
def SBFcall : SDNode<"SBFISD::CALL", SDT_SBFCall,
3733
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -47,9 +43,6 @@ def SBFbrcc : SDNode<"SBFISD::BR_CC", SDT_SBFBrCC,
4743

4844
def SBFselectcc : SDNode<"SBFISD::SELECT_CC", SDT_SBFSelectCC, [SDNPInGlue]>;
4945
def SBFWrapper : SDNode<"SBFISD::Wrapper", SDT_SBFWrapper>;
50-
def SBFmemcpy : SDNode<"SBFISD::MEMCPY", SDT_SBFMEMCPY,
51-
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
52-
SDNPMayStore, SDNPMayLoad]>;
5346
def SBFIsLittleEndian : Predicate<"CurDAG->getDataLayout().isLittleEndian()">;
5447
def SBFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">;
5548
def SBFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
@@ -1463,11 +1456,3 @@ let Predicates = [SBFHasALU32, SBFNewMemEncoding] in {
14631456
def : Pat<(i64 (extloadi32 ADDRri:$src)),
14641457
(SUBREG_TO_REG (i64 0), (LDW32_V2 ADDRri:$src), sub_32)>;
14651458
}
1466-
1467-
let usesCustomInserter = 1, isCodeGenOnly = 1 in {
1468-
def MEMCPY : Pseudo<
1469-
(outs),
1470-
(ins GPR:$dst, GPR:$src, i64imm:$len, i64imm:$align, variable_ops),
1471-
"#memcpy dst: $dst, src: $src, len: $len, align: $align",
1472-
[(SBFmemcpy GPR:$dst, GPR:$src, imm:$len, imm:$align)]>;
1473-
}

llvm/lib/Target/SBF/SBFSelectionDAGInfo.cpp

Lines changed: 0 additions & 48 deletions
This file was deleted.

llvm/lib/Target/SBF/SBFSelectionDAGInfo.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,6 @@ namespace llvm {
2020
class SBFSelectionDAGInfo : public SelectionDAGTargetInfo {
2121
public:
2222
SBFSelectionDAGInfo() {}
23-
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
24-
SDValue Chain, SDValue Dst, SDValue Src,
25-
SDValue Size, Align Alignment,
26-
bool isVolatile, bool AlwaysInline,
27-
MachinePointerInfo DstPtrInfo,
28-
MachinePointerInfo SrcPtrInfo) const override;
2923

3024
unsigned getCommonMaxStoresPerMemFunc() const {
3125
return 4;

0 commit comments

Comments
 (0)