@@ -29,10 +29,6 @@ using namespace llvm;
29
29
30
30
#define DEBUG_TYPE " sbf-lower"
31
31
32
- static cl::opt<bool > SBFExpandMemcpyInOrder (" sbf-expand-memcpy-in-order" ,
33
- cl::Hidden, cl::init(false ),
34
- cl::desc(" Expand memcpy into load/store pairs in order" ));
35
-
36
32
static void fail (const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
37
33
MachineFunction &MF = DAG.getMachineFunction ();
38
34
DAG.getContext ()->diagnose (
@@ -153,38 +149,20 @@ SBFTargetLowering::SBFTargetLowering(const TargetMachine &TM,
153
149
setMinFunctionAlignment (Align (8 ));
154
150
setPrefFunctionAlignment (Align (8 ));
155
151
156
- if (SBFExpandMemcpyInOrder) {
157
- // LLVM generic code will try to expand memcpy into load/store pairs at this
158
- // stage which is before quite a few IR optimization passes, therefore the
159
- // loads and stores could potentially be moved apart from each other which
160
- // will cause trouble to memcpy pattern matcher inside kernel eBPF JIT
161
- // compilers.
162
- //
163
- // When -sbf-expand-memcpy-in-order specified, we want to defer the expand
164
- // of memcpy to later stage in IR optimization pipeline so those load/store
165
- // pairs won't be touched and could be kept in order. Hence, we set
166
- // MaxStoresPerMem* to zero to disable the generic getMemcpyLoadsAndStores
167
- // code path, and ask LLVM to use target expander EmitTargetCodeForMemcpy.
168
- MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0 ;
169
- MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0 ;
170
- MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0 ;
171
- MaxLoadsPerMemcmp = 0 ;
172
- } else {
173
- // A syscall consumes at least 10 CUs, so we should only invoke it when
174
- // the number of instructions is at least 10.
175
-
176
- // Memset translates to stdw or stdxw, so the maximum should be 10.
177
- MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 10 ;
178
- // Each store in memcpy follows a load, so the maximum is 5.
179
- MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 5 ;
180
- // Each store in memmove follows a load, so the maximum is 5.
181
- MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 5 ;
182
- // Memcmp expands to three instructions for each load:
183
- // 1. One load for each pointer being compared.
184
- // 2. One jne for each load.
185
- // The limit here should be three, since 3*3 = 9;
186
- MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = 3 ;
187
- }
152
+ // A syscall consumes at least 10 CUs, so we should only invoke it when
153
+ // the number of instructions is at least 10.
154
+
155
+ // Memset translates to stdw or stdxw, so the maximum should be 10.
156
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 10 ;
157
+ // Each store in memcpy follows a load, so the maximum is 5.
158
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 5 ;
159
+ // Each store in memmove follows a load, so the maximum is 5.
160
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 5 ;
161
+ // Memcmp expands to three instructions for each load:
162
+ // 1. One load for each pointer being compared.
163
+ // 2. One jne for each load.
164
+ // The limit here should be three, since 3*3 = 9;
165
+ MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = 3 ;
188
166
189
167
// CPU/Feature control
190
168
HasAlu32 = STI.getHasAlu32 ();
@@ -915,8 +893,6 @@ const char *SBFTargetLowering::getTargetNodeName(unsigned Opcode) const {
915
893
return " SBFISD::BR_CC" ;
916
894
case SBFISD::Wrapper:
917
895
return " SBFISD::Wrapper" ;
918
- case SBFISD::MEMCPY:
919
- return " SBFISD::MEMCPY" ;
920
896
}
921
897
return nullptr ;
922
898
}
@@ -966,37 +942,6 @@ SBFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
966
942
return PromotedReg2;
967
943
}
968
944
969
- MachineBasicBlock *
970
- SBFTargetLowering::EmitInstrWithCustomInserterMemcpy (MachineInstr &MI,
971
- MachineBasicBlock *BB)
972
- const {
973
- MachineFunction *MF = MI.getParent ()->getParent ();
974
- MachineRegisterInfo &MRI = MF->getRegInfo ();
975
- MachineInstrBuilder MIB (*MF, MI);
976
- unsigned ScratchReg;
977
-
978
- // This function performs custom insertion while lowering SBFISD::MEMCPY,
979
- // which only has two register operands from memcpy semantics: the copy
980
- // source address and the copy destination address.
981
- //
982
- // Because we will expand SBFISD::MEMCPY into load/store pairs, we will need
983
- // a third scratch register to serve as the destination register of each load
984
- // and the source register of each store.
985
- //
986
- // The scratch register here carries the Define | Dead | EarlyClobber flags.
987
- // The EarlyClobber flag has the semantic property that the operand it is
988
- // attached to is clobbered before the rest of the inputs are read. Hence it
989
- // must be unique among the operands to the instruction. The Define flag is
990
- // needed to convince the machine verifier that an Undef value isn't a problem
991
- // as we are loading memory into it anyway. The Dead flag is needed as the
992
- // value in scratch isn't supposed to be used by any other instruction.
993
- ScratchReg = MRI.createVirtualRegister (&SBF::GPRRegClass);
994
- MIB.addReg (ScratchReg,
995
- RegState::Define | RegState::Dead | RegState::EarlyClobber);
996
-
997
- return BB;
998
- }
999
-
1000
945
MachineBasicBlock *
1001
946
SBFTargetLowering::EmitInstrWithCustomInserter (MachineInstr &MI,
1002
947
MachineBasicBlock *BB) const {
@@ -1008,7 +953,6 @@ SBFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1008
953
Opc == SBF::Select_32 ||
1009
954
Opc == SBF::Select_32_64);
1010
955
1011
- bool isMemcpyOp = Opc == SBF::MEMCPY;
1012
956
bool isAtomicFence = Opc == SBF::ATOMIC_FENCE;
1013
957
1014
958
#ifndef NDEBUG
@@ -1018,12 +962,10 @@ SBFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1018
962
Opc == SBF::Select_Ri_32_64);
1019
963
1020
964
1021
- assert ((isSelectRROp || isSelectRIOp || isMemcpyOp || isAtomicFence) &&
965
+ assert ((isSelectRROp || isSelectRIOp || isAtomicFence) &&
1022
966
" Unexpected instr type to insert" );
1023
967
#endif
1024
968
1025
- if (isMemcpyOp)
1026
- return EmitInstrWithCustomInserterMemcpy (MI, BB);
1027
969
1028
970
if (isAtomicFence) {
1029
971
// this is currently a nop
0 commit comments