@@ -34,10 +34,6 @@ static cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
3434 clEnumValN(TPLoop::Allow, " allow" ,
3535 " Allow (may be subject to certain conditions) "
3636 " conversion of memcpy to TP loop." )));
37- // #405
// Hidden boolean command-line option, off by default (cl::init(false)).
// Its description string states the purpose: inline memcpy with LD/ST
// instructions. In this file it is read in EmitTargetCodeForMemcpy, where it
// gates the call to EmitMemcpyAsLdSt together with the subtarget checks
// isMClass() and hasV7Ops().
38- static cl::opt<bool > EnableInlineMemcpyAsLdSt (
39- " enable-inline-memcpy-ld-st" , cl::init(false ), cl::Hidden,
40- cl::desc(" Inline memcpy with LD/ST instructions." ));
4137
// Default constructor: forwards ARMGenSDNodeInfo to the
// SelectionDAGGenTargetInfo base class and does nothing else (empty body).
4238ARMSelectionDAGInfo::ARMSelectionDAGInfo ()
4339 : SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {}
@@ -226,119 +222,6 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
226222 return CallResult.second ;
227223}
228224
229- // #405
// Emits a memcpy of SizeVal bytes as explicit load/store DAG nodes and returns
// the resulting chain. SizeVal is a plain integer, so the copy is fully
// unrolled: SizeVal >> 2 i32 copies, followed by at most one i16 and one i8
// copy for the remaining SizeVal & 3 bytes.
//
// Parameters:
//   DAG         - SelectionDAG in which all nodes are created.
//   dl          - debug location attached to every node created here.
//   Subtarget   - only consulted for isThumb1Only(), which selects the batch
//                 size (4 loads per batch on Thumb1, 6 otherwise).
//   Chain       - incoming chain; the first batch of loads hangs off it.
//   Dst, Src    - base addresses; each access address is formed with an i32
//                 ISD::ADD of the base and a constant byte offset.
//   SizeVal     - number of bytes to copy.
//   isVolatile  - when true, MOVolatile is set on every load and store.
//   DstPtrInfo,
//   SrcPtrInfo  - pointer info for the two buffers; each access uses
//                 getWithOffset(offset), and both are queried for
//                 dereferenceability over the whole SizeVal range.
//
// Returns: the input Chain unchanged when SizeVal is 0; otherwise a
// TokenFactor joining the last group of stores that was emitted.
230- SDValue ARMSelectionDAGInfo::EmitMemcpyAsLdSt (
231- SelectionDAG &DAG, SDLoc dl, const ARMSubtarget &Subtarget, SDValue Chain,
232- SDValue Dst, SDValue Src, uint64_t SizeVal, bool isVolatile,
233- MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
234- // Do repeated batches of 4-byte loads and stores.
// BytesLeft is the 0-3 byte tail; NumMemOps is the number of whole 4-byte words.
// NOTE(review): SizeVal is uint64_t but NumMemOps is unsigned, so the shifted
// value is narrowed on assignment; presumably callers bound the size - confirm.
235- unsigned BytesLeft = SizeVal & 3 ;
236- unsigned NumMemOps = SizeVal >> 2 ;
237- unsigned EmittedNumMemOps = 0 ;
238- EVT VT = MVT::i32 ;
239- unsigned VTSize = 4 ;
240- unsigned I = 0 ;
241- // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
242- const unsigned MaxLoads = Subtarget.isThumb1Only () ? 4 : 6 ;
// Both arrays are sized for the largest MaxLoads value (6). TFOps is reused:
// it first collects the chain results of a batch of loads, then the store
// nodes of the same batch.
243- SDValue TFOps[6 ];
244- SDValue Loads[6 ];
// Running byte offsets from Src and Dst; they carry over from the word loop
// into the tail handling below.
245- uint64_t SrcOff = 0 , DstOff = 0 ;
246-
// Memory-operand flags. The volatile bit is shared by loads and stores.
247- MachineMemOperand::Flags MOFlags = MachineMemOperand::Flags::MONone;
248- if (isVolatile)
249- MOFlags = MachineMemOperand::Flags::MOVolatile;
// Loads additionally get MODereferenceable when the source pointer info
// reports the whole SizeVal range as dereferenceable.
250- MachineMemOperand::Flags LoadMOFlags = MOFlags;
251- if (SrcPtrInfo.isDereferenceable (SizeVal, *DAG.getContext (),
252- DAG.getDataLayout ()))
253- LoadMOFlags |= MachineMemOperand::Flags::MODereferenceable;
// Loads are also marked MOInvariant when the source IR value is a
// GlobalVariable for which isConstant() is true.
254- if (auto *V = SrcPtrInfo.V .dyn_cast <const Value *>())
255- if (isa<GlobalVariable>(V) && cast<GlobalVariable>(V)->isConstant ())
256- LoadMOFlags |= MachineMemOperand::Flags::MOInvariant;
// Stores get MODereferenceable under the same test applied to the destination.
257- MachineMemOperand::Flags StoreMOFlags = MOFlags;
258- if (DstPtrInfo.isDereferenceable (SizeVal, *DAG.getContext (),
259- DAG.getDataLayout ()))
260- StoreMOFlags |= MachineMemOperand::Flags::MODereferenceable;
261-
262- // Emit up to MaxLoads loads, then a TokenFactor barrier, then the
263- // same number of stores. The loads and stores may get combined into
264- // ldm/stm later on.
265- while (EmittedNumMemOps < NumMemOps) {
// Every load in the batch uses the same incoming Chain, so the loads are not
// ordered against each other. MaybeAlign(0) is passed as the alignment;
// presumably this means "no known alignment" - TODO confirm.
266- for (I = 0 ; I < MaxLoads && EmittedNumMemOps + I < NumMemOps; ++I) {
267- Loads[I] = DAG.getLoad (VT, dl, Chain,
268- DAG.getNode (ISD::ADD, dl, MVT::i32 , Src,
269- DAG.getConstant (SrcOff, dl, MVT::i32 )),
270- SrcPtrInfo.getWithOffset (SrcOff), MaybeAlign (0 ),
271- LoadMOFlags);
// Result 1 of the load node is collected as a TokenFactor operand.
272- TFOps[I] = Loads[I].getValue (1 );
273- SrcOff += VTSize;
274- }
// Join the I load chains; every store of this batch depends on all its loads.
275- Chain = DAG.getNode (ISD::TokenFactor, dl, MVT::Other, ArrayRef (TFOps, I));
276-
// Same bounds as the load loop (EmittedNumMemOps has not been updated yet),
// so exactly one store is emitted per load of this batch.
277- for (I = 0 ; I < MaxLoads && EmittedNumMemOps + I < NumMemOps; ++I) {
278- TFOps[I] = DAG.getStore (
279- Chain, dl, Loads[I],
280- DAG.getNode (ISD::ADD, dl, MVT::i32 , Dst,
281- DAG.getConstant (DstOff, dl, MVT::i32 )),
282- DstPtrInfo.getWithOffset (DstOff), MaybeAlign (0 ), StoreMOFlags);
283- DstOff += VTSize;
284- }
// Join the stores; this becomes the incoming chain of the next batch.
285- Chain = DAG.getNode (ISD::TokenFactor, dl, MVT::Other, ArrayRef (TFOps, I));
286-
287- EmittedNumMemOps += I;
288- }
289-
// No tail bytes: Chain is the last store TokenFactor, or the unmodified input
// chain if the word loop never ran.
290- if (BytesLeft == 0 )
291- return Chain;
292-
293- // Issue loads / stores for the trailing (1 - 3) bytes.
// BytesLeft is consumed by the load loop, so it is saved here and restored
// before the store loop, which then walks the same sequence of widths.
294- unsigned BytesLeftSave = BytesLeft;
295- I = 0 ;
296- while (BytesLeft) {
// Widest piece first: i16 while at least 2 bytes remain, otherwise i8.
// With BytesLeft in 1..3 this emits at most two loads.
297- if (BytesLeft >= 2 ) {
298- VT = MVT::i16 ;
299- VTSize = 2 ;
300- } else {
301- VT = MVT::i8 ;
302- VTSize = 1 ;
303- }
304-
305- Loads[I] = DAG.getLoad (VT, dl, Chain,
306- DAG.getNode (ISD::ADD, dl, MVT::i32 , Src,
307- DAG.getConstant (SrcOff, dl, MVT::i32 )),
308- SrcPtrInfo.getWithOffset (SrcOff), MaybeAlign (0 ),
309- LoadMOFlags);
310-
311- TFOps[I] = Loads[I].getValue (1 );
312- ++I;
313- SrcOff += VTSize;
314- BytesLeft -= VTSize;
315- }
// Join the tail loads before any tail store is emitted.
316- Chain = DAG.getNode (ISD::TokenFactor, dl, MVT::Other, ArrayRef (TFOps, I));
317-
318- I = 0 ;
319- BytesLeft = BytesLeftSave;
320- while (BytesLeft) {
// VT and VTSize are recomputed exactly as in the load loop; VTSize drives the
// offset and remaining-byte updates, while the stored value is Loads[I].
321- if (BytesLeft >= 2 ) {
322- VT = MVT::i16 ;
323- VTSize = 2 ;
324- } else {
325- VT = MVT::i8 ;
326- VTSize = 1 ;
327- }
328-
329- TFOps[I] = DAG.getStore (Chain, dl, Loads[I],
330- DAG.getNode (ISD::ADD, dl, MVT::i32 , Dst,
331- DAG.getConstant (DstOff, dl, MVT::i32 )),
332- DstPtrInfo.getWithOffset (DstOff), MaybeAlign (0 ),
333- StoreMOFlags);
334- ++I;
335- DstOff += VTSize;
336- BytesLeft -= VTSize;
337- }
338-
// Final chain: TokenFactor over the tail stores.
339- return DAG.getNode (ISD::TokenFactor, dl, MVT::Other, ArrayRef (TFOps, I));
340- }
341-
342225static bool shouldGenerateInlineTPLoop (const ARMSubtarget &Subtarget,
343226 const SelectionDAG &DAG,
344227 ConstantSDNode *ConstantSize,
@@ -393,11 +276,6 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
393276 return EmitSpecializedLibcall (DAG, dl, Chain, Dst, Src, Size,
394277 Alignment.value (), RTLIB::MEMCPY);
395278
396- // #405
397- if (EnableInlineMemcpyAsLdSt && Subtarget.isMClass () && Subtarget.hasV7Ops ())
398- return EmitMemcpyAsLdSt (DAG, dl, Subtarget, Chain, Dst, Src, SizeVal,
399- isVolatile, DstPtrInfo, SrcPtrInfo);
400-
401279 unsigned BytesLeft = SizeVal & 3 ;
402280 unsigned NumMemOps = SizeVal >> 2 ;
403281 unsigned EmittedNumMemOps = 0 ;
0 commit comments