Skip to content

Commit 530603e

Browse files
authored
[ATFE][ARM] remove -enable-inline-memcpy-ld-st patch (#620)
Downstream patch #406 began failing to build after an upstream change to the ARM backend. This patch removes the support that the downstream patch enabled, because I no longer think its performance improvements are worth the cost of maintaining it. Further work should be done upstream to regain any lost performance. Removes downstream issue: #405
2 parents 604c482 + 82ebfa7 commit 530603e

File tree

5 files changed

+0
-295
lines changed

5 files changed

+0
-295
lines changed

arm-software/embedded/Omax.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,5 @@
88
-mllvm -enable-dfa-jump-thread \
99
-mllvm -enable-loop-flatten \
1010
-mllvm -enable-unroll-and-jam \
11-
-mllvm -enable-inline-memcpy-ld-st \
1211
-mllvm -enable-loop-versioning-licm \
1312
-mllvm -aarch64-force-unroll-threshold=12

arm-software/embedded/OmaxLTO.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,4 @@
1212
-Wl,-plugin-opt=-enable-dfa-jump-thread \
1313
-Wl,-plugin-opt=-enable-loop-flatten \
1414
-Wl,-plugin-opt=-enable-unroll-and-jam \
15-
-Wl,-plugin-opt=-enable-inline-memcpy-ld-st \
1615
-Wl,-plugin-opt=-enable-loop-versioning-licm

llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp

Lines changed: 0 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,6 @@ static cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
3434
clEnumValN(TPLoop::Allow, "allow",
3535
"Allow (may be subject to certain conditions) "
3636
"conversion of memcpy to TP loop.")));
37-
// #405
38-
static cl::opt<bool> EnableInlineMemcpyAsLdSt(
39-
"enable-inline-memcpy-ld-st", cl::init(false), cl::Hidden,
40-
cl::desc("Inline memcpy with LD/ST instructions."));
4137

4238
ARMSelectionDAGInfo::ARMSelectionDAGInfo()
4339
: SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {}
@@ -226,119 +222,6 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
226222
return CallResult.second;
227223
}
228224

229-
// #405
230-
SDValue ARMSelectionDAGInfo::EmitMemcpyAsLdSt(
231-
SelectionDAG &DAG, SDLoc dl, const ARMSubtarget &Subtarget, SDValue Chain,
232-
SDValue Dst, SDValue Src, uint64_t SizeVal, bool isVolatile,
233-
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
234-
// Do repeated batches of 4-byte loads and stores.
235-
unsigned BytesLeft = SizeVal & 3;
236-
unsigned NumMemOps = SizeVal >> 2;
237-
unsigned EmittedNumMemOps = 0;
238-
EVT VT = MVT::i32;
239-
unsigned VTSize = 4;
240-
unsigned I = 0;
241-
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
242-
const unsigned MaxLoads = Subtarget.isThumb1Only() ? 4 : 6;
243-
SDValue TFOps[6];
244-
SDValue Loads[6];
245-
uint64_t SrcOff = 0, DstOff = 0;
246-
247-
MachineMemOperand::Flags MOFlags = MachineMemOperand::Flags::MONone;
248-
if (isVolatile)
249-
MOFlags = MachineMemOperand::Flags::MOVolatile;
250-
MachineMemOperand::Flags LoadMOFlags = MOFlags;
251-
if (SrcPtrInfo.isDereferenceable(SizeVal, *DAG.getContext(),
252-
DAG.getDataLayout()))
253-
LoadMOFlags |= MachineMemOperand::Flags::MODereferenceable;
254-
if (auto *V = SrcPtrInfo.V.dyn_cast<const Value *>())
255-
if (isa<GlobalVariable>(V) && cast<GlobalVariable>(V)->isConstant())
256-
LoadMOFlags |= MachineMemOperand::Flags::MOInvariant;
257-
MachineMemOperand::Flags StoreMOFlags = MOFlags;
258-
if (DstPtrInfo.isDereferenceable(SizeVal, *DAG.getContext(),
259-
DAG.getDataLayout()))
260-
StoreMOFlags |= MachineMemOperand::Flags::MODereferenceable;
261-
262-
// Emit up to MaxLoads loads, then a TokenFactor barrier, then the
263-
// same number of stores. The loads and stores may get combined into
264-
// ldm/stm later on.
265-
while (EmittedNumMemOps < NumMemOps) {
266-
for (I = 0; I < MaxLoads && EmittedNumMemOps + I < NumMemOps; ++I) {
267-
Loads[I] = DAG.getLoad(VT, dl, Chain,
268-
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
269-
DAG.getConstant(SrcOff, dl, MVT::i32)),
270-
SrcPtrInfo.getWithOffset(SrcOff), MaybeAlign(0),
271-
LoadMOFlags);
272-
TFOps[I] = Loads[I].getValue(1);
273-
SrcOff += VTSize;
274-
}
275-
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, I));
276-
277-
for (I = 0; I < MaxLoads && EmittedNumMemOps + I < NumMemOps; ++I) {
278-
TFOps[I] = DAG.getStore(
279-
Chain, dl, Loads[I],
280-
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
281-
DAG.getConstant(DstOff, dl, MVT::i32)),
282-
DstPtrInfo.getWithOffset(DstOff), MaybeAlign(0), StoreMOFlags);
283-
DstOff += VTSize;
284-
}
285-
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, I));
286-
287-
EmittedNumMemOps += I;
288-
}
289-
290-
if (BytesLeft == 0)
291-
return Chain;
292-
293-
// Issue loads / stores for the trailing (1 - 3) bytes.
294-
unsigned BytesLeftSave = BytesLeft;
295-
I = 0;
296-
while (BytesLeft) {
297-
if (BytesLeft >= 2) {
298-
VT = MVT::i16;
299-
VTSize = 2;
300-
} else {
301-
VT = MVT::i8;
302-
VTSize = 1;
303-
}
304-
305-
Loads[I] = DAG.getLoad(VT, dl, Chain,
306-
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
307-
DAG.getConstant(SrcOff, dl, MVT::i32)),
308-
SrcPtrInfo.getWithOffset(SrcOff), MaybeAlign(0),
309-
LoadMOFlags);
310-
311-
TFOps[I] = Loads[I].getValue(1);
312-
++I;
313-
SrcOff += VTSize;
314-
BytesLeft -= VTSize;
315-
}
316-
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, I));
317-
318-
I = 0;
319-
BytesLeft = BytesLeftSave;
320-
while (BytesLeft) {
321-
if (BytesLeft >= 2) {
322-
VT = MVT::i16;
323-
VTSize = 2;
324-
} else {
325-
VT = MVT::i8;
326-
VTSize = 1;
327-
}
328-
329-
TFOps[I] = DAG.getStore(Chain, dl, Loads[I],
330-
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
331-
DAG.getConstant(DstOff, dl, MVT::i32)),
332-
DstPtrInfo.getWithOffset(DstOff), MaybeAlign(0),
333-
StoreMOFlags);
334-
++I;
335-
DstOff += VTSize;
336-
BytesLeft -= VTSize;
337-
}
338-
339-
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, I));
340-
}
341-
342225
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
343226
const SelectionDAG &DAG,
344227
ConstantSDNode *ConstantSize,
@@ -393,11 +276,6 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
393276
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
394277
Alignment.value(), RTLIB::MEMCPY);
395278

396-
// #405
397-
if (EnableInlineMemcpyAsLdSt && Subtarget.isMClass() && Subtarget.hasV7Ops())
398-
return EmitMemcpyAsLdSt(DAG, dl, Subtarget, Chain, Dst, Src, SizeVal,
399-
isVolatile, DstPtrInfo, SrcPtrInfo);
400-
401279
unsigned BytesLeft = SizeVal & 3;
402280
unsigned NumMemOps = SizeVal >> 2;
403281
unsigned EmittedNumMemOps = 0;

llvm/lib/Target/ARM/ARMSelectionDAGInfo.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,6 @@ class ARMSelectionDAGInfo : public SelectionDAGGenTargetInfo {
108108
MachinePointerInfo DstPtrInfo,
109109
MachinePointerInfo SrcPtrInfo) const override;
110110

111-
// #405
112-
SDValue EmitMemcpyAsLdSt(SelectionDAG &DAG, SDLoc dl,
113-
const ARMSubtarget &Subtarget, SDValue Chain,
114-
SDValue Dst, SDValue Src, uint64_t SizeVal,
115-
bool isVolatile, MachinePointerInfo DstPtrInfo,
116-
MachinePointerInfo SrcPtrInfo) const;
117-
118111
SDValue
119112
EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
120113
SDValue Dst, SDValue Src, SDValue Size,

llvm/test/CodeGen/ARM/memcpy-v7m.ll

Lines changed: 0 additions & 164 deletions
This file was deleted.

0 commit comments

Comments
 (0)