1- From 8aa999e52ef03be7d8c05f4bd151d7df60d17e8f Mon Sep 17 00:00:00 2001
2- From: Scott Douglass <scott.douglass@arm.com>
3- Date: Tue, 13 Aug 2024 10:55:51 +0100
4- Subject: [PATCH] [ARM][CodeGen]Prefer MEMCPY LDM/STM inlining for v7-m
1+ From 411bab1ff439215c060127b6a5188ed0c9ed5d65 Mon Sep 17 00:00:00 2001
2+ From: Vrukesh V Panse <vrukesh.panse@arm.com>
3+ Date: Thu, 2 Jan 2025 10:29:56 +0000
4+ Subject: [NFC]: Update the patch file with upstream changes of SelectionDAG
55
6- This patch changes the behaviour of memcpy inlining on v7m targets.
7- The old behaviour was to inline memcpys with LDM/STM instructions.
8- Alternatively, using LD/ST instructions for memcpy inlining allowed
9- for performance gains of 1% to 2% on selected benchmarks.
10-
11- Co-authored-by: Nashe Mncube <[email protected]>
126---
137 llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 120 +++++++++++++++
148 llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 6 +
1711 create mode 100644 llvm/test/CodeGen/ARM/memcpy-v7m.ll
1812
1913diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
20- index e7ea10ff971a..09ed4ab219a1 100644
14+ index a39487c318f8..ca8de16e66c4 100644
2115--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
2216+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
2317@@ -30,6 +30,10 @@ cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
@@ -28,10 +22,10 @@ index e7ea10ff971a..09ed4ab219a1 100644
2822+ "enable-inline-memcpy-ld-st", cl::init(false), cl::Hidden,
2923+ cl::desc("Inline memcpy with LD/ST instructions."));
3024+
31- // Emit, if possible, a specialized version of the given Libcall. Typically this
32- // means selecting the appropriately aligned version, but we also convert memset
33- // of 0 into memclr.
34- @@ -136,6 +140,118 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
25+ bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
26+ return Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
27+ Opcode <= ARMISD::LAST_MEMORY_OPCODE;
28+ @@ -141,6 +145,118 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
3529 return CallResult.second;
3630 }
3731
@@ -150,7 +144,7 @@ index e7ea10ff971a..09ed4ab219a1 100644
150144 static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
151145 const SelectionDAG &DAG,
152146 ConstantSDNode *ConstantSize,
153- @@ -190,6 +306,10 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
147+ @@ -195,6 +311,10 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
154148 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
155149 Alignment.value(), RTLIB::MEMCPY);
156150
@@ -162,10 +156,10 @@ index e7ea10ff971a..09ed4ab219a1 100644
162156 unsigned NumMemOps = SizeVal >> 2;
163157 unsigned EmittedNumMemOps = 0;
164158diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
165- index 275b1c0f8dc0..6ff422c15b12 100644
159+ index d68150e66567..bdc9bbd697a3 100644
166160--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
167161+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
168- @@ -44,6 +44,12 @@ public:
162+ @@ -46,6 +46,12 @@ public:
169163 MachinePointerInfo DstPtrInfo,
170164 MachinePointerInfo SrcPtrInfo) const override;
171165
@@ -180,7 +174,7 @@ index 275b1c0f8dc0..6ff422c15b12 100644
180174 SDValue Dst, SDValue Src, SDValue Size,
181175diff --git a/llvm/test/CodeGen/ARM/memcpy-v7m.ll b/llvm/test/CodeGen/ARM/memcpy-v7m.ll
182176new file mode 100644
183- index 000000000000..12f74c04087e
177+ index 000000000000..e549958494dc
184178--- /dev/null
185179+++ b/llvm/test/CodeGen/ARM/memcpy-v7m.ll
186180@@ -0,0 +1,161 @@
0 commit comments