Commit 8bc0d1c
Disable MEMCPY LDM/STM inlining for Cortex-M7

Performance improvements of around 1-2% have been seen on selected benchmarks when LDM/STM inlining is disabled for Cortex-M7. This adds a patch file that applies the change. Change-Id: I5d2cdcfc76a24c7cfbe63a6ca5fe9ea00e1d1fda
1 parent 4fbf466 commit 8bc0d1c

File tree

1 file changed: 399 additions, 0 deletions
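Before the patch body, a quick reproduction sketch (not part of the commit): the patch gates the new expansion behind a subtarget feature, so its effect can be compared by toggling that feature on the test the patch adds. The feature spelling comes from the SubtargetFeature definition in the patch below; the output file names are placeholders.

  # Cortex-M7 default after the patch: memcpy expands to discrete loads/stores.
  llc -mtriple=thumbv7em-eabi -mcpu=cortex-m7 llvm/test/CodeGen/ARM/memcpy-v7m.ll -o m7-ldst.s
  # Clearing the feature restores the previous LDM/STM expansion.
  llc -mtriple=thumbv7em-eabi -mcpu=cortex-m7 -mattr=-use-inline-memcpy-ldst llvm/test/CodeGen/ARM/memcpy-v7m.ll -o m7-ldm-stm.s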
From 61af6af10d10a08b81d3924fa5b35bfb548b2a05 Mon Sep 17 00:00:00 2001
From: nasmnc01 <[email protected]>
Date: Tue, 13 Aug 2024 10:55:51 +0100
Subject: [PATCH] [ARM][CodeGen] Disable MEMCPY LDM/STM inlining for v7-m

This patch disables the expansion of MEMCPY to LDM/STM
on v7-m targets. This is due to a slowdown caused
by this inlining method.

Change-Id: I91095299c2c67670a16849d08540bdbc07a95adc
---
 llvm/lib/Target/ARM/ARMFeatures.td          |   5 +
 llvm/lib/Target/ARM/ARMProcessors.td        |   2 +-
 llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 121 ++++++++++++++
 llvm/lib/Target/ARM/ARMSelectionDAGInfo.h   |   6 +
 llvm/lib/Target/ARM/ARMSubtarget.h          |   2 +
 llvm/test/CodeGen/ARM/memcpy-v7m.ll         | 165 ++++++++++++++++++++
 6 files changed, 300 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/ARM/memcpy-v7m.ll

diff --git a/llvm/lib/Target/ARM/ARMFeatures.td b/llvm/lib/Target/ARM/ARMFeatures.td
index bb437698296c..f7fa00aba424 100644
--- a/llvm/lib/Target/ARM/ARMFeatures.td
+++ b/llvm/lib/Target/ARM/ARMFeatures.td
@@ -510,6 +510,11 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
     "DisablePostRAScheduler", "true",
     "Don't schedule again after register allocation">;
 
+def FeatureUseInlineMemcpyAsLdSt :
+    SubtargetFeature<"use-inline-memcpy-ldst", "UseInlineMemcpyAsLdSt",
+                     "true", "Use memcpy inlining as LD/ST instructions">;
+
+
 // Armv8.5-A extensions
 
 // Has speculation barrier.
diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td
index b94a5fc16146..ffb0c86bc687 100644
--- a/llvm/lib/Target/ARM/ARMProcessors.td
+++ b/llvm/lib/Target/ARM/ARMProcessors.td
@@ -96,7 +96,7 @@ def ProcR52plus : SubtargetFeature<"r52plus", "ARMProcFamily", "CortexR52plus",
 def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
                               "Cortex-M3 ARM processors", []>;
 def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7",
-                              "Cortex-M7 ARM processors", []>;
+                              "Cortex-M7 ARM processors", [FeatureUseInlineMemcpyAsLdSt]>;
 
 //===----------------------------------------------------------------------===//
 // ARM processors
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index c57825949c1c..12db2ab1fca2 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -12,6 +12,7 @@
 
 #include "ARMTargetMachine.h"
 #include "ARMTargetTransformInfo.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/CommandLine.h"
@@ -138,6 +139,122 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
   return CallResult.second;
 }
 
+SDValue ARMSelectionDAGInfo::EmitMemcpyAsLdSt(
+    SelectionDAG &DAG, SDLoc dl, const ARMSubtarget &Subtarget, SDValue Chain,
+    SDValue Dst, SDValue Src, uint64_t SizeVal, bool isVolatile,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  // Do repeated batches of 4-byte loads and stores.
+  unsigned BytesLeft = SizeVal & 3;
+  unsigned NumMemOps = SizeVal >> 2;
+  unsigned EmittedNumMemOps = 0;
+  EVT VT = MVT::i32;
+  unsigned VTSize = 4;
+  unsigned I = 0;
+  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
+  const unsigned MaxLoads = Subtarget.isThumb1Only() ? 4 : 6;
+  SmallVector<SDValue> TFOps(6);
+  SmallVector<SDValue> Loads(6);
+  uint64_t SrcOff = 0, DstOff = 0;
+
+  MachineMemOperand::Flags MOFlags = MachineMemOperand::Flags::MONone;
+  if (isVolatile)
+    MOFlags = MachineMemOperand::Flags::MOVolatile;
+  MachineMemOperand::Flags LoadMOFlags = MOFlags;
+  if (SrcPtrInfo.isDereferenceable(SizeVal, *DAG.getContext(),
+                                   DAG.getDataLayout()))
+    LoadMOFlags |= MachineMemOperand::Flags::MODereferenceable;
+  if (auto *V = SrcPtrInfo.V.dyn_cast<const Value *>())
+    if (isa<GlobalVariable>(V) && cast<GlobalVariable>(V)->isConstant())
+      LoadMOFlags |= MachineMemOperand::Flags::MOInvariant;
+  MachineMemOperand::Flags StoreMOFlags = MOFlags;
+  if (DstPtrInfo.isDereferenceable(SizeVal, *DAG.getContext(),
+                                   DAG.getDataLayout()))
+    StoreMOFlags |= MachineMemOperand::Flags::MODereferenceable;
+
+  // Emit up to MaxLoads loads, then a TokenFactor barrier, then the
+  // same number of stores. The loads and stores may get combined into
+  // ldm/stm later on.
+  while (EmittedNumMemOps < NumMemOps) {
+    for (I = 0; I < MaxLoads && EmittedNumMemOps + I < NumMemOps; ++I) {
+      Loads[I] = DAG.getLoad(VT, dl, Chain,
+                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                         DAG.getConstant(SrcOff, dl, MVT::i32)),
+                             SrcPtrInfo.getWithOffset(SrcOff), MaybeAlign(0),
+                             LoadMOFlags);
+      TFOps[I] = Loads[I].getValue(1);
+      SrcOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        ArrayRef(TFOps.data(), I));
+
+    for (I = 0; I < MaxLoads && EmittedNumMemOps + I < NumMemOps; ++I) {
+      TFOps[I] = DAG.getStore(
+          Chain, dl, Loads[I],
+          DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+                      DAG.getConstant(DstOff, dl, MVT::i32)),
+          DstPtrInfo.getWithOffset(DstOff), MaybeAlign(0), StoreMOFlags);
+      DstOff += VTSize;
+    }
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        ArrayRef(TFOps.data(), I));
+
+    EmittedNumMemOps += I;
+  }
+
+  if (BytesLeft == 0)
+    return Chain;
+
+  // Issue loads / stores for the trailing (1 - 3) bytes.
+  unsigned BytesLeftSave = BytesLeft;
+  I = 0;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    Loads[I] = DAG.getLoad(VT, dl, Chain,
+                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
+                           SrcPtrInfo.getWithOffset(SrcOff), MaybeAlign(0),
+                           LoadMOFlags);
+
+    TFOps[I] = Loads[I].getValue(1);
+    ++I;
+    SrcOff += VTSize;
+    BytesLeft -= VTSize;
+  }
+  Chain =
+      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps.data(), I));
+
+  I = 0;
+  BytesLeft = BytesLeftSave;
+  while (BytesLeft) {
+    if (BytesLeft >= 2) {
+      VT = MVT::i16;
+      VTSize = 2;
+    } else {
+      VT = MVT::i8;
+      VTSize = 1;
+    }
+
+    TFOps[I] = DAG.getStore(Chain, dl, Loads[I],
+                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+                                        DAG.getConstant(DstOff, dl, MVT::i32)),
+                            DstPtrInfo.getWithOffset(DstOff), MaybeAlign(0),
+                            StoreMOFlags);
+    ++I;
+    DstOff += VTSize;
+    BytesLeft -= VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     ArrayRef(TFOps.data(), I));
+}
+
 static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
                                        const SelectionDAG &DAG,
                                        ConstantSDNode *ConstantSize,
@@ -192,6 +309,10 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
                                   Alignment.value(), RTLIB::MEMCPY);
 
+  if (Subtarget.allowInlineMemcpyAsLdSt())
+    return EmitMemcpyAsLdSt(DAG, dl, Subtarget, Chain, Dst, Src, SizeVal,
+                            isVolatile, DstPtrInfo, SrcPtrInfo);
+
   unsigned BytesLeft = SizeVal & 3;
   unsigned NumMemOps = SizeVal >> 2;
   unsigned EmittedNumMemOps = 0;
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index 275b1c0f8dc0..6ff422c15b12 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -44,6 +44,12 @@ public:
                                   MachinePointerInfo DstPtrInfo,
                                   MachinePointerInfo SrcPtrInfo) const override;
 
+  SDValue EmitMemcpyAsLdSt(SelectionDAG &DAG, SDLoc dl,
+                           const ARMSubtarget &Subtarget, SDValue Chain,
+                           SDValue Dst, SDValue Src, uint64_t SizeVal,
+                           bool isVolatile, MachinePointerInfo DstPtrInfo,
+                           MachinePointerInfo SrcPtrInfo) const;
+
   SDValue
   EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
                            SDValue Dst, SDValue Src, SDValue Size,
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 2f7af05a259f..0acf919b1360 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -523,6 +523,8 @@ public:
   bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
                                    unsigned PhysReg) const override;
   unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
+
+  bool allowInlineMemcpyAsLdSt() const { return UseInlineMemcpyAsLdSt; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/ARM/memcpy-v7m.ll b/llvm/test/CodeGen/ARM/memcpy-v7m.ll
new file mode 100644
index 000000000000..2a90f44fe3d3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/memcpy-v7m.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=thumbv7em-eabi -mcpu=cortex-m7 -verify-machineinstrs %s -o - | FileCheck %s
+
+@d = external global [64 x i32]
+@s = external global [64 x i32]
+@d_32 = external global[32 x i32]
+@s_32 = external global[32 x i32]
+
+
+; Function Attrs: nounwind
+define void @t1() #0 {
+; CHECK-LABEL: t1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movw r0, :lower16:d
+; CHECK-NEXT:    movw r2, :lower16:s
+; CHECK-NEXT:    movt r0, :upper16:d
+; CHECK-NEXT:    movt r2, :upper16:s
+; CHECK-NEXT:    ldr r1, [r0]
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    ldr r3, [r0, #4]
+; CHECK-NEXT:    str r3, [r2, #4]
+; CHECK-NEXT:    ldr r1, [r0, #8]
+; CHECK-NEXT:    ldr r3, [r0, #12]
+; CHECK-NEXT:    ldrb r0, [r0, #16]
+; CHECK-NEXT:    strd r1, r3, [r2, #8]
+; CHECK-NEXT:    strb r0, [r2, #16]
+; CHECK-NEXT:    bx lr
+entry:
+; We use '[rl0-9]+' to allow 'r0'..'r12', 'lr'
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @t2() #0 {
+; CHECK-LABEL: t2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movw r0, :lower16:d
+; CHECK-NEXT:    movw r1, :lower16:s
+; CHECK-NEXT:    movt r0, :upper16:d
+; CHECK-NEXT:    movt r1, :upper16:s
+; CHECK-NEXT:    ldr.w r2, [r0, #11]
+; CHECK-NEXT:    str.w r2, [r1, #11]
+; CHECK-NEXT:    ldr r2, [r0]
+; CHECK-NEXT:    str r2, [r1]
+; CHECK-NEXT:    ldr r2, [r0, #4]
+; CHECK-NEXT:    str r2, [r1, #4]
+; CHECK-NEXT:    ldr r0, [r0, #8]
+; CHECK-NEXT:    str r0, [r1, #8]
+; CHECK-NEXT:    bx lr
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+
+define void @t3() #0 {
+; CHECK-LABEL: t3:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    movw r0, :lower16:d_32
+; CHECK-NEXT:    movw r2, :lower16:s_32
+; CHECK-NEXT:    movt r0, :upper16:d_32
+; CHECK-NEXT:    movt r2, :upper16:s_32
+; CHECK-NEXT:    ldr r1, [r0]
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    ldr r3, [r0, #4]
+; CHECK-NEXT:    str r3, [r2, #4]
+; CHECK-NEXT:    ldr r1, [r0, #8]
+; CHECK-NEXT:    ldr r3, [r0, #12]
+; CHECK-NEXT:    ldrb r0, [r0, #16]
+; CHECK-NEXT:    strd r1, r3, [r2, #8]
+; CHECK-NEXT:    strb r0, [r2, #16]
+; CHECK-NEXT:    bx lr
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([32 x i32]* @s_32 to i8*), i8* bitcast ([32 x i32]* @d_32 to i8*), i32 17, i32 4, i1 false)
+  ret void
+}
+
+define void @t4() #0 {
+; CHECK-LABEL: t4:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    movw r0, :lower16:d_32
+; CHECK-NEXT:    movw r1, :lower16:s_32
+; CHECK-NEXT:    movt r0, :upper16:d_32
+; CHECK-NEXT:    movt r1, :upper16:s_32
+; CHECK-NEXT:    ldr.w r2, [r0, #11]
+; CHECK-NEXT:    str.w r2, [r1, #11]
+; CHECK-NEXT:    ldr r2, [r0]
+; CHECK-NEXT:    str r2, [r1]
+; CHECK-NEXT:    ldr r2, [r0, #4]
+; CHECK-NEXT:    str r2, [r1, #4]
+; CHECK-NEXT:    ldr r0, [r0, #8]
+; CHECK-NEXT:    str r0, [r1, #8]
+; CHECK-NEXT:    bx lr
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([32 x i32]* @s_32 to i8*), i8* bitcast ([32 x i32]* @d_32 to i8*), i32 15, i32 4, i1 false)
+  ret void
+}
+
+define void @t5() #0 {
+; CHECK-LABEL: t5:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    movw r0, :lower16:d
+; CHECK-NEXT:    movw r1, :lower16:s
+; CHECK-NEXT:    movt r0, :upper16:d
+; CHECK-NEXT:    movt r1, :upper16:s
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    ldr r1, [r1]
+; CHECK-NEXT:    add.w r12, r0, #12
+; CHECK-NEXT:    ldr r3, [r0, #24]
+; CHECK-NEXT:    ldrd r2, lr, [r0, #4]
+; CHECK-NEXT:    ldm.w r12, {r4, r5, r12}
+; CHECK-NEXT:    str r3, [r1, #24]
+; CHECK-NEXT:    add.w r3, r1, #12
+; CHECK-NEXT:    strd r2, lr, [r1, #4]
+; CHECK-NEXT:    stm.w r3, {r4, r5, r12}
+; CHECK-NEXT:    ldr r0, [r0, #28]
+; CHECK-NEXT:    str r0, [r1, #28]
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+entry:
+  %0 = load i32*, i32** @s, align 4
+  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+  %1 = bitcast i32* %arrayidx to i8*
+  %2 = load i32*, i32** @d, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+  %3 = bitcast i32* %arrayidx1 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
+  ret void
+}
+
+define void @t6() #0 {
+; CHECK-LABEL: t6:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    movw r0, :lower16:d
+; CHECK-NEXT:    movw r1, :lower16:s
+; CHECK-NEXT:    movt r0, :upper16:d
+; CHECK-NEXT:    movt r1, :upper16:s
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    ldr r1, [r1]
+; CHECK-NEXT:    add.w r12, r0, #12
+; CHECK-NEXT:    ldr r3, [r0, #24]
+; CHECK-NEXT:    ldrd r2, lr, [r0, #4]
+; CHECK-NEXT:    ldm.w r12, {r4, r5, r12}
+; CHECK-NEXT:    str r3, [r1, #24]
+; CHECK-NEXT:    add.w r3, r1, #12
+; CHECK-NEXT:    strd r2, lr, [r1, #4]
+; CHECK-NEXT:    stm.w r3, {r4, r5, r12}
+; CHECK-NEXT:    ldr r0, [r0, #28]
+; CHECK-NEXT:    str r0, [r1, #28]
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+entry:
+  %0 = load i32*, i32** @s, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+  %1 = bitcast i32* %arrayidx to i8*
+  %2 = load i32*, i32** @d, align 8
+  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+  %3 = bitcast i32* %arrayidx1 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
+  ret void
+}
--
2.34.1

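The added file is a standard git format-patch. Assuming it is applied to a plain llvm-project checkout (a usage sketch only; the patch file name below is hypothetical, and this repository's build scripts may integrate it differently):

  # Apply the diff to the working tree:
  git apply 0001-ARM-CodeGen-Disable-MEMCPY-LDM-STM-inlining-for-v7-m.patch
  # Or apply it as a commit, keeping the original author and message:
  git am 0001-ARM-CodeGen-Disable-MEMCPY-LDM-STM-inlining-for-v7-m.patch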