Skip to content

Commit 475c280

Browse files
committed
Adding author and coauthor fields
Change-Id: I43e0ab51f19452e1ebbfd7739d81fe99716acd6e
1 parent 8bc0d1c commit 475c280

File tree

1 file changed

+2
-171
lines changed

1 file changed

+2
-171
lines changed

patches/llvm-project-perf/0002-ARM-CodeGen-Disable-MEMCPY-LDM-STM-inlining-for-v7-m.patch

Lines changed: 2 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
From 61af6af10d10a08b81d3924fa5b35bfb548b2a05 Mon Sep 17 00:00:00 2001
22
From: nasmnc01 <[email protected]>
3+
Author: Scott Douglass <[email protected]>
34
Date: Tue, 13 Aug 2024 10:55:51 +0100
45
Subject: [PATCH] [ARM][CodeGen] Disable MEMCPY LDM/STM inlining for v7-m
56

67
This patch disables the expansion of MEMCPY to LDM/STM
78
on v7-m targets. This is due to a slowdown caused
89
by this inlining method.
910

11+
Co-authored-by: Nashe Mncube <[email protected]>
1012
Change-Id: I91095299c2c67670a16849d08540bdbc07a95adc
1113
---
1214
llvm/lib/Target/ARM/ARMFeatures.td | 5 +
@@ -223,177 +225,6 @@ index 2f7af05a259f..0acf919b1360 100644
223225
};
224226

225227
} // end namespace llvm
226-
diff --git a/llvm/test/CodeGen/ARM/memcpy-v7m.ll b/llvm/test/CodeGen/ARM/memcpy-v7m.ll
227-
new file mode 100644
228-
index 000000000000..2a90f44fe3d3
229-
--- /dev/null
230-
+++ b/llvm/test/CodeGen/ARM/memcpy-v7m.ll
231-
@@ -0,0 +1,165 @@
232-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
233-
+; RUN: llc -mtriple=thumbv7em-eabi -mcpu=cortex-m7 -verify-machineinstrs %s -o - | FileCheck %s
234-
+
235-
+@d = external global [64 x i32]
236-
+@s = external global [64 x i32]
237-
+@d_32 = external global[32 x i32]
238-
+@s_32 = external global[32 x i32]
239-
+
240-
+
241-
+; Function Attrs: nounwind
242-
+define void @t1() #0 {
243-
+; CHECK-LABEL: t1:
244-
+; CHECK: @ %bb.0: @ %entry
245-
+; CHECK-NEXT: movw r0, :lower16:d
246-
+; CHECK-NEXT: movw r2, :lower16:s
247-
+; CHECK-NEXT: movt r0, :upper16:d
248-
+; CHECK-NEXT: movt r2, :upper16:s
249-
+; CHECK-NEXT: ldr r1, [r0]
250-
+; CHECK-NEXT: str r1, [r2]
251-
+; CHECK-NEXT: ldr r3, [r0, #4]
252-
+; CHECK-NEXT: str r3, [r2, #4]
253-
+; CHECK-NEXT: ldr r1, [r0, #8]
254-
+; CHECK-NEXT: ldr r3, [r0, #12]
255-
+; CHECK-NEXT: ldrb r0, [r0, #16]
256-
+; CHECK-NEXT: strd r1, r3, [r2, #8]
257-
+; CHECK-NEXT: strb r0, [r2, #16]
258-
+; CHECK-NEXT: bx lr
259-
+entry:
260-
+; We use '[rl0-9]+' to allow 'r0'..'r12', 'lr'
261-
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
262-
+ ret void
263-
+}
264-
+
265-
+; Function Attrs: nounwind
266-
+define void @t2() #0 {
267-
+; CHECK-LABEL: t2:
268-
+; CHECK: @ %bb.0: @ %entry
269-
+; CHECK-NEXT: movw r0, :lower16:d
270-
+; CHECK-NEXT: movw r1, :lower16:s
271-
+; CHECK-NEXT: movt r0, :upper16:d
272-
+; CHECK-NEXT: movt r1, :upper16:s
273-
+; CHECK-NEXT: ldr.w r2, [r0, #11]
274-
+; CHECK-NEXT: str.w r2, [r1, #11]
275-
+; CHECK-NEXT: ldr r2, [r0]
276-
+; CHECK-NEXT: str r2, [r1]
277-
+; CHECK-NEXT: ldr r2, [r0, #4]
278-
+; CHECK-NEXT: str r2, [r1, #4]
279-
+; CHECK-NEXT: ldr r0, [r0, #8]
280-
+; CHECK-NEXT: str r0, [r1, #8]
281-
+; CHECK-NEXT: bx lr
282-
+entry:
283-
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
284-
+ ret void
285-
+}
286-
+
287-
+; Function Attrs: nounwind
288-
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
289-
+
290-
+
291-
+define void @t3() #0 {
292-
+; CHECK-LABEL: t3:
293-
+; CHECK: @ %bb.0:
294-
+; CHECK-NEXT: movw r0, :lower16:d_32
295-
+; CHECK-NEXT: movw r2, :lower16:s_32
296-
+; CHECK-NEXT: movt r0, :upper16:d_32
297-
+; CHECK-NEXT: movt r2, :upper16:s_32
298-
+; CHECK-NEXT: ldr r1, [r0]
299-
+; CHECK-NEXT: str r1, [r2]
300-
+; CHECK-NEXT: ldr r3, [r0, #4]
301-
+; CHECK-NEXT: str r3, [r2, #4]
302-
+; CHECK-NEXT: ldr r1, [r0, #8]
303-
+; CHECK-NEXT: ldr r3, [r0, #12]
304-
+; CHECK-NEXT: ldrb r0, [r0, #16]
305-
+; CHECK-NEXT: strd r1, r3, [r2, #8]
306-
+; CHECK-NEXT: strb r0, [r2, #16]
307-
+; CHECK-NEXT: bx lr
308-
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([32 x i32]* @s_32 to i8*), i8* bitcast ([32 x i32]* @d_32 to i8*), i32 17, i32 4, i1 false)
309-
+ ret void
310-
+}
311-
+
312-
+define void @t4() #0 {
313-
+; CHECK-LABEL: t4:
314-
+; CHECK: @ %bb.0:
315-
+; CHECK-NEXT: movw r0, :lower16:d_32
316-
+; CHECK-NEXT: movw r1, :lower16:s_32
317-
+; CHECK-NEXT: movt r0, :upper16:d_32
318-
+; CHECK-NEXT: movt r1, :upper16:s_32
319-
+; CHECK-NEXT: ldr.w r2, [r0, #11]
320-
+; CHECK-NEXT: str.w r2, [r1, #11]
321-
+; CHECK-NEXT: ldr r2, [r0]
322-
+; CHECK-NEXT: str r2, [r1]
323-
+; CHECK-NEXT: ldr r2, [r0, #4]
324-
+; CHECK-NEXT: str r2, [r1, #4]
325-
+; CHECK-NEXT: ldr r0, [r0, #8]
326-
+; CHECK-NEXT: str r0, [r1, #8]
327-
+; CHECK-NEXT: bx lr
328-
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([32 x i32]* @s_32 to i8*), i8* bitcast ([32 x i32]* @d_32 to i8*), i32 15, i32 4, i1 false)
329-
+ ret void
330-
+}
331-
+
332-
+define void @t5() #0 {
333-
+; CHECK-LABEL: t5:
334-
+; CHECK: @ %bb.0: @ %entry
335-
+; CHECK-NEXT: .save {r4, r5, r7, lr}
336-
+; CHECK-NEXT: push {r4, r5, r7, lr}
337-
+; CHECK-NEXT: movw r0, :lower16:d
338-
+; CHECK-NEXT: movw r1, :lower16:s
339-
+; CHECK-NEXT: movt r0, :upper16:d
340-
+; CHECK-NEXT: movt r1, :upper16:s
341-
+; CHECK-NEXT: ldr r0, [r0]
342-
+; CHECK-NEXT: ldr r1, [r1]
343-
+; CHECK-NEXT: add.w r12, r0, #12
344-
+; CHECK-NEXT: ldr r3, [r0, #24]
345-
+; CHECK-NEXT: ldrd r2, lr, [r0, #4]
346-
+; CHECK-NEXT: ldm.w r12, {r4, r5, r12}
347-
+; CHECK-NEXT: str r3, [r1, #24]
348-
+; CHECK-NEXT: add.w r3, r1, #12
349-
+; CHECK-NEXT: strd r2, lr, [r1, #4]
350-
+; CHECK-NEXT: stm.w r3, {r4, r5, r12}
351-
+; CHECK-NEXT: ldr r0, [r0, #28]
352-
+; CHECK-NEXT: str r0, [r1, #28]
353-
+; CHECK-NEXT: pop {r4, r5, r7, pc}
354-
+entry:
355-
+ %0 = load i32*, i32** @s, align 4
356-
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
357-
+ %1 = bitcast i32* %arrayidx to i8*
358-
+ %2 = load i32*, i32** @d, align 4
359-
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
360-
+ %3 = bitcast i32* %arrayidx1 to i8*
361-
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
362-
+ ret void
363-
+}
364-
+
365-
+define void @t6() #0 {
366-
+; CHECK-LABEL: t6:
367-
+; CHECK: @ %bb.0: @ %entry
368-
+; CHECK-NEXT: .save {r4, r5, r7, lr}
369-
+; CHECK-NEXT: push {r4, r5, r7, lr}
370-
+; CHECK-NEXT: movw r0, :lower16:d
371-
+; CHECK-NEXT: movw r1, :lower16:s
372-
+; CHECK-NEXT: movt r0, :upper16:d
373-
+; CHECK-NEXT: movt r1, :upper16:s
374-
+; CHECK-NEXT: ldr r0, [r0]
375-
+; CHECK-NEXT: ldr r1, [r1]
376-
+; CHECK-NEXT: add.w r12, r0, #12
377-
+; CHECK-NEXT: ldr r3, [r0, #24]
378-
+; CHECK-NEXT: ldrd r2, lr, [r0, #4]
379-
+; CHECK-NEXT: ldm.w r12, {r4, r5, r12}
380-
+; CHECK-NEXT: str r3, [r1, #24]
381-
+; CHECK-NEXT: add.w r3, r1, #12
382-
+; CHECK-NEXT: strd r2, lr, [r1, #4]
383-
+; CHECK-NEXT: stm.w r3, {r4, r5, r12}
384-
+; CHECK-NEXT: ldr r0, [r0, #28]
385-
+; CHECK-NEXT: str r0, [r1, #28]
386-
+; CHECK-NEXT: pop {r4, r5, r7, pc}
387-
+entry:
388-
+ %0 = load i32*, i32** @s, align 8
389-
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
390-
+ %1 = bitcast i32* %arrayidx to i8*
391-
+ %2 = load i32*, i32** @d, align 8
392-
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
393-
+ %3 = bitcast i32* %arrayidx1 to i8*
394-
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
395-
+ ret void
396-
+}
397228
--
398229
2.34.1
399230

0 commit comments

Comments
 (0)