|
1 | 1 | From 61af6af10d10a08b81d3924fa5b35bfb548b2a05 Mon Sep 17 00:00:00 2001 |
2 | 2 | From: nasmnc01 < [email protected]> |
| 3 | +Author: Scott Douglass < [email protected]> |
3 | 4 | Date: Tue, 13 Aug 2024 10:55:51 +0100 |
4 | 5 | Subject: [PATCH] [ARM][CodeGen] Disable MEMCPY LDM/STM inlining for v7-m |
5 | 6 |
|
6 | 7 | This patch disables the expansion of MEMCPY to LDM/STM |
7 | 8 | on v7-m targets, because this inlining method causes |
8 | 9 | a slowdown on those targets. |
9 | 10 |
|
| 11 | +Co-authored-by: Nashe Mncube < [email protected]> |
10 | 12 | Change-Id: I91095299c2c67670a16849d08540bdbc07a95adc |
11 | 13 | --- |
12 | 14 | llvm/lib/Target/ARM/ARMFeatures.td | 5 + |
@@ -223,177 +225,6 @@ index 2f7af05a259f..0acf919b1360 100644 |
223 | 225 | }; |
224 | 226 |
|
225 | 227 | } // end namespace llvm |
226 | | -diff --git a/llvm/test/CodeGen/ARM/memcpy-v7m.ll b/llvm/test/CodeGen/ARM/memcpy-v7m.ll |
227 | | -new file mode 100644 |
228 | | -index 000000000000..2a90f44fe3d3 |
229 | | ---- /dev/null |
230 | | -+++ b/llvm/test/CodeGen/ARM/memcpy-v7m.ll |
231 | | -@@ -0,0 +1,165 @@ |
232 | | -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
233 | | -+; RUN: llc -mtriple=thumbv7em-eabi -mcpu=cortex-m7 -verify-machineinstrs %s -o - | FileCheck %s |
234 | | -+ |
235 | | -+@d = external global [64 x i32] |
236 | | -+@s = external global [64 x i32] |
237 | | -+@d_32 = external global[32 x i32] |
238 | | -+@s_32 = external global[32 x i32] |
239 | | -+ |
240 | | -+ |
241 | | -+; Function Attrs: nounwind |
242 | | -+define void @t1() #0 { |
243 | | -+; CHECK-LABEL: t1: |
244 | | -+; CHECK: @ %bb.0: @ %entry |
245 | | -+; CHECK-NEXT: movw r0, :lower16:d |
246 | | -+; CHECK-NEXT: movw r2, :lower16:s |
247 | | -+; CHECK-NEXT: movt r0, :upper16:d |
248 | | -+; CHECK-NEXT: movt r2, :upper16:s |
249 | | -+; CHECK-NEXT: ldr r1, [r0] |
250 | | -+; CHECK-NEXT: str r1, [r2] |
251 | | -+; CHECK-NEXT: ldr r3, [r0, #4] |
252 | | -+; CHECK-NEXT: str r3, [r2, #4] |
253 | | -+; CHECK-NEXT: ldr r1, [r0, #8] |
254 | | -+; CHECK-NEXT: ldr r3, [r0, #12] |
255 | | -+; CHECK-NEXT: ldrb r0, [r0, #16] |
256 | | -+; CHECK-NEXT: strd r1, r3, [r2, #8] |
257 | | -+; CHECK-NEXT: strb r0, [r2, #16] |
258 | | -+; CHECK-NEXT: bx lr |
259 | | -+entry: |
260 | | -+; We use '[rl0-9]+' to allow 'r0'..'r12', 'lr' |
261 | | -+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false) |
262 | | -+ ret void |
263 | | -+} |
264 | | -+ |
265 | | -+; Function Attrs: nounwind |
266 | | -+define void @t2() #0 { |
267 | | -+; CHECK-LABEL: t2: |
268 | | -+; CHECK: @ %bb.0: @ %entry |
269 | | -+; CHECK-NEXT: movw r0, :lower16:d |
270 | | -+; CHECK-NEXT: movw r1, :lower16:s |
271 | | -+; CHECK-NEXT: movt r0, :upper16:d |
272 | | -+; CHECK-NEXT: movt r1, :upper16:s |
273 | | -+; CHECK-NEXT: ldr.w r2, [r0, #11] |
274 | | -+; CHECK-NEXT: str.w r2, [r1, #11] |
275 | | -+; CHECK-NEXT: ldr r2, [r0] |
276 | | -+; CHECK-NEXT: str r2, [r1] |
277 | | -+; CHECK-NEXT: ldr r2, [r0, #4] |
278 | | -+; CHECK-NEXT: str r2, [r1, #4] |
279 | | -+; CHECK-NEXT: ldr r0, [r0, #8] |
280 | | -+; CHECK-NEXT: str r0, [r1, #8] |
281 | | -+; CHECK-NEXT: bx lr |
282 | | -+entry: |
283 | | -+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false) |
284 | | -+ ret void |
285 | | -+} |
286 | | -+ |
287 | | -+; Function Attrs: nounwind |
288 | | -+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 |
289 | | -+ |
290 | | -+ |
291 | | -+define void @t3() #0 { |
292 | | -+; CHECK-LABEL: t3: |
293 | | -+; CHECK: @ %bb.0: |
294 | | -+; CHECK-NEXT: movw r0, :lower16:d_32 |
295 | | -+; CHECK-NEXT: movw r2, :lower16:s_32 |
296 | | -+; CHECK-NEXT: movt r0, :upper16:d_32 |
297 | | -+; CHECK-NEXT: movt r2, :upper16:s_32 |
298 | | -+; CHECK-NEXT: ldr r1, [r0] |
299 | | -+; CHECK-NEXT: str r1, [r2] |
300 | | -+; CHECK-NEXT: ldr r3, [r0, #4] |
301 | | -+; CHECK-NEXT: str r3, [r2, #4] |
302 | | -+; CHECK-NEXT: ldr r1, [r0, #8] |
303 | | -+; CHECK-NEXT: ldr r3, [r0, #12] |
304 | | -+; CHECK-NEXT: ldrb r0, [r0, #16] |
305 | | -+; CHECK-NEXT: strd r1, r3, [r2, #8] |
306 | | -+; CHECK-NEXT: strb r0, [r2, #16] |
307 | | -+; CHECK-NEXT: bx lr |
308 | | -+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([32 x i32]* @s_32 to i8*), i8* bitcast ([32 x i32]* @d_32 to i8*), i32 17, i32 4, i1 false) |
309 | | -+ ret void |
310 | | -+} |
311 | | -+ |
312 | | -+define void @t4() #0 { |
313 | | -+; CHECK-LABEL: t4: |
314 | | -+; CHECK: @ %bb.0: |
315 | | -+; CHECK-NEXT: movw r0, :lower16:d_32 |
316 | | -+; CHECK-NEXT: movw r1, :lower16:s_32 |
317 | | -+; CHECK-NEXT: movt r0, :upper16:d_32 |
318 | | -+; CHECK-NEXT: movt r1, :upper16:s_32 |
319 | | -+; CHECK-NEXT: ldr.w r2, [r0, #11] |
320 | | -+; CHECK-NEXT: str.w r2, [r1, #11] |
321 | | -+; CHECK-NEXT: ldr r2, [r0] |
322 | | -+; CHECK-NEXT: str r2, [r1] |
323 | | -+; CHECK-NEXT: ldr r2, [r0, #4] |
324 | | -+; CHECK-NEXT: str r2, [r1, #4] |
325 | | -+; CHECK-NEXT: ldr r0, [r0, #8] |
326 | | -+; CHECK-NEXT: str r0, [r1, #8] |
327 | | -+; CHECK-NEXT: bx lr |
328 | | -+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([32 x i32]* @s_32 to i8*), i8* bitcast ([32 x i32]* @d_32 to i8*), i32 15, i32 4, i1 false) |
329 | | -+ ret void |
330 | | -+} |
331 | | -+ |
332 | | -+define void @t5() #0 { |
333 | | -+; CHECK-LABEL: t5: |
334 | | -+; CHECK: @ %bb.0: @ %entry |
335 | | -+; CHECK-NEXT: .save {r4, r5, r7, lr} |
336 | | -+; CHECK-NEXT: push {r4, r5, r7, lr} |
337 | | -+; CHECK-NEXT: movw r0, :lower16:d |
338 | | -+; CHECK-NEXT: movw r1, :lower16:s |
339 | | -+; CHECK-NEXT: movt r0, :upper16:d |
340 | | -+; CHECK-NEXT: movt r1, :upper16:s |
341 | | -+; CHECK-NEXT: ldr r0, [r0] |
342 | | -+; CHECK-NEXT: ldr r1, [r1] |
343 | | -+; CHECK-NEXT: add.w r12, r0, #12 |
344 | | -+; CHECK-NEXT: ldr r3, [r0, #24] |
345 | | -+; CHECK-NEXT: ldrd r2, lr, [r0, #4] |
346 | | -+; CHECK-NEXT: ldm.w r12, {r4, r5, r12} |
347 | | -+; CHECK-NEXT: str r3, [r1, #24] |
348 | | -+; CHECK-NEXT: add.w r3, r1, #12 |
349 | | -+; CHECK-NEXT: strd r2, lr, [r1, #4] |
350 | | -+; CHECK-NEXT: stm.w r3, {r4, r5, r12} |
351 | | -+; CHECK-NEXT: ldr r0, [r0, #28] |
352 | | -+; CHECK-NEXT: str r0, [r1, #28] |
353 | | -+; CHECK-NEXT: pop {r4, r5, r7, pc} |
354 | | -+entry: |
355 | | -+ %0 = load i32*, i32** @s, align 4 |
356 | | -+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1 |
357 | | -+ %1 = bitcast i32* %arrayidx to i8* |
358 | | -+ %2 = load i32*, i32** @d, align 4 |
359 | | -+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 |
360 | | -+ %3 = bitcast i32* %arrayidx1 to i8* |
361 | | -+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false) |
362 | | -+ ret void |
363 | | -+} |
364 | | -+ |
365 | | -+define void @t6() #0 { |
366 | | -+; CHECK-LABEL: t6: |
367 | | -+; CHECK: @ %bb.0: @ %entry |
368 | | -+; CHECK-NEXT: .save {r4, r5, r7, lr} |
369 | | -+; CHECK-NEXT: push {r4, r5, r7, lr} |
370 | | -+; CHECK-NEXT: movw r0, :lower16:d |
371 | | -+; CHECK-NEXT: movw r1, :lower16:s |
372 | | -+; CHECK-NEXT: movt r0, :upper16:d |
373 | | -+; CHECK-NEXT: movt r1, :upper16:s |
374 | | -+; CHECK-NEXT: ldr r0, [r0] |
375 | | -+; CHECK-NEXT: ldr r1, [r1] |
376 | | -+; CHECK-NEXT: add.w r12, r0, #12 |
377 | | -+; CHECK-NEXT: ldr r3, [r0, #24] |
378 | | -+; CHECK-NEXT: ldrd r2, lr, [r0, #4] |
379 | | -+; CHECK-NEXT: ldm.w r12, {r4, r5, r12} |
380 | | -+; CHECK-NEXT: str r3, [r1, #24] |
381 | | -+; CHECK-NEXT: add.w r3, r1, #12 |
382 | | -+; CHECK-NEXT: strd r2, lr, [r1, #4] |
383 | | -+; CHECK-NEXT: stm.w r3, {r4, r5, r12} |
384 | | -+; CHECK-NEXT: ldr r0, [r0, #28] |
385 | | -+; CHECK-NEXT: str r0, [r1, #28] |
386 | | -+; CHECK-NEXT: pop {r4, r5, r7, pc} |
387 | | -+entry: |
388 | | -+ %0 = load i32*, i32** @s, align 8 |
389 | | -+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1 |
390 | | -+ %1 = bitcast i32* %arrayidx to i8* |
391 | | -+ %2 = load i32*, i32** @d, align 8 |
392 | | -+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 |
393 | | -+ %3 = bitcast i32* %arrayidx1 to i8* |
394 | | -+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false) |
395 | | -+ ret void |
396 | | -+} |
397 | 228 | -- |
398 | 229 | 2.34.1 |
399 | 230 |
|
0 commit comments