Skip to content

Commit 2d48cac

Browse files
committed
[DSE] Add more tests for optimizing shrinkage of memset/memcpy; NFC
1 parent 22c8b1d commit 2d48cac

File tree

2 files changed

+241
-17
lines changed

2 files changed

+241
-17
lines changed

llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll

Lines changed: 149 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -passes=dse -S | FileCheck %s
2+
; RUN: opt < %s -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM4
3+
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM16
34

45
define void @write4to7(ptr nocapture %p) {
56
; CHECK-LABEL: @write4to7(
@@ -23,8 +24,8 @@ define void @write4to7_weird_element_type(ptr nocapture %p) {
2324
; CHECK-LABEL: @write4to7_weird_element_type(
2425
; CHECK-NEXT: entry:
2526
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
26-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
27-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 24, i1 false)
27+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
28+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
2829
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
2930
; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
3031
; CHECK-NEXT: ret void
@@ -269,14 +270,23 @@ entry:
269270
}
270271

271272
define void @write8To15AndThen0To7(ptr nocapture %P) {
272-
; CHECK-LABEL: @write8To15AndThen0To7(
273-
; CHECK-NEXT: entry:
274-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
275-
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
276-
; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
277-
; CHECK-NEXT: store i64 1, ptr [[BASE64_1]], align 4
278-
; CHECK-NEXT: store i64 2, ptr [[P]], align 4
279-
; CHECK-NEXT: ret void
273+
; CHECK-MEM4-LABEL: @write8To15AndThen0To7(
274+
; CHECK-MEM4-NEXT: entry:
275+
; CHECK-MEM4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
276+
; CHECK-MEM4-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
277+
; CHECK-MEM4-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
278+
; CHECK-MEM4-NEXT: store i64 1, ptr [[BASE64_1]], align 4
279+
; CHECK-MEM4-NEXT: store i64 2, ptr [[P]], align 4
280+
; CHECK-MEM4-NEXT: ret void
281+
;
282+
; CHECK-MEM16-LABEL: @write8To15AndThen0To7(
283+
; CHECK-MEM16-NEXT: entry:
284+
; CHECK-MEM16-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
285+
; CHECK-MEM16-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
286+
; CHECK-MEM16-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
287+
; CHECK-MEM16-NEXT: store i64 1, ptr [[BASE64_1]], align 8
288+
; CHECK-MEM16-NEXT: store i64 2, ptr [[P]], align 8
289+
; CHECK-MEM16-NEXT: ret void
280290
;
281291
entry:
282292

@@ -402,3 +412,131 @@ entry:
402412
store i64 1, ptr %p, align 1
403413
ret void
404414
}
415+
416+
; Head overlap: a 33-byte memset at %p+3 (align 1) is followed by an i64 store
; to %p+0. The store rewrites bytes 0..7 of %p and therefore covers the first
; 5 bytes of the memset. The expected output advances the memset start by 5
; and trims its length to 28.
; The assertions use the prefix shared by both RUN configurations, so the
; generic and x86_64 runs currently expect the same IR.
define void @memset_optimize_size_lo_33_to_x86_32_generic_28(ptr %p) {
417+
; CHECK-LABEL: @memset_optimize_size_lo_33_to_x86_32_generic_28(
418+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 3
419+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
420+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 5
421+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 28, i1 false)
422+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
423+
; CHECK-NEXT: ret void
424+
;
425+
%p0 = getelementptr inbounds i8, ptr %p, i64 3
426+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
427+
call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 33, i1 false)
428+
store i64 0, ptr %p1, align 1
429+
ret void
430+
}
431+
432+
; Head overlap with an aligned memset: a 33-byte memset at %p+3 (align 2) is
; followed by an i64 store to %p+0 that covers the memset's first 5 bytes.
; The expected output advances the memset start by only 4 (length 29) and
; keeps `align 2` on the new pointer.
; NOTE(review): presumably the trimmed amount is rounded down to a multiple of
; the memset alignment - confirm against the DSE implementation.
define void @memset_optimize_size_lo_33_misaligned_x86_fail_generic_save_unit(ptr %p) {
433+
; CHECK-LABEL: @memset_optimize_size_lo_33_misaligned_x86_fail_generic_save_unit(
434+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 3
435+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
436+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
437+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 29, i1 false)
438+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
439+
; CHECK-NEXT: ret void
440+
;
441+
%p0 = getelementptr inbounds i8, ptr %p, i64 3
442+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
443+
call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 33, i1 false)
444+
store i64 0, ptr %p1, align 1
445+
ret void
446+
}
447+
448+
; Head overlap: a 32-byte memset at %p+4 (align 2) is followed by an i64 store
; to %p+0. The store rewrites bytes 0..7 of %p and so covers the first 4 bytes
; of the memset. The expected output advances the memset start by 4 and trims
; its length to 28.
; The assertions use the prefix shared by both RUN configurations, so the
; generic and x86_64 runs currently expect the same IR.
define void @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(ptr %p) {
449+
; CHECK-LABEL: @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(
450+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
451+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
452+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
453+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 28, i1 false)
454+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
455+
; CHECK-NEXT: ret void
456+
;
457+
%p0 = getelementptr inbounds i8, ptr %p, i64 4
458+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
459+
call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 32, i1 false)
460+
store i64 0, ptr %p1, align 1
461+
ret void
462+
}
463+
464+
; Head overlap: a 34-byte memset at %p+4 (align 2) is followed by an i64 store
; to %p+0 that covers the memset's first 4 bytes. The expected output advances
; the memset start by 4 and trims its length to 30.
; NOTE(review): the function name mentions 32 while the expected length is 30;
; presumably the name describes a size a later change is meant to produce -
; confirm against the commit history.
define void @memset_optimize_size_lo_34_to_32(ptr %p) {
465+
; CHECK-LABEL: @memset_optimize_size_lo_34_to_32(
466+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
467+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
468+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
469+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 30, i1 false)
470+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
471+
; CHECK-NEXT: ret void
472+
;
473+
%p0 = getelementptr inbounds i8, ptr %p, i64 4
474+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
475+
call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 34, i1 false)
476+
store i64 0, ptr %p1, align 1
477+
ret void
478+
}
479+
480+
; Head overlap: a 34-byte memset at %p+4 (align 4) is followed by an i64 store
; to %p+0 that covers the memset's first 4 bytes. The expected output advances
; the memset start by 4, trims its length to 30 and keeps `align 4`.
; The assertions use the prefix shared by both RUN configurations, so the
; generic and x86_64 runs currently expect the same IR.
define void @memset_optimize_size_lo_34_x86_misaligned_fail_generic_save_unit(ptr %p) {
481+
; CHECK-LABEL: @memset_optimize_size_lo_34_x86_misaligned_fail_generic_save_unit(
482+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
483+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
484+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
485+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 30, i1 false)
486+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
487+
; CHECK-NEXT: ret void
488+
;
489+
%p0 = getelementptr inbounds i8, ptr %p, i64 4
490+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
491+
call void @llvm.memset.p0.i64(ptr align 4 %p0, i8 0, i64 34, i1 false)
492+
store i64 0, ptr %p1, align 1
493+
ret void
494+
}
495+
496+
; Head overlap with a byte-aligned memset: a 34-byte memset at %p+4 (align 1)
; is followed by an i64 store to %p+0 that covers the memset's first 4 bytes.
; The expected output advances the memset start by 4 and trims its length
; to 30.
; NOTE(review): the function name mentions 32 while the expected length is 30;
; presumably the name describes a size a later change is meant to produce -
; confirm against the commit history.
define void @memset_optimize_size_lo_34_to_32_no_align_okay(ptr %p) {
497+
; CHECK-LABEL: @memset_optimize_size_lo_34_to_32_no_align_okay(
498+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
499+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
500+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
501+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 30, i1 false)
502+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
503+
; CHECK-NEXT: ret void
504+
;
505+
%p0 = getelementptr inbounds i8, ptr %p, i64 4
506+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
507+
call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 34, i1 false)
508+
store i64 0, ptr %p1, align 1
509+
ret void
510+
}
511+
512+
; Head overlap with a narrower store: a 33-byte memset at %p+1 (align 2) is
; followed by an i32 store to %p+0. The store rewrites bytes 0..3 of %p and so
; covers the first 3 bytes of the memset. The expected output advances the
; memset start by only 2 (length 31) and keeps `align 2`.
; NOTE(review): presumably the trimmed amount is rounded down to a multiple of
; the memset alignment - confirm against the DSE implementation.
define void @memset_optimize_size_lo_33_to_31_save_unit_no_change(ptr %p) {
513+
; CHECK-LABEL: @memset_optimize_size_lo_33_to_31_save_unit_no_change(
514+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
515+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
516+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 2
517+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 31, i1 false)
518+
; CHECK-NEXT: store i32 0, ptr [[P1]], align 1
519+
; CHECK-NEXT: ret void
520+
;
521+
%p0 = getelementptr inbounds i8, ptr %p, i64 1
522+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
523+
call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 33, i1 false)
524+
store i32 0, ptr %p1, align 1
525+
ret void
526+
}
527+
528+
; Head overlap: a 36-byte memset at %p+1 (align 4) is followed by an i64 store
; to %p+0. The store rewrites bytes 0..7 of %p and so covers the first 7 bytes
; of the memset. The expected output advances the memset start by only 4
; (length 32) and keeps `align 4`.
; NOTE(review): presumably the trimmed amount is rounded down to a multiple of
; the memset alignment - confirm against the DSE implementation.
define void @memset_optimize_size_lo_36_to_32(ptr %p) {
529+
; CHECK-LABEL: @memset_optimize_size_lo_36_to_32(
530+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
531+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
532+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
533+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 32, i1 false)
534+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
535+
; CHECK-NEXT: ret void
536+
;
537+
%p0 = getelementptr inbounds i8, ptr %p, i64 1
538+
%p1 = getelementptr inbounds i8, ptr %p, i64 0
539+
call void @llvm.memset.p0.i64(ptr align 4 %p0, i8 0, i64 36, i1 false)
540+
store i64 0, ptr %p1, align 1
541+
ret void
542+
}

llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -passes=dse -S | FileCheck %s
2+
; RUN: opt < %s -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM4
3+
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM16
4+
35
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
46

5-
%struct.vec2 = type { <4 x i32>, <4 x i32> }
6-
%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
7+
; Aggregate of two <4 x i32> vectors; this is the type of the global @glob1.
%struct.vec2 = type {
8+
<4 x i32>, <4 x i32>
9+
}
10+
11+
; Two <4 x i32> vectors followed by a trailing i32; this is the type of the
; global @glob2.
%struct.vec2plusi = type {
12+
<4 x i32>, <4 x i32>, i32
13+
}
714

815
@glob1 = global %struct.vec2 zeroinitializer, align 16
916
@glob2 = global %struct.vec2plusi zeroinitializer, align 16
@@ -231,7 +238,9 @@ declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture, ptr
231238
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
232239
declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr nocapture, i8, i64, i32) nounwind
233240

234-
%struct.trapframe = type { i64, i64, i64 }
241+
; Aggregate of three i64 fields.
%struct.trapframe = type {
242+
i64, i64, i64
243+
}
235244

236245
; bugzilla 11455 - make sure negative GEPs don't break this optimisation
237246
define void @cpu_lwp_fork(ptr %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
@@ -259,8 +268,8 @@ define void @write16To23AndThen24To31(ptr nocapture %P, i64 %n64, i32 %n32, i16
259268
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i1 false)
260269
; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
261270
; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
262-
; CHECK-NEXT: store i64 3, ptr [[BASE64_2]]
263-
; CHECK-NEXT: store i64 3, ptr [[BASE64_3]]
271+
; CHECK-NEXT: store i64 3, ptr [[BASE64_2]], align 8
272+
; CHECK-NEXT: store i64 3, ptr [[BASE64_3]], align 8
264273
; CHECK-NEXT: ret void
265274
;
266275
entry:
@@ -392,3 +401,80 @@ entry:
392401
ret void
393402
}
394403

404+
; Tail overlap: a 31-byte memset at %p+0 (align 1) is followed by an i64 store
; to %p+23. The store rewrites bytes 23..30 of %p, i.e. the last 8 bytes of
; the memset. The expected output shrinks the memset length to 23.
; NOTE(review): the function name mentions 24 while the expected length is 23;
; presumably the name describes a size a later change is meant to produce -
; confirm against the commit history.
define void @memset_optimize_size_hi_31_to_24(ptr %p) {
405+
; CHECK-LABEL: @memset_optimize_size_hi_31_to_24(
406+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
407+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 23
408+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 23, i1 false)
409+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
410+
; CHECK-NEXT: ret void
411+
;
412+
%p0 = getelementptr inbounds i8, ptr %p, i64 0
413+
%p1 = getelementptr inbounds i8, ptr %p, i64 23
414+
call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 31, i1 false)
415+
store i64 0, ptr %p1, align 1
416+
ret void
417+
}
418+
419+
; Tail overlap: a 32-byte memset at %p+0 (align 1) is followed by an i64 store
; to %p+28. The store rewrites bytes 28..35 of %p and so covers the last 4
; bytes of the memset. The expected output shrinks the memset length to 28.
; The assertions use the prefix shared by both RUN configurations, so the
; generic and x86_64 runs currently expect the same IR.
define void @memset_optimize_size_hi_32_no_change_x86_change_generic(ptr %p) {
420+
; CHECK-LABEL: @memset_optimize_size_hi_32_no_change_x86_change_generic(
421+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
422+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
423+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 28, i1 false)
424+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
425+
; CHECK-NEXT: ret void
426+
;
427+
%p0 = getelementptr inbounds i8, ptr %p, i64 0
428+
%p1 = getelementptr inbounds i8, ptr %p, i64 28
429+
call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 32, i1 false)
430+
store i64 0, ptr %p1, align 1
431+
ret void
432+
}
433+
434+
; Tail overlap with an aligned memset: a 28-byte memset at %p+0 (align 8) is
; followed by an i64 store to %p+21. The store rewrites bytes 21..28 of %p and
; so covers the last 7 bytes of the memset. The expected output shrinks the
; memset length to 24 rather than 21 and keeps `align 8`.
; NOTE(review): presumably the retained length is rounded up to a multiple of
; some unit derived from the alignment - confirm against the DSE
; implementation.
define void @memset_optimize_size_hi_28_to_24(ptr %p) {
435+
; CHECK-LABEL: @memset_optimize_size_hi_28_to_24(
436+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
437+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 21
438+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P0]], i8 0, i64 24, i1 false)
439+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
440+
; CHECK-NEXT: ret void
441+
;
442+
%p0 = getelementptr inbounds i8, ptr %p, i64 0
443+
%p1 = getelementptr inbounds i8, ptr %p, i64 21
444+
call void @llvm.memset.p0.i64(ptr align 8 %p0, i8 0, i64 28, i1 false)
445+
store i64 0, ptr %p1, align 1
446+
ret void
447+
}
448+
449+
; Tail overlap with an aligned memset: a 31-byte memset at %p+0 (align 2) is
; followed by an i64 store to %p+27. The store rewrites bytes 27..34 of %p and
; so covers the last 4 bytes of the memset. The expected output shrinks the
; memset length to 28 rather than 27 and keeps `align 2`.
; NOTE(review): presumably the retained length is rounded up to a multiple of
; some unit derived from the alignment - confirm against the DSE
; implementation.
define void @memset_optimize_size_hi_31_to_28(ptr %p) {
450+
; CHECK-LABEL: @memset_optimize_size_hi_31_to_28(
451+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
452+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 27
453+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P0]], i8 0, i64 28, i1 false)
454+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
455+
; CHECK-NEXT: ret void
456+
;
457+
%p0 = getelementptr inbounds i8, ptr %p, i64 0
458+
%p1 = getelementptr inbounds i8, ptr %p, i64 27
459+
call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 31, i1 false)
460+
store i64 0, ptr %p1, align 1
461+
ret void
462+
}
463+
464+
; Tail overlap with an aligned memset: a 33-byte memset at %p+0 (align 4) is
; followed by an i64 store to %p+27. The store rewrites bytes 27..34 of %p and
; so covers the last 6 bytes of the memset. The expected output shrinks the
; memset length to 28 rather than 27 and keeps `align 4`.
; The assertions use the prefix shared by both RUN configurations, so the
; generic and x86_64 runs currently expect the same IR.
define void @memset_optimize_size_hi_33_to_x86_32_generic_28(ptr %p) {
465+
; CHECK-LABEL: @memset_optimize_size_hi_33_to_x86_32_generic_28(
466+
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
467+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 27
468+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P0]], i8 0, i64 28, i1 false)
469+
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
470+
; CHECK-NEXT: ret void
471+
;
472+
%p0 = getelementptr inbounds i8, ptr %p, i64 0
473+
%p1 = getelementptr inbounds i8, ptr %p, i64 27
474+
call void @llvm.memset.p0.i64(ptr align 4 %p0, i8 0, i64 33, i1 false)
475+
store i64 0, ptr %p1, align 1
476+
ret void
477+
}
478+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
479+
; CHECK-MEM16: {{.*}}
480+
; CHECK-MEM4: {{.*}}

0 commit comments

Comments
 (0)