Skip to content

Commit 3d61370

Browse files
committed
[InstCombine] Optimistically infer load/store type from memcpy
1 parent 4637bf0 commit 3d61370

File tree

5 files changed

+82
-31
lines changed

5 files changed

+82
-31
lines changed

clang/test/CodeGenCXX/auto-var-init.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ struct nullinit { char* null = nullptr; };
7979
// ZERO-O0: @__const.test_padded_custom.custom = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 4
8080
// PATTERN-O1-NOT: @__const.test_padded_uninit.uninit
8181
// PATTERN-O1-NOT: @__const.test_padded_custom.custom
82-
// ZERO-O1-NOT: @__const.test_padded_custom.custom
82+
// ZERO-O1: @__const.test_padded_custom.custom = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 8
8383
struct padded { char c; int i; };
8484
// PATTERN-O0: @__const.test_paddednullinit_uninit.uninit = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4
8585
// PATTERN-O0: @__const.test_paddednullinit_braces.braces = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4
@@ -713,6 +713,13 @@ TEST_CUSTOM(padded, padded, { 42, 13371337 });
713713
// CHECK-NEXT: call void @llvm.memcpy
714714
// CHECK-NOT: !annotation
715715
// CHECK-NEXT: call void @{{.*}}used{{.*}}%custom)
716+
// ZERO-O1: %custom = alloca %struct.padded, align 4
717+
// ZERO-O1: %0 = load %struct.padded, ptr @__const.test_padded_custom.custom, align 8
718+
// ZERO-O1: %[[I8:.*]] = extractvalue %struct.padded %0, 0
719+
// ZERO-O1: store i8 %[[I8]], ptr %custom, align 4
720+
// ZERO-O1: %[[I32:.*]] = extractvalue %struct.padded %0, 1
721+
// ZERO-O1: %[[GEP:.*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
722+
// ZERO-O1: store i32 %[[I32]], ptr %[[GEP]], align 4
716723

717724
TEST_UNINIT(paddednullinit, paddednullinit);
718725
// CHECK-LABEL: @test_paddednullinit_uninit()
@@ -1298,7 +1305,9 @@ TEST_CUSTOM(semivolatile, semivolatile, { 0x44444444, 0x44444444 });
12981305
// PATTERN-O1: store i32 1145324612, ptr %custom, align 4
12991306
// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
13001307
// PATTERN-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4
1301-
// ZERO-O1: store i64 4919131752989213764, ptr %custom, align 8
1308+
// ZERO-O1: store i32 1145324612, ptr %custom, align 4
1309+
// ZERO-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
1310+
// ZERO-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4
13021311
// CHECK-NOT: !annotation
13031312

13041313
TEST_UNINIT(semivolatileinit, semivolatileinit);
@@ -1441,7 +1450,7 @@ TEST_CUSTOM(matchingreverse, matchingreverse, { .i = 0xf00f });
14411450
// CHECK-NOT: !annotation
14421451
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
14431452
// PATTERN-O1: store i32 61455, ptr %custom, align 4
1444-
// ZERO-O1: store i32 61455, ptr %custom, align 4
1453+
// ZERO-O1: store float 0x379E01E000000000, ptr %custom, align 4
14451454
// CHECK-NOT: !annotation
14461455

14471456
TEST_UNINIT(unmatched, unmatched);
@@ -1527,7 +1536,7 @@ TEST_CUSTOM(unmatchedfp, unmatchedfp, { .d = 3.1415926535897932384626433 });
15271536
// CHECK-NOT: !annotation
15281537
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
15291538
// PATTERN-O1: store double 0x400921FB54442D18, ptr %custom, align 8
1530-
// ZERO-O1: store i64 4614256656552045848, ptr %custom, align 8
1539+
// ZERO-O1: store double 0x400921FB54442D18, ptr %custom, align 8
15311540
// CHECK-NOT: !annotation
15321541

15331542
TEST_UNINIT(emptyenum, emptyenum);

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,33 @@ static bool hasUndefSource(AnyMemTransferInst *MI) {
114114
return isa<AllocaInst>(Src) && Src->hasOneUse();
115115
}
116116

117+
// Optimistically infer the type to use for a load/store pair from the
// allocated type of Src and/or Dest, when either operand is itself an alloca.
118+
//
119+
// Returns DefaultTy if neither operand is an alloca, if both operands are
// allocas whose allocated types disagree, or
120+
// if the inferred type's size in bits (per DL) does not equal DefaultTy's bit
// width, i.e. the size of the load/store.
//
// NOTE(review): the allocated type is returned unfiltered, so it may be an
// aggregate (struct/array). A typed aggregate load/store may not write padding
// bytes the way a byte-wise copy does -- confirm this is intended.
121+
static Type *inferType(const DataLayout &DL, IntegerType *DefaultTy, Value *Src,
122+
Value *Dest) {
123+
Type *SrcTy = nullptr;
124+
Type *DestTy = nullptr;
125+
126+
// Only an operand that is directly an alloca contributes a candidate type.
if (auto *SrcAI = dyn_cast<AllocaInst>(Src))
127+
SrcTy = SrcAI->getAllocatedType();
128+
129+
if (auto *DestAI = dyn_cast<AllocaInst>(Dest))
130+
DestTy = DestAI->getAllocatedType();
131+
132+
// Two candidates that differ give no common type; keep the integer default.
if (SrcTy && DestTy && SrcTy != DestTy)
133+
return DefaultTy; // Unable to infer common type
134+
135+
// At this point the candidates are equal or at most one is set; take
// whichever is available (null if neither operand is an alloca).
Type *InferredTy = SrcTy ? SrcTy : DestTy;
136+
137+
// Accept the candidate only when it has exactly as many bits as DefaultTy.
if (InferredTy &&
138+
DefaultTy->getPrimitiveSizeInBits() == DL.getTypeSizeInBits(InferredTy))
139+
return InferredTy;
140+
141+
return DefaultTy;
142+
}
143+
117144
Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
118145
Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
119146
MaybeAlign CopyDstAlign = MI->getDestAlign();
@@ -169,16 +196,18 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
169196
if (*CopyDstAlign < Size || *CopySrcAlign < Size)
170197
return nullptr;
171198

172-
// Use an integer load+store unless we can find something better.
173-
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
174-
175199
// If the memcpy has metadata describing the members, see if we can get the
176200
// TBAA, scope and noalias tags describing our copy.
177201
AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
178202

179203
Value *Src = MI->getArgOperand(1);
180204
Value *Dest = MI->getArgOperand(0);
181-
LoadInst *L = Builder.CreateLoad(IntType, Src);
205+
206+
// Use an integer load+store unless we can find something better.
207+
IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3);
208+
Type *InferredType = inferType(DL, IntType, Src, Dest);
209+
210+
LoadInst *L = Builder.CreateLoad(InferredType, Src);
182211
// Alignment from the mem intrinsic will be better, so use it.
183212
L->setAlignment(*CopySrcAlign);
184213
L->setAAMetadata(AACopyMD);

llvm/test/Transforms/InstCombine/alloca.ll

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -189,24 +189,36 @@ define void @test9(ptr %a) {
189189
; CHECK-LABEL: @test9(
190190
; CHECK-NEXT: entry:
191191
; CHECK-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 1
192-
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
193-
; CHECK-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 4
192+
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
193+
; CHECK-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
194+
; CHECK-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4
195+
; CHECK-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4
196+
; CHECK-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i64 4
197+
; CHECK-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4
194198
; CHECK-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]])
195199
; CHECK-NEXT: ret void
196200
;
197201
; P32-LABEL: @test9(
198202
; P32-NEXT: entry:
199203
; P32-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 1
200-
; P32-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
201-
; P32-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 4
204+
; P32-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
205+
; P32-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 4
206+
; P32-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4
207+
; P32-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4
208+
; P32-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i32 4
209+
; P32-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4
202210
; P32-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]])
203211
; P32-NEXT: ret void
204212
;
205213
; NODL-LABEL: @test9(
206214
; NODL-NEXT: entry:
207215
; NODL-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 8
208-
; NODL-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
209-
; NODL-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 8
216+
; NODL-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
217+
; NODL-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
218+
; NODL-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4
219+
; NODL-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 8
220+
; NODL-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i64 4
221+
; NODL-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4
210222
; NODL-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]])
211223
; NODL-NEXT: ret void
212224
;
@@ -251,8 +263,8 @@ entry:
251263

252264
define void @test_inalloca_with_element_count(ptr %a) {
253265
; ALL-LABEL: @test_inalloca_with_element_count(
254-
; ALL-NEXT: [[ALLOCA1:%.*]] = alloca inalloca [10 x %struct_type], align 4
255-
; ALL-NEXT: call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE:%.*]]) [[ALLOCA1]])
266+
; ALL-NEXT: [[ALLOCA1:%.*]] = alloca inalloca [10 x [[STRUCT_TYPE:%.*]]], align 4
267+
; ALL-NEXT: call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE]]) [[ALLOCA1]])
256268
; ALL-NEXT: ret void
257269
;
258270
%alloca = alloca inalloca %struct_type, i32 10, align 4

llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,30 +28,27 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 derefer
2828
; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127
2929
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255
3030
; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]]
31-
; CHECK: [[BB12_LOOPEXIT:.*]]:
32-
; CHECK-NEXT: [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64
33-
; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32
34-
; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64
35-
; CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]]
36-
; CHECK-NEXT: br label %[[BB12]]
3731
; CHECK: [[BB12]]:
38-
; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ]
39-
; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]]
32+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ <i32 24, i32 42>, %[[BB]] ], [ [[I3_SROA_0_4_VEC_INSERT33:%.*]], %[[BB13]] ]
33+
; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]]
4034
; CHECK-NEXT: ret void
4135
; CHECK: [[BB13]]:
42-
; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, %[[BB]] ]
43-
; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, %[[BB]] ]
36+
; CHECK-NEXT: [[I3_SROA_0_1:%.*]] = phi <2 x i32> [ [[I3_SROA_0_4_VEC_INSERT33]], %[[BB13]] ], [ <i32 24, i32 42>, %[[BB]] ]
4437
; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB]] ]
38+
; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = extractelement <2 x i32> [[I3_SROA_0_1]], i64 0
4539
; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]]
4640
; CHECK-NEXT: [[I24:%.*]] = or disjoint i32 [[I4_05]], 1
41+
; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = extractelement <2 x i32> [[I3_SROA_0_1]], i64 1
4742
; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]]
4843
; CHECK-NEXT: [[I24_1:%.*]] = or disjoint i32 [[I4_05]], 2
49-
; CHECK-NEXT: [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]]
44+
; CHECK-NEXT: [[I21_2:%.*]] = mul nsw i32 [[I21]], [[I24_1]]
45+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[I21_2]], i64 0
5046
; CHECK-NEXT: [[I24_2:%.*]] = or disjoint i32 [[I4_05]], 3
51-
; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]]
47+
; CHECK-NEXT: [[I21_3:%.*]] = mul nsw i32 [[I21_1]], [[I24_2]]
48+
; CHECK-NEXT: [[I3_SROA_0_4_VEC_INSERT33]] = insertelement <2 x i32> [[TMP1]], i32 [[I21_3]], i64 1
5249
; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4
5350
; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]]
54-
; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
51+
; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
5552
;
5653
bb:
5754
%i = alloca i32, align 4

llvm/test/Transforms/PhaseOrdering/swap-promotion.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@
55

66
define void @swap(ptr %p1, ptr %p2) {
77
; CHECK-LABEL: @swap(
8-
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 1
8+
; CHECK-NEXT: [[DOTUNPACK:%.*]] = load i32, ptr [[P1:%.*]], align 1
9+
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4
10+
; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i32, ptr [[DOTELT1]], align 1
911
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[P2:%.*]], align 1
1012
; CHECK-NEXT: store i64 [[TMP2]], ptr [[P1]], align 1
11-
; CHECK-NEXT: store i64 [[TMP1]], ptr [[P2]], align 1
13+
; CHECK-NEXT: store i32 [[DOTUNPACK]], ptr [[P2]], align 1
14+
; CHECK-NEXT: [[P2_REPACK8:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 4
15+
; CHECK-NEXT: store i32 [[DOTUNPACK2]], ptr [[P2_REPACK8]], align 1
1216
; CHECK-NEXT: ret void
1317
;
1418
%tmp = alloca [2 x i32]

0 commit comments

Comments
 (0)