Skip to content

Commit a9b215b

Browse files
committed
fixup! [SROA] Vector promote some memsets
1 parent c0525fd commit a9b215b

File tree

5 files changed

+202
-80
lines changed

5 files changed

+202
-80
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,9 +1023,14 @@ static FixedVectorType *getVectorTypeFor(const MemSetInst &II,
10231023
if (Val.ugt(std::numeric_limits<unsigned>::max()))
10241024
return nullptr;
10251025

1026-
auto *VTy =
1027-
FixedVectorType::get(II.getValue()->getType(), Val.getZExtValue());
1028-
if (DL.getTypeStoreSizeInBits(VTy) != DL.getTypeAllocSizeInBits(VTy))
1026+
uint64_t MemSetLen = Val.getZExtValue();
1027+
auto *VTy = FixedVectorType::get(II.getValue()->getType(), MemSetLen);
1028+
1029+
// FIXME: This is a workaround. Vector promotion sometimes inhibits our
1030+
// ability to merge constant stores. It seems to be related to the presence of
1031+
// alignment bytes. See
1032+
// test/Transforms/PhaseOrdering/X86/store-constant-merge.ll
1033+
if (MemSetLen != DL.getTypeAllocSize(VTy).getFixedValue())
10291034
return nullptr;
10301035

10311036
return VTy;
@@ -1191,23 +1196,17 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
11911196
if (!IsOffsetKnown)
11921197
return PI.setAborted(&II);
11931198

1194-
auto IsSplittable = [&]() {
1195-
FixedVectorType *VTy = getVectorTypeFor(II, DL);
1196-
Type *ATy = AS.AI.getAllocatedType();
1199+
bool Splittable;
11971200

1198-
if (!Length)
1199-
return false;
1200-
if (!VTy)
1201-
return true;
1202-
if (DL.getTypeAllocSize(VTy) != DL.getTypeAllocSize(ATy))
1203-
return true;
1204-
return isSplittableMemOp(ATy, II.isVolatile());
1205-
};
1201+
if (getVectorTypeFor(II, DL))
1202+
Splittable = isSplittableMemOp(AS.AI.getAllocatedType(), II.isVolatile());
1203+
else
1204+
Splittable = (bool)Length;
12061205

12071206
insertUse(II, Offset,
12081207
Length ? Length->getLimitedValue()
12091208
: AllocSize - Offset.getLimitedValue(),
1210-
IsSplittable());
1209+
Splittable);
12111210
}
12121211

12131212
void visitMemTransferInst(MemTransferInst &II) {

llvm/test/Transforms/SROA/basictest.ll

Lines changed: 27 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ define void @test3(ptr %dst, ptr align 8 %src) {
138138
; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [99 x i8], align 1
139139
; CHECK-NEXT: [[A_SROA_32:%.*]] = alloca [16 x i8], align 1
140140
; CHECK-NEXT: [[A_SROA_15:%.*]] = alloca [42 x i8], align 1
141-
; CHECK-NEXT: [[A_SROA_16:%.*]] = alloca [7 x i8], align 1
142-
; CHECK-NEXT: [[A_SROA_235:%.*]] = alloca [7 x i8], align 1
141+
; CHECK-NEXT: [[A_SROA_235:%.*]] = alloca [15 x i8], align 1
143142
; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca [85 x i8], align 1
144143
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 8 [[SRC:%.*]], i32 42, i1 false), !tbaa [[TBAA0:![0-9]+]]
145144
; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 42
@@ -151,11 +150,7 @@ define void @test3(ptr %dst, ptr align 8 %src) {
151150
; CHECK-NEXT: [[A_SROA_15_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 158
152151
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15]], ptr align 2 [[A_SROA_15_0_SRC_SROA_IDX]], i32 42, i1 false), !tbaa [[TBAA0]]
153152
; CHECK-NEXT: [[A_SROA_16_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 200
154-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 8 [[A_SROA_16_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]]
155-
; CHECK-NEXT: [[A_SROA_23_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 207
156-
; CHECK-NEXT: [[A_SROA_23_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_23_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
157-
; CHECK-NEXT: [[A_SROA_235_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 208
158-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235]], ptr align 8 [[A_SROA_235_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]]
153+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235]], ptr align 8 [[A_SROA_16_0_SRC_SROA_IDX]], i32 15, i1 false), !tbaa [[TBAA0]]
159154
; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 215
160155
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_0_SRC_SROA_IDX]], i32 85, i1 false), !tbaa [[TBAA0]]
161156
; CHECK-NEXT: store i8 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA3:![0-9]+]]
@@ -178,37 +173,37 @@ define void @test3(ptr %dst, ptr align 8 %src) {
178173
; CHECK-NEXT: store i64 8, ptr [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX]], align 1, !tbaa [[TBAA23:![0-9]+]]
179174
; CHECK-NEXT: [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 8
180175
; CHECK-NEXT: store i64 9, ptr [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX]], align 1, !tbaa [[TBAA25:![0-9]+]]
181-
; CHECK-NEXT: store i8 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA27:![0-9]+]]
182-
; CHECK-NEXT: store i16 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA29:![0-9]+]]
183-
; CHECK-NEXT: store i32 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA31:![0-9]+]]
184-
; CHECK-NEXT: [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 1
185-
; CHECK-NEXT: store i32 2, ptr [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA33:![0-9]+]]
186-
; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2
187-
; CHECK-NEXT: store i32 3, ptr [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA35:![0-9]+]]
188-
; CHECK-NEXT: [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 3
189-
; CHECK-NEXT: store i32 4, ptr [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA37:![0-9]+]]
190-
; CHECK-NEXT: store i32 1, ptr [[A_SROA_235]], align 1, !tbaa [[TBAA39:![0-9]+]]
191-
; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1
192-
; CHECK-NEXT: store i8 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11]], align 1, !tbaa [[TBAA41:![0-9]+]]
193-
; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1
194-
; CHECK-NEXT: store i16 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10]], align 1, !tbaa [[TBAA43:![0-9]+]]
176+
; CHECK-NEXT: store i8 1, ptr [[A_SROA_235]], align 1, !tbaa [[TBAA27:![0-9]+]]
177+
; CHECK-NEXT: store i16 1, ptr [[A_SROA_235]], align 1, !tbaa [[TBAA29:![0-9]+]]
178+
; CHECK-NEXT: store i32 1, ptr [[A_SROA_235]], align 1, !tbaa [[TBAA31:![0-9]+]]
195179
; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1
196-
; CHECK-NEXT: store i32 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA45:![0-9]+]]
180+
; CHECK-NEXT: store i32 2, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA33:![0-9]+]]
197181
; CHECK-NEXT: [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2
198-
; CHECK-NEXT: store i32 3, ptr [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA47:![0-9]+]]
182+
; CHECK-NEXT: store i32 3, ptr [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA35:![0-9]+]]
199183
; CHECK-NEXT: [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 3
200-
; CHECK-NEXT: store i32 4, ptr [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA49:![0-9]+]]
184+
; CHECK-NEXT: store i32 4, ptr [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA37:![0-9]+]]
185+
; CHECK-NEXT: [[A_SROA_16_8_OVERLAP2_2_0_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 8
186+
; CHECK-NEXT: store i32 1, ptr [[A_SROA_16_8_OVERLAP2_2_0_I8_SROA_IDX]], align 1, !tbaa [[TBAA39:![0-9]+]]
187+
; CHECK-NEXT: [[A_SROA_16_9_OVERLAP2_2_1_I8_SROA_IDX10:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 9
188+
; CHECK-NEXT: store i8 1, ptr [[A_SROA_16_9_OVERLAP2_2_1_I8_SROA_IDX10]], align 1, !tbaa [[TBAA41:![0-9]+]]
189+
; CHECK-NEXT: [[A_SROA_16_9_OVERLAP2_2_1_I8_SROA_IDX9:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 9
190+
; CHECK-NEXT: store i16 1, ptr [[A_SROA_16_9_OVERLAP2_2_1_I8_SROA_IDX9]], align 1, !tbaa [[TBAA43:![0-9]+]]
191+
; CHECK-NEXT: [[A_SROA_16_9_OVERLAP2_2_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 9
192+
; CHECK-NEXT: store i32 1, ptr [[A_SROA_16_9_OVERLAP2_2_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA45:![0-9]+]]
193+
; CHECK-NEXT: [[A_SROA_16_10_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 10
194+
; CHECK-NEXT: store i32 3, ptr [[A_SROA_16_10_OVERLAP2_2_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA47:![0-9]+]]
195+
; CHECK-NEXT: [[A_SROA_16_11_OVERLAP2_2_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 11
196+
; CHECK-NEXT: store i32 4, ptr [[A_SROA_16_11_OVERLAP2_2_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA49:![0-9]+]]
201197
; CHECK-NEXT: [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_15]], i64 39
202198
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], ptr align 1 [[SRC]], i32 3, i1 false), !tbaa [[TBAA51:![0-9]+]]
203199
; CHECK-NEXT: [[A_SROA_16_197_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
204-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 1 [[A_SROA_16_197_SRC_SROA_IDX]], i32 5, i1 false), !tbaa [[TBAA51]]
205-
; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2
206-
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12]], i8 42, i32 5, i1 false), !tbaa [[TBAA53:![0-9]+]]
207-
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_235]], i8 42, i32 2, i1 false), !tbaa [[TBAA53]]
208-
; CHECK-NEXT: [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1
209-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA55:![0-9]+]]
200+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235]], ptr align 1 [[A_SROA_16_197_SRC_SROA_IDX]], i32 5, i1 false), !tbaa [[TBAA51]]
210201
; CHECK-NEXT: [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2
211-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA57:![0-9]+]]
202+
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9]], i8 42, i32 8, i1 false), !tbaa [[TBAA53:![0-9]+]]
203+
; CHECK-NEXT: [[A_SROA_16_209_OVERLAP2_2_1_I8_SROA_IDX6:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 9
204+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16_209_OVERLAP2_2_1_I8_SROA_IDX6]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA55:![0-9]+]]
205+
; CHECK-NEXT: [[A_SROA_16_210_OVERLAP2_2_2_I8_SROA_IDX7:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 10
206+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16_210_OVERLAP2_2_2_I8_SROA_IDX7]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA57:![0-9]+]]
212207
; CHECK-NEXT: [[A_SROA_31_210_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 5
213208
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_210_SRC_SROA_IDX]], i32 3, i1 false), !tbaa [[TBAA57]]
214209
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 42, i1 false), !tbaa [[TBAA59:![0-9]+]]
@@ -221,11 +216,7 @@ define void @test3(ptr %dst, ptr align 8 %src) {
221216
; CHECK-NEXT: [[A_SROA_15_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 158
222217
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_15]], i32 42, i1 false), !tbaa [[TBAA59]]
223218
; CHECK-NEXT: [[A_SROA_16_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 200
224-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_16]], i32 7, i1 false), !tbaa [[TBAA59]]
225-
; CHECK-NEXT: [[A_SROA_23_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 207
226-
; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA59]]
227-
; CHECK-NEXT: [[A_SROA_235_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 208
228-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_235]], i32 7, i1 false), !tbaa [[TBAA59]]
219+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_235]], i32 15, i1 false), !tbaa [[TBAA59]]
229220
; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 215
230221
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_31]], i32 85, i1 false), !tbaa [[TBAA59]]
231222
; CHECK-NEXT: ret void

llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,13 @@ define amdgpu_kernel void @test_struct_contain_multiple_types2() #0 {
140140
; CHECK-NEXT: entry:
141141
; CHECK-NEXT: [[DATA1:%.*]] = load [4 x i32], ptr undef, align 4
142142
; CHECK-NEXT: [[DATA1_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 0
143+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DATA1_FCA_0_EXTRACT]], i32 0
143144
; CHECK-NEXT: [[DATA1_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 1
145+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x i32> [[B_BLOCKWISE_COPY_SROA_0_0_VEC_INSERT]], i32 [[DATA1_FCA_1_EXTRACT]], i32 1
144146
; CHECK-NEXT: [[DATA1_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 2
147+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[B_BLOCKWISE_COPY_SROA_0_4_VEC_INSERT]], i32 [[DATA1_FCA_2_EXTRACT]], i32 2
145148
; CHECK-NEXT: [[DATA1_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 3
149+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[B_BLOCKWISE_COPY_SROA_0_8_VEC_INSERT]], i32 [[DATA1_FCA_3_EXTRACT]], i32 3
146150
; CHECK-NEXT: [[DATA2:%.*]] = load <4 x float>, ptr undef, align 16
147151
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA2]] to <8 x i16>
148152
; CHECK-NEXT: br label [[BB:%.*]]
@@ -214,13 +218,17 @@ define amdgpu_kernel void @test_struct_array_vector_i16() #0 {
214218
; CHECK-NEXT: entry:
215219
; CHECK-NEXT: [[DATA:%.*]] = load <4 x i32>, ptr undef, align 16
216220
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[DATA]] to <8 x i16>
221+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
222+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VECBLEND:%.*]] = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i16> [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXPAND]], <16 x i16> zeroinitializer
217223
; CHECK-NEXT: [[DATA2:%.*]] = load <4 x i32>, ptr undef, align 16
218224
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[DATA2]] to <8 x i16>
225+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
226+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_16_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> [[B_BLOCKWISE_COPY_SROA_0_16_VEC_EXPAND]], <16 x i16> [[B_BLOCKWISE_COPY_SROA_0_0_VECBLEND]]
219227
; CHECK-NEXT: br label [[BB:%.*]]
220228
; CHECK: bb:
221-
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
222-
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
223-
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_16_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0
229+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <16 x i16> [[B_BLOCKWISE_COPY_SROA_0_16_VECBLEND]], i32 0
230+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <16 x i16> [[B_BLOCKWISE_COPY_SROA_0_16_VECBLEND]], i32 1
231+
; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <16 x i16> [[B_BLOCKWISE_COPY_SROA_0_16_VECBLEND]], i32 8
224232
; CHECK-NEXT: ret void
225233
;
226234
entry:

0 commit comments

Comments
 (0)