Skip to content

Commit 8c8013c

Browse files
committed
Apply the revised shuffle cost calculation
1 parent 772eb2f commit 8c8013c

File tree

2 files changed

+45
-66
lines changed

2 files changed

+45
-66
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -334,25 +334,29 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
334334
// We assume this operation has no cost in codegen if there was no offset.
335335
// Note that we could use freeze to avoid poison problems, but then we might
336336
// still need a shuffle to change the vector size.
337+
auto *Ty = cast<FixedVectorType>(I.getType());
337338
SmallVector<int> Mask;
338339
assert(OffsetEltIndex + VectorRange < MinVecNumElts &&
339340
"Address offset too big");
340341
if (NeedCast) {
341342
Mask.assign(MinVecNumElts, PoisonMaskElem);
342343
std::iota(Mask.begin(), Mask.begin() + VectorRange, OffsetEltIndex);
343344
} else {
344-
auto *Ty = cast<FixedVectorType>(I.getType());
345345
unsigned OutputNumElts = Ty->getNumElements();
346346
Mask.assign(OutputNumElts, PoisonMaskElem);
347347
Mask[0] = OffsetEltIndex;
348348
}
349349

350350
if (OffsetEltIndex)
351-
NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, MinVecTy, Mask,
352-
CostKind);
351+
if (NeedCast)
352+
NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, MinVecTy, Mask,
353+
CostKind);
354+
else
355+
NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, MinVecTy, Mask,
356+
CostKind);
353357

354358
if (NeedCast)
355-
NewCost += TTI.getCastInstrCost(Instruction::BitCast, I.getType(), MinVecTy,
359+
NewCost += TTI.getCastInstrCost(Instruction::BitCast, Ty, MinVecTy,
356360
TargetTransformInfo::CastContextHint::None,
357361
CostKind);
358362

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 37 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
3-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
2+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
3+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
44

55
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
66

@@ -159,8 +159,7 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
159159
define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
160160
; CHECK-LABEL: @load_f32_insert_v4f32(
161161
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
162-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
163-
; CHECK-NEXT: ret <4 x float> [[R]]
162+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
164163
;
165164
%s = load float, ptr %p, align 4
166165
%r = insertelement <4 x float> poison, float %s, i32 0
@@ -170,8 +169,7 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
170169
define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
171170
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
172171
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
173-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
174-
; CHECK-NEXT: ret <4 x float> [[R]]
172+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
175173
;
176174
%s = load float, ptr %p, align 4
177175
%r = insertelement <4 x float> poison, float %s, i32 0
@@ -183,8 +181,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
183181
define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
184182
; CHECK-LABEL: @load_i32_insert_v4i32(
185183
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
186-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
187-
; CHECK-NEXT: ret <4 x i32> [[R]]
184+
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
188185
;
189186
%s = load i32, ptr %p, align 4
190187
%r = insertelement <4 x i32> poison, i32 %s, i32 0
@@ -196,8 +193,7 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
196193
define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
197194
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
198195
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
199-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
200-
; CHECK-NEXT: ret <4 x i32> [[R]]
196+
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
201197
;
202198
%s = load i32, ptr %p, align 4
203199
%r = insertelement <4 x i32> poison, i32 %s, i32 0
@@ -209,8 +205,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
209205
define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
210206
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
211207
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
212-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
213-
; CHECK-NEXT: ret <4 x float> [[R]]
208+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
214209
;
215210
%s = load float, ptr %p, align 16
216211
%r = insertelement <4 x float> poison, float %s, i64 0
@@ -222,8 +217,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
222217
define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
223218
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
224219
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
225-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
226-
; CHECK-NEXT: ret <4 x float> [[R]]
220+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
227221
;
228222
%s = load float, ptr addrspace(44) %p, align 16
229223
%r = insertelement <4 x float> poison, float %s, i64 0
@@ -235,8 +229,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) alig
235229
define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(ptr align 16 dereferenceable(16) %v3) {
236230
; CHECK-LABEL: @unsafe_load_i32_insert_v4i32_addrspace(
237231
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[V3:%.*]] to ptr addrspace(42)
238-
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(42) [[TMP1]], align 16
239-
; CHECK-NEXT: [[INSELT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
232+
; CHECK-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr addrspace(42) [[TMP1]], align 16
233+
; CHECK-NEXT: [[INSELT:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
240234
; CHECK-NEXT: ret <4 x i32> [[INSELT]]
241235
;
242236
%t0 = getelementptr inbounds i32, ptr %v3, i32 1
@@ -253,8 +247,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
253247
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
254248
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
255249
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
256-
; CHECK-NEXT: [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
257-
; CHECK-NEXT: ret <8 x i16> [[R1]]
250+
; CHECK-NEXT: ret <8 x i16> [[R]]
258251
;
259252
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
260253
%s = load i16, ptr %gep, align 2
@@ -266,8 +259,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
266259

267260
define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(17) %p) nofree nosync {
268261
; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
269-
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
270-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
262+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 16
263+
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
271264
; CHECK-NEXT: ret <8 x i16> [[R]]
272265
;
273266
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -280,8 +273,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
280273

281274
define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 dereferenceable(16) %p) nofree nosync {
282275
; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
283-
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
284-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
276+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 2
277+
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
285278
; CHECK-NEXT: ret <8 x i16> [[R]]
286279
;
287280
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -348,17 +341,11 @@ define <4 x i32> @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 der
348341
}
349342

350343
define <4 x i32> @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
351-
; SSE2-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
352-
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
353-
; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
354-
; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
355-
; SSE2-NEXT: ret <4 x i32> [[R]]
356-
;
357-
; AVX2-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
358-
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
359-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
360-
; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
361-
; AVX2-NEXT: ret <4 x i32> [[R]]
344+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
345+
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
346+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
347+
; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
348+
; CHECK-NEXT: ret <4 x i32> [[R]]
362349
;
363350
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
364351
%s = load i32, ptr %gep, align 1
@@ -386,17 +373,11 @@ define <2 x i64> @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 der
386373
}
387374

388375
define <4 x i32> @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
389-
; SSE2-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
390-
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
391-
; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
392-
; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
393-
; SSE2-NEXT: ret <4 x i32> [[R]]
394-
;
395-
; AVX2-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
396-
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
397-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
398-
; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
399-
; AVX2-NEXT: ret <4 x i32> [[R]]
376+
; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
377+
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
378+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
379+
; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
380+
; CHECK-NEXT: ret <4 x i32> [[R]]
400381
;
401382
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 5
402383
%s = load i32, ptr %gep, align 1
@@ -452,8 +433,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
452433
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
453434
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
454435
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
455-
; CHECK-NEXT: [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
456-
; CHECK-NEXT: ret <8 x i16> [[R1]]
436+
; CHECK-NEXT: ret <8 x i16> [[R]]
457437
;
458438
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
459439
%s = load i16, ptr %gep, align 16
@@ -555,8 +535,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
555535
define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
556536
; CHECK-LABEL: @load_f32_insert_v4f32_align(
557537
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
558-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
559-
; CHECK-NEXT: ret <4 x float> [[R]]
538+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
560539
;
561540
%s = load float, ptr %p, align 4
562541
%r = insertelement <4 x float> poison, float %s, i32 0
@@ -578,8 +557,8 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
578557

579558
define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
580559
; CHECK-LABEL: @load_i32_insert_v8i32(
581-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
582-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
560+
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 16
561+
; CHECK-NEXT: [[R:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
583562
; CHECK-NEXT: ret <8 x i32> [[R]]
584563
;
585564
%s = load i32, ptr %p, align 4
@@ -589,8 +568,8 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
589568

590569
define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
591570
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
592-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
593-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
571+
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
572+
; CHECK-NEXT: [[R:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
594573
; CHECK-NEXT: ret <8 x i32> [[R]]
595574
;
596575
%s = load i32, ptr %p, align 4
@@ -600,8 +579,8 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
600579

601580
define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
602581
; CHECK-LABEL: @load_f32_insert_v16f32(
603-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
604-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
582+
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16
583+
; CHECK-NEXT: [[R:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
605584
; CHECK-NEXT: ret <16 x float> [[R]]
606585
;
607586
%s = load float, ptr %p, align 4
@@ -611,8 +590,7 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
611590

612591
define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
613592
; CHECK-LABEL: @load_f32_insert_v2f32(
614-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
615-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
593+
; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 16
616594
; CHECK-NEXT: ret <2 x float> [[R]]
617595
;
618596
%s = load float, ptr %p, align 4
@@ -662,8 +640,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
662640
define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
663641
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
664642
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
665-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
666-
; CHECK-NEXT: ret <4 x float> [[R]]
643+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
667644
;
668645
%l = load <2 x float>, ptr %p, align 4
669646
%s = extractelement <2 x float> %l, i32 0
@@ -674,8 +651,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
674651
define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
675652
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
676653
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
677-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
678-
; CHECK-NEXT: ret <4 x float> [[R]]
654+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
679655
;
680656
%l = load <8 x float>, ptr %p, align 4
681657
%s = extractelement <8 x float> %l, i32 0
@@ -755,8 +731,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32_tsan(ptr align 16 dereferenc
755731

756732
define <2 x float> @load_f32_insert_v2f32_msan(ptr align 16 dereferenceable(16) %p) nofree nosync sanitize_memory {
757733
; CHECK-LABEL: @load_f32_insert_v2f32_msan(
758-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
759-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
734+
; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 16
760735
; CHECK-NEXT: ret <2 x float> [[R]]
761736
;
762737
%s = load float, ptr %p, align 4

0 commit comments

Comments
 (0)