Skip to content

Commit 4d6873f

Browse files
author
Leon Clark
committed
Address review comments and update tests.
1 parent d6c00c0 commit 4d6873f

File tree

4 files changed

+39
-25
lines changed

4 files changed

+39
-25
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3528,10 +3528,11 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
35283528
OutputRange.second = std::max(Index, OutputRange.second);
35293529
}
35303530
}
3531-
3532-
if (OutputRange.second < OutputRange.first)
3533-
return {};
35343531
}
3532+
3533+
if (OutputRange.second < OutputRange.first)
3534+
return {};
3535+
35353536
return OutputRange;
35363537
};
35373538

@@ -3546,20 +3547,33 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
35463547
// Create new load of smaller vector.
35473548
auto *ElemTy = VecTy->getElementType();
35483549
auto *NewVecTy = FixedVectorType::get(ElemTy, NewSize);
3550+
auto *PtrOp = OldLoad->getPointerOperand();
35493551
auto *NewLoad = cast<LoadInst>(
3550-
Builder.CreateLoad(NewVecTy, OldLoad->getPointerOperand()));
3552+
Builder.CreateAlignedLoad(NewVecTy, PtrOp, OldLoad->getAlign()));
35513553
NewLoad->copyMetadata(I);
35523554

3553-
// Compare cost of old and new loads.
3555+
// Compare cost of old and new ops.
35543556
auto OldCost = TTI.getMemoryOpCost(
35553557
Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
35563558
OldLoad->getPointerAddressSpace(), CostKind);
35573559
auto NewCost = TTI.getMemoryOpCost(
35583560
Instruction::Load, NewLoad->getType(), NewLoad->getAlign(),
35593561
NewLoad->getPointerAddressSpace(), CostKind);
35603562

3561-
if (OldCost < NewCost || !NewCost.isValid())
3563+
for (auto &Use : I.uses()) {
3564+
auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
3565+
auto Mask = Shuffle->getShuffleMask();
3566+
3567+
OldCost += TTI.getShuffleCost(
3568+
TTI::SK_PermuteSingleSrc, VecTy, Mask, CostKind);
3569+
NewCost += TTI.getShuffleCost(
3570+
TTI::SK_PermuteSingleSrc, NewVecTy, Mask, CostKind);
3571+
}
3572+
3573+
if (OldCost < NewCost || !NewCost.isValid()) {
3574+
NewLoad->eraseFromParent();
35623575
return false;
3576+
}
35633577

35643578
// Replace all users.
35653579
for (auto &Use : I.uses()) {

llvm/test/Transforms/VectorCombine/X86/load-widening.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -336,7 +336,7 @@ define <8 x float> @load_v2f32_v8f32(ptr dereferenceable(32) %p) {
336336

337337
define <4 x i32> @load_v2i32_v4i32(ptr dereferenceable(16) %p) {
338338
; CHECK-LABEL: @load_v2i32_v4i32(
339-
; CHECK-NEXT: [[S:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
339+
; CHECK-NEXT: [[S:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
340340
; CHECK-NEXT: ret <4 x i32> [[S]]
341341
;
342342
%l = load <2 x i32>, ptr %p, align 1
@@ -443,7 +443,7 @@ define <8 x float> @load_v2f32_v8f32_hwasan(ptr dereferenceable(32) %p) sanitize
443443

444444
define <4 x i32> @load_v2i32_v4i32_asan(ptr dereferenceable(16) %p) sanitize_address {
445445
; CHECK-LABEL: @load_v2i32_v4i32_asan(
446-
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
446+
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 1
447447
; CHECK-NEXT: [[S:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
448448
; CHECK-NEXT: ret <4 x i32> [[S]]
449449
;

llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,8 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
4949
define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) {
5050
; CHECK-LABEL: define <4 x double> @blend_broadcasts_v4f64(
5151
; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
52-
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x double>, ptr [[P0]], align 8
53-
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x double>, ptr [[P1]], align 8
52+
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x double>, ptr [[P0]], align 32
53+
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x double>, ptr [[P1]], align 32
5454
; CHECK-NEXT: [[BLEND:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 1, i32 0>
5555
; CHECK-NEXT: ret <4 x double> [[BLEND]]
5656
;

llvm/test/Transforms/VectorCombine/load-shufflevector.ll

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly %ar
1919
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1(
2020
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
2121
; CHECK-NEXT: [[ENTRY:.*:]]
22-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 4
22+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 32
2323
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
2424
; CHECK-NEXT: ret <8 x half> [[TMP1]]
2525
;
@@ -33,7 +33,7 @@ define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly %ar
3333
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2(
3434
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
3535
; CHECK-NEXT: [[ENTRY:.*:]]
36-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
36+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
3737
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
3838
; CHECK-NEXT: ret <8 x half> [[TMP1]]
3939
;
@@ -47,7 +47,7 @@ define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %ar
4747
; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2(
4848
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
4949
; CHECK-NEXT: [[ENTRY:.*:]]
50-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
50+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
5151
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
5252
; CHECK-NEXT: ret <4 x half> [[TMP1]]
5353
;
@@ -61,7 +61,7 @@ define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %ar
6161
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2(
6262
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
6363
; CHECK-NEXT: [[ENTRY:.*:]]
64-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
64+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
6565
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
6666
; CHECK-NEXT: ret <8 x half> [[TMP1]]
6767
;
@@ -75,7 +75,7 @@ define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture readonl
7575
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1(
7676
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
7777
; CHECK-NEXT: [[ENTRY:.*:]]
78-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 4
78+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 32
7979
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
8080
; CHECK: [[THEN]]:
8181
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> zeroinitializer
@@ -108,7 +108,7 @@ define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonl
108108
; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2(
109109
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
110110
; CHECK-NEXT: [[ENTRY:.*:]]
111-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
111+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
112112
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
113113
; CHECK: [[THEN]]:
114114
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -141,7 +141,7 @@ define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonl
141141
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2(
142142
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
143143
; CHECK-NEXT: [[ENTRY:.*:]]
144-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
144+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
145145
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
146146
; CHECK: [[THEN]]:
147147
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -174,7 +174,7 @@ define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly %arg
174174
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1(
175175
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
176176
; CHECK-NEXT: [[ENTRY:.*:]]
177-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 8
177+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 32
178178
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
179179
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
180180
;
@@ -188,7 +188,7 @@ define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly %arg
188188
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2(
189189
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
190190
; CHECK-NEXT: [[ENTRY:.*:]]
191-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
191+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
192192
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
193193
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
194194
;
@@ -202,7 +202,7 @@ define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg
202202
; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2(
203203
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
204204
; CHECK-NEXT: [[ENTRY:.*:]]
205-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
205+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
206206
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
207207
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
208208
;
@@ -216,7 +216,7 @@ define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg
216216
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2(
217217
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
218218
; CHECK-NEXT: [[ENTRY:.*:]]
219-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
219+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
220220
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
221221
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
222222
;
@@ -230,7 +230,7 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture readonly
230230
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(
231231
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
232232
; CHECK-NEXT: [[ENTRY:.*:]]
233-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 8
233+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 32
234234
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
235235
; CHECK: [[THEN]]:
236236
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer
@@ -263,7 +263,7 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture readonly
263263
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(
264264
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
265265
; CHECK-NEXT: [[ENTRY:.*:]]
266-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
266+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
267267
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
268268
; CHECK: [[THEN]]:
269269
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> zeroinitializer
@@ -296,7 +296,7 @@ define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly
296296
; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(
297297
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
298298
; CHECK-NEXT: [[ENTRY:.*:]]
299-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
299+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
300300
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
301301
; CHECK: [[THEN]]:
302302
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -329,7 +329,7 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly
329329
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(
330330
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
331331
; CHECK-NEXT: [[ENTRY:.*:]]
332-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
332+
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
333333
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
334334
; CHECK: [[THEN]]:
335335
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>

0 commit comments

Comments
 (0)