Skip to content

Commit 365522e

Browse files
author
Leon Clark
committed
[VectorCombine] Trim low end of loads used in shufflevector rebroadcasts.
1 parent bceb357 commit 365522e

File tree

2 files changed

+48
-27
lines changed

2 files changed

+48
-27
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3745,7 +3745,7 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
37453745

37463746
// Get the range of vector elements used by shufflevector instructions.
37473747
if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
3748-
unsigned const NewNumElements = Indices->second + 1u;
3748+
unsigned const NewNumElements = (Indices->second + 1u) - Indices->first;
37493749

37503750
// If the range of vector elements is smaller than the full load, attempt
37513751
// to create a smaller load.
@@ -3767,21 +3767,28 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
37673767

37683768
using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
37693769
SmallVector<UseEntry, 4u> NewUses;
3770+
unsigned const LowOffset = Indices->first;
3771+
unsigned const HighOffset = OldNumElements - NewNumElements;
37703772

37713773
for (llvm::Use &Use : I.uses()) {
37723774
auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
37733775
ArrayRef<int> OldMask = Shuffle->getShuffleMask();
37743776

37753777
// Create entry for new use.
3776-
NewUses.push_back({Shuffle, OldMask});
3778+
NewUses.push_back({Shuffle, {}});
3779+
std::vector<int> &NewMask = NewUses.back().second;
3780+
for (int Index : OldMask)
3781+
NewMask.push_back(Index >= static_cast<int>(OldNumElements)
3782+
? Index - HighOffset
3783+
: Index - LowOffset);
37773784

37783785
// Update costs.
37793786
OldCost +=
37803787
TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(),
37813788
OldLoadTy, OldMask, CostKind);
37823789
NewCost +=
37833790
TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(),
3784-
NewLoadTy, OldMask, CostKind);
3791+
NewLoadTy, NewMask, CostKind);
37853792
}
37863793

37873794
LLVM_DEBUG(
@@ -3793,8 +3800,13 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
37933800
return false;
37943801

37953802
// Create new load of smaller vector.
3803+
Value *NewPtr =
3804+
LowOffset > 0u
3805+
? Builder.CreateInBoundsPtrAdd(PtrOp, Builder.getInt64(LowOffset))
3806+
: PtrOp;
3807+
37963808
auto *NewLoad = cast<LoadInst>(
3797-
Builder.CreateAlignedLoad(NewLoadTy, PtrOp, OldLoad->getAlign()));
3809+
Builder.CreateAlignedLoad(NewLoadTy, NewPtr, OldLoad->getAlign()));
37983810
NewLoad->copyMetadata(I);
37993811

38003812
// Replace all uses.

llvm/test/Transforms/VectorCombine/load-shufflevector.ll

Lines changed: 32 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,9 @@ define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %ar
4747
; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2(
4848
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
4949
; CHECK-NEXT: [[ENTRY:.*:]]
50-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
51-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
50+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
51+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 32
52+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
5253
; CHECK-NEXT: ret <4 x half> [[TMP1]]
5354
;
5455
entry:
@@ -61,8 +62,9 @@ define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %ar
6162
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2(
6263
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
6364
; CHECK-NEXT: [[ENTRY:.*:]]
64-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
65-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
65+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
66+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 32
67+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
6668
; CHECK-NEXT: ret <8 x half> [[TMP1]]
6769
;
6870
entry:
@@ -108,13 +110,14 @@ define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonl
108110
; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2(
109111
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
110112
; CHECK-NEXT: [[ENTRY:.*:]]
111-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
113+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
114+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 32
112115
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
113116
; CHECK: [[THEN]]:
114-
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
117+
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x half> [[TMP1]], <2 x half> poison, <4 x i32> zeroinitializer
115118
; CHECK-NEXT: br label %[[FINALLY:.*]]
116119
; CHECK: [[ELSE]]:
117-
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
120+
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x half> [[TMP1]], <2 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
118121
; CHECK-NEXT: br label %[[FINALLY]]
119122
; CHECK: [[FINALLY]]:
120123
; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
@@ -141,13 +144,14 @@ define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonl
141144
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2(
142145
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
143146
; CHECK-NEXT: [[ENTRY:.*:]]
144-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 32
147+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
148+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 32
145149
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
146150
; CHECK: [[THEN]]:
147-
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
151+
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x half> [[TMP1]], <2 x half> poison, <8 x i32> zeroinitializer
148152
; CHECK-NEXT: br label %[[FINALLY:.*]]
149153
; CHECK: [[ELSE]]:
150-
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
154+
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x half> [[TMP1]], <2 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
151155
; CHECK-NEXT: br label %[[FINALLY]]
152156
; CHECK: [[FINALLY]]:
153157
; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
@@ -202,8 +206,9 @@ define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg
202206
; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2(
203207
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
204208
; CHECK-NEXT: [[ENTRY:.*:]]
205-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
206-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
209+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
210+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 32
211+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
207212
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
208213
;
209214
entry:
@@ -216,8 +221,9 @@ define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg
216221
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2(
217222
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
218223
; CHECK-NEXT: [[ENTRY:.*:]]
219-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
220-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
224+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
225+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 32
226+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
221227
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
222228
;
223229
entry:
@@ -296,13 +302,14 @@ define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly
296302
; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(
297303
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
298304
; CHECK-NEXT: [[ENTRY:.*:]]
299-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
305+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
306+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 32
300307
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
301308
; CHECK: [[THEN]]:
302-
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
309+
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer
303310
; CHECK-NEXT: br label %[[FINALLY:.*]]
304311
; CHECK: [[ELSE]]:
305-
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
312+
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
306313
; CHECK-NEXT: br label %[[FINALLY]]
307314
; CHECK: [[FINALLY]]:
308315
; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
@@ -329,13 +336,14 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly
329336
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(
330337
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
331338
; CHECK-NEXT: [[ENTRY:.*:]]
332-
; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 32
339+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
340+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 32
333341
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
334342
; CHECK: [[THEN]]:
335-
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
343+
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> zeroinitializer
336344
; CHECK-NEXT: br label %[[FINALLY:.*]]
337345
; CHECK: [[ELSE]]:
338-
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
346+
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
339347
; CHECK-NEXT: br label %[[FINALLY]]
340348
; CHECK: [[FINALLY]]:
341349
; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
@@ -362,13 +370,14 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r1_4(ptr addrspace(1) nocapture readonly
362370
; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_4(
363371
; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
364372
; CHECK-NEXT: [[ENTRY:.*:]]
365-
; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
373+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG0]], i64 1
374+
; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr addrspace(1) [[TMP0]], align 32
366375
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
367376
; CHECK: [[THEN]]:
368-
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
377+
; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
369378
; CHECK-NEXT: br label %[[FINALLY:.*]]
370379
; CHECK: [[ELSE]]:
371-
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4>
380+
; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
372381
; CHECK-NEXT: br label %[[FINALLY]]
373382
; CHECK: [[FINALLY]]:
374383
; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]

0 commit comments

Comments
 (0)