Skip to content

Commit df6ad3d

Browse files
cmc-rep authored and github-actions[bot] committed
Automerge: [LoadStoreVectorizer] Fix one-element vector handling (#169671)
This is a follow-up to llvm/llvm-project#168135.
2 parents 81ea94f + ceba82f commit df6ad3d

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -953,15 +953,15 @@ bool Vectorizer::vectorizeChain(Chain &C) {
953953
unsigned EOffset =
954954
(E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
955955
unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
956-
if (auto *VT = dyn_cast<FixedVectorType>(T)) {
956+
if (!VecTy->isVectorTy()) {
957+
V = VecInst;
958+
} else if (auto *VT = dyn_cast<FixedVectorType>(T)) {
957959
auto Mask = llvm::to_vector<8>(
958960
llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
959961
V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
960-
} else if (VecTy != VecElemTy) {
962+
} else {
961963
V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
962964
I->getName());
963-
} else {
964-
V = VecInst;
965965
}
966966
if (V->getType() != I->getType())
967967
V = Builder.CreateBitOrPointerCast(V, I->getType());

llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-loads.ll

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,33 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
22
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
33

4+
define void @onevec(ptr %ptr) {
5+
; CHECK-LABEL: define void @onevec(
6+
; CHECK-SAME: ptr [[PTR:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR]], align 4
8+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <1 x i32>
9+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 16
10+
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP1]], align 4
11+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
12+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 32
13+
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP2]], align 4
14+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to <1 x i32>
15+
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP5]] to <1 x i32>
16+
; CHECK-NEXT: ret void
17+
;
18+
%ld0 = load <1 x i32>, ptr %ptr, align 4
19+
%ld1 = load i32, ptr %ptr, align 4
20+
21+
%gep1 = getelementptr inbounds i8, ptr %ptr, i32 16
22+
%ld2 = load i32, ptr %gep1, align 4
23+
%ld3 = load <1 x i32>, ptr %gep1, align 4
24+
25+
%gep2 = getelementptr inbounds i8, ptr %ptr, i32 32
26+
%ld4 = load <1 x i32>, ptr %gep2, align 4
27+
%ld5 = load <1 x i32>, ptr %gep2, align 4
28+
ret void
29+
}
30+
431
define void @test(ptr %ptr) {
532
; CHECK-LABEL: define void @test(
633
; CHECK-SAME: ptr [[PTR:%.*]]) {

0 commit comments

Comments
 (0)