Skip to content

Commit 8240ccb

Browse files
committed
Fix bug in alignment derivation; update test to show improvement
1 parent f02c6f8 commit 8240ccb

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -690,8 +690,8 @@ std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
690690

691691
// Cache the best aligned element in the chain for use when creating extra
692692
// elements.
693-
Align BestAlignedElemAlign;
694-
APInt OffsetOfBestAlignedElemFromLeader;
693+
Align BestAlignedElemAlign = getLoadStoreAlignment(C[0].Inst);
694+
APInt OffsetOfBestAlignedElemFromLeader = C[0].OffsetFromLeader;
695695
for (const auto &E : C) {
696696
Align ElementAlignment = getLoadStoreAlignment(E.Inst);
697697
if (ElementAlignment > BestAlignedElemAlign) {

llvm/test/Transforms/LoadStoreVectorizer/NVPTX/many_loads_stores.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
; This is an end-to-end test that checks that LSV succeeds at vectorizing a
22
; large program with many loads.
33
; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s > %t
4-
; RUN: grep 'load i8' < %t | count 18
5-
; RUN: grep 'load <2 x i8>' < %t | count 9
4+
; RUN: grep 'load i8' < %t | count 12
5+
; RUN: grep 'load <2 x i8>' < %t | count 3
66
; RUN: grep 'load <4 x i8>' < %t | count 27
7+
; RUN: grep 'call <4 x i8> @llvm.masked.load.v4i8.p1.*<4 x i1> <i1 false, i1 true, i1 true, i1 true>' < %t | count 6
78

89
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
910
target triple = "nvptx64-nvidia-cuda"

0 commit comments

Comments
 (0)