-
Couldn't load subscription status.
- Fork 15k
release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) #159286
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@RKSimon What do you think about merging this PR to the release branch? |
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-powerpc Author: None (llvmbot) ChangesBackport 994a6a3 Requested by: @RKSimon Full diff: https://github.com/llvm/llvm-project/pull/159286.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 639f8686a271e..ea9cbed0117b9 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1829,12 +1829,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
+ uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
+ Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits);
+ Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
for (User *U : Ext->users()) {
auto *Extract = cast<ExtractElementInst>(U);
uint64_t Idx =
cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
- Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits);
- Value *And = Builder.CreateAnd(LShr, EltBitMask);
+ uint64_t ShiftAmt =
+ DL->isBigEndian()
+ ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
+ : (Idx * SrcEltSizeInBits);
+ Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt);
+ Value *And = Builder.CreateAnd(LShr, Mask);
U->replaceAllUsesWith(And);
}
return true;
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
new file mode 100644
index 0000000000000..9796faf2e6feb
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='vector-combine' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE
+; RUN: opt -passes='vector-combine' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE
+
+define i64 @g(<8 x i8> %v) {
+; LE-LABEL: @g(
+; LE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; LE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; LE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 56
+; LE-NEXT: [[TMP4:%.*]] = and i64 [[TMP2]], 255
+; LE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; LE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; LE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; LE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; LE-NEXT: ret i64 [[SUM]]
+;
+; BE-LABEL: @g(
+; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255
+; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56
+; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; BE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; BE-NEXT: ret i64 [[SUM]]
+;
+ %z = zext <8 x i8> %v to <8 x i64>
+ %e0 = extractelement <8 x i64> %z, i32 0
+ %e7 = extractelement <8 x i64> %z, i32 7
+ %sum = add i64 %e0, %e7
+ ret i64 %sum
+}
+
+
+
diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg
new file mode 100644
index 0000000000000..15af315f104fc
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'PowerPC' not in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
new file mode 100644
index 0000000000000..a9b719920c341
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='vector-combine' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE
+
+define i64 @g(<8 x i8> %v) {
+; BE-LABEL: @g(
+; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255
+; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56
+; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; BE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; BE-NEXT: ret i64 [[SUM]]
+;
+ %z = zext <8 x i8> %v to <8 x i64>
+ %e0 = extractelement <8 x i64> %z, i32 0
+ %e7 = extractelement <8 x i64> %z, i32 7
+ %sum = add i64 %e0, %e7
+ ret i64 %sum
+}
+
|
|
|
|
CC @fhahn who wrote the original fold |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks for the fix + backport!
The scalarizeExtExtract transform assumed little-endian lane ordering, causing miscompiles on big-endian targets such as AIX/PowerPC under -O3 -flto. This patch updates the shift calculation to handle endianness correctly for big-endian targets. No functional change for little-endian targets. Fixes llvm#158197. --------- Co-authored-by: Simon Pilgrim <[email protected]> (cherry picked from commit 994a6a3)
1cb4887 to
9eedaf5
Compare
|
@RKSimon (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport 994a6a3
Requested by: @RKSimon