release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) #159286

llvmbot · 2025-09-17T08:51:31Z

Backport 994a6a3

Requested by: @RKSimon

llvmbot · 2025-09-17T08:51:40Z

@RKSimon What do you think about merging this PR to the release branch?

llvmbot · 2025-09-17T08:52:03Z

@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-powerpc

Author: None (llvmbot)

Changes

Backport 994a6a3

Requested by: @RKSimon

Full diff: https://github.com/llvm/llvm-project/pull/159286.diff

4 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+9-2)
(added) llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll (+36)
(added) llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg (+2)
(added) llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll (+22)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 639f8686a271e..ea9cbed0117b9 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1829,12 +1829,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
       IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
   uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
   uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
+  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
+  Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits);
+  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
   for (User *U : Ext->users()) {
     auto *Extract = cast<ExtractElementInst>(U);
     uint64_t Idx =
         cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
-    Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits);
-    Value *And = Builder.CreateAnd(LShr, EltBitMask);
+    uint64_t ShiftAmt =
+        DL->isBigEndian()
+            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
+            : (Idx * SrcEltSizeInBits);
+    Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt);
+    Value *And = Builder.CreateAnd(LShr, Mask);
     U->replaceAllUsesWith(And);
   }
   return true;
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
new file mode 100644
index 0000000000000..9796faf2e6feb
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='vector-combine' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE
+; RUN: opt -passes='vector-combine' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE
+
+define i64 @g(<8 x i8> %v) {
+; LE-LABEL: @g(
+; LE-NEXT:    [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; LE-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; LE-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP2]], 56
+; LE-NEXT:    [[TMP4:%.*]] = and i64 [[TMP2]], 255
+; LE-NEXT:    [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; LE-NEXT:    [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; LE-NEXT:    [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; LE-NEXT:    [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; LE-NEXT:    ret i64 [[SUM]]
+;
+; BE-LABEL: @g(
+; BE-NEXT:    [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; BE-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; BE-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 255
+; BE-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP2]], 56
+; BE-NEXT:    [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; BE-NEXT:    [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; BE-NEXT:    [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; BE-NEXT:    [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; BE-NEXT:    ret i64 [[SUM]]
+;
+  %z  = zext <8 x i8> %v to <8 x i64>
+  %e0 = extractelement <8 x i64> %z, i32 0
+  %e7 = extractelement <8 x i64> %z, i32 7
+  %sum = add i64 %e0, %e7
+  ret i64 %sum
+}
+
+
+
diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg
new file mode 100644
index 0000000000000..15af315f104fc
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'PowerPC' not in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
new file mode 100644
index 0000000000000..a9b719920c341
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='vector-combine' -S -mtriple=powerpc64-ibm-aix-xcoff     %s -o - | FileCheck %s --check-prefix=BE
+
+define i64 @g(<8 x i8> %v) {
+; BE-LABEL: @g(
+; BE-NEXT:    [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]]
+; BE-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+; BE-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 255
+; BE-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP2]], 56
+; BE-NEXT:    [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64>
+; BE-NEXT:    [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0
+; BE-NEXT:    [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7
+; BE-NEXT:    [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]]
+; BE-NEXT:    ret i64 [[SUM]]
+;
+  %z  = zext <8 x i8> %v to <8 x i64>
+  %e0 = extractelement <8 x i64> %z, i32 0
+  %e7 = extractelement <8 x i64> %z, i32 7
+  %sum = add i64 %e0, %e7
+  ret i64 %sum
+}
+

github-actions · 2025-09-17T08:53:09Z

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off Keep my email addresses private setting in your account.
See LLVM Developer Policy and LLVM Discourse for more information.

RKSimon · 2025-09-17T08:53:26Z

CC @fhahn who wrote the original fold

fhahn

LGTM, thanks for the fix + backport!

The scalarizeExtExtract transform assumed little-endian lane ordering, causing miscompiles on big-endian targets such as AIX/PowerPC under -O3 -flto. This patch updates the shift calculation to handle endianness correctly for big-endian targets. No functional change for little-endian targets. Fixes llvm#158197. --------- Co-authored-by: Simon Pilgrim <[email protected]> (cherry picked from commit 994a6a3)

github-actions · 2025-09-23T00:11:51Z

@RKSimon (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR.

llvmbot added this to the LLVM 21.x Release milestone Sep 17, 2025

github-project-automation bot added this to LLVM Release Status Sep 17, 2025

github-project-automation bot moved this to Needs Triage in LLVM Release Status Sep 17, 2025

llvmbot requested a review from RKSimon September 17, 2025 08:51

llvmbot added backend:PowerPC vectorizers llvm:transforms llvm:vectorcombine labels Sep 17, 2025

llvmbot mentioned this pull request Sep 17, 2025

[VectorCombine] scalarizeExtExtract uses LE lane mapping on BE targets #158197

Closed

RKSimon requested a review from fhahn September 17, 2025 08:53

fhahn approved these changes Sep 17, 2025

View reviewed changes

github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status Sep 17, 2025

RKSimon approved these changes Sep 19, 2025

View reviewed changes

llvmbot force-pushed the issue158197 branch from 1cb4887 to 9eedaf5 Compare September 23, 2025 00:11

tstellar merged commit 9eedaf5 into llvm:release/21.x Sep 23, 2025
6 of 9 checks passed

github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Sep 23, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) #159286

release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) #159286

Uh oh!

llvmbot commented Sep 17, 2025

Uh oh!

llvmbot commented Sep 17, 2025

Uh oh!

llvmbot commented Sep 17, 2025 •

edited

Loading

Uh oh!

github-actions bot commented Sep 17, 2025

Uh oh!

RKSimon commented Sep 17, 2025

Uh oh!

fhahn left a comment

Uh oh!

Uh oh!

github-actions bot commented Sep 23, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

Uh oh!

release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) #159286

release/21.x: [VectorCombine] Fix scalarizeExtExtract for big-endian (#157962) #159286

Uh oh!

Conversation

llvmbot commented Sep 17, 2025

Uh oh!

llvmbot commented Sep 17, 2025

Uh oh!

llvmbot commented Sep 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Sep 17, 2025

Uh oh!

RKSimon commented Sep 17, 2025

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

github-actions bot commented Sep 23, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

llvmbot commented Sep 17, 2025 •

edited

Loading