From 1e1ef6c1987d957c65f78e68df41287aa8691830 Mon Sep 17 00:00:00 2001 From: Uyiosa Iyekekpolor Date: Wed, 10 Sep 2025 17:56:01 -0400 Subject: [PATCH 1/4] [VectorCombine] Fix scalarizeExtExtract for big-endian --- .../Transforms/Vectorize/VectorCombine.cpp | 11 +++++++-- .../scalarize-ext-extract-endian.ll | 23 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 17cb18a22336a..ce785278b32a5 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2011,12 +2011,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) { IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy))); uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType()); uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1; + uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy); + Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits); + Value *Mask = ConstantInt::get(PackedTy, EltBitMask); for (User *U : Ext->users()) { auto *Extract = cast<ExtractElementInst>(U); uint64_t Idx = cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue(); - Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits); - Value *And = Builder.CreateAnd(LShr, EltBitMask); + uint64_t ShiftAmt = DL->isBigEndian() + ? 
(TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits) + : (Idx * SrcEltSizeInBits); + Value *ShAmtVal = ConstantInt::get(PackedTy, ShiftAmt); + Value *LShr = Builder.CreateLShr(ScalarV, ShAmtVal); + Value *And = Builder.CreateAnd(LShr, Mask); U->replaceAllUsesWith(And); } return true; diff --git a/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll new file mode 100644 index 0000000000000..3a1fbaef3df59 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll @@ -0,0 +1,23 @@ +; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE +; RUN: opt -passes='vector-combine,dce' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE + +define i64 @g(<8 x i8> %v) { + %z = zext <8 x i8> %v to <8 x i64> + %e0 = extractelement <8 x i64> %z, i32 0 + %e7 = extractelement <8 x i64> %z, i32 7 + %sum = add i64 %e0, %e7 + ret i64 %sum +} + +; LE-LABEL: @g( +; LE: bitcast <8 x i8> %{{.*}} to i64 +; LE: lshr i64 %{{.*}}, 56 +; LE: and i64 %{{.*}}, 255 +; LE-NOT: extractelement + +; BE-LABEL: @g( +; BE: bitcast <8 x i8> %{{.*}} to i64 +; BE: and i64 %{{.*}}, 255 +; BE: lshr i64 %{{.*}}, 56 +; BE-NOT: extractelement + From 8f8e4445c701ee567289fe0dea7f1c2a815e991a Mon Sep 17 00:00:00 2001 From: Uyiosa Iyekekpolor Date: Fri, 12 Sep 2025 00:59:02 -0400 Subject: [PATCH 2/4] Simplify LShr construction; add AArch64/PowerPC tests --- .../Transforms/Vectorize/VectorCombine.cpp | 3 +-- .../scalarize-ext-extract-endian.ll | 2 +- .../PowerPC/scalarize-ext-extract.ll | 23 +++++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) rename llvm/test/Transforms/VectorCombine/{ => AArch64}/scalarize-ext-extract-endian.ll (82%) create mode 100644 llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index ce785278b32a5..72039c4ef06a7 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2021,8 +2021,7 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) { uint64_t ShiftAmt = DL->isBigEndian() ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits) : (Idx * SrcEltSizeInBits); - Value *ShAmtVal = ConstantInt::get(PackedTy, ShiftAmt); - Value *LShr = Builder.CreateLShr(ScalarV, ShAmtVal); + Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt); Value *And = Builder.CreateAnd(LShr, Mask); U->replaceAllUsesWith(And); } diff --git a/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll similarity index 82% rename from llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll rename to llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll index 3a1fbaef3df59..63867f0402b4f 100644 --- a/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll @@ -1,5 +1,5 @@ ; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE -; RUN: opt -passes='vector-combine,dce' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE +; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE define i64 @g(<8 x i8> %v) { %z = zext <8 x i8> %v to <8 x i64> diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll new file mode 100644 index 0000000000000..312eebac56423 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll @@ -0,0 +1,23 @@ +; RUN: opt -passes='vector-combine,dce' -S 
-mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE +; RUN: opt -passes='vector-combine,dce' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE + +define i64 @g(<8 x i8> %v) { + %z = zext <8 x i8> %v to <8 x i64> + %e0 = extractelement <8 x i64> %z, i32 0 + %e7 = extractelement <8 x i64> %z, i32 7 + %sum = add i64 %e0, %e7 + ret i64 %sum +} + +; LE-LABEL: @g( +; LE: bitcast <8 x i8> %{{.*}} to i64 +; LE: lshr i64 %{{.*}}, 56 +; LE: and i64 %{{.*}}, 255 +; LE-NOT: extractelement + +; BE-LABEL: @g( +; BE: bitcast <8 x i8> %{{.*}} to i64 +; BE: and i64 %{{.*}}, 255 +; BE: lshr i64 %{{.*}}, 56 +; BE-NOT: extractelement + From 75d16251c5e9e7b98108d34d692adacf3a982274 Mon Sep 17 00:00:00 2001 From: Uyiosa Iyekekpolor Date: Fri, 12 Sep 2025 10:54:26 -0400 Subject: [PATCH 3/4] clang-format; add lit.local.cfg; regenerate checks --- .../Transforms/Vectorize/VectorCombine.cpp | 7 ++--- .../AArch64/scalarize-ext-extract-endian.ll | 27 ++++++++++++------- .../VectorCombine/PowerPC/lit.local.cfg | 2 ++ .../PowerPC/scalarize-ext-extract.ll | 22 +++++++-------- 4 files changed, 32 insertions(+), 26 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 72039c4ef06a7..bb880a9fc2321 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2018,9 +2018,10 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) { auto *Extract = cast<ExtractElementInst>(U); uint64_t Idx = cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue(); - uint64_t ShiftAmt = DL->isBigEndian() - ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits) - : (Idx * SrcEltSizeInBits); + uint64_t ShiftAmt = + DL->isBigEndian() + ? 
(TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits) + : (Idx * SrcEltSizeInBits); Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt); Value *And = Builder.CreateAnd(LShr, Mask); U->replaceAllUsesWith(And); diff --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll index 63867f0402b4f..ad93d2071a858 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll @@ -1,7 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE ; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE define i64 @g(<8 x i8> %v) { +; LE-LABEL: @g( +; LE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; LE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; LE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 56 +; LE-NEXT: [[TMP4:%.*]] = and i64 [[TMP2]], 255 +; LE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; LE-NEXT: ret i64 [[SUM]] +; +; BE-LABEL: @g( +; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 +; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; BE-NEXT: ret i64 [[SUM]] +; %z = zext <8 x i8> %v to <8 x i64> %e0 = extractelement <8 x i64> %z, i32 0 %e7 = extractelement <8 x i64> %z, i32 7 @@ -9,15 +26,5 @@ define i64 @g(<8 x i8> %v) { ret i64 %sum } -; LE-LABEL: @g( -; LE: bitcast <8 x i8> %{{.*}} to i64 -; LE: lshr i64 %{{.*}}, 56 -; LE: and i64 %{{.*}}, 255 -; LE-NOT: extractelement -; BE-LABEL: @g( -; BE: bitcast <8 x i8> %{{.*}} to i64 -; BE: and i64 %{{.*}}, 255 -; BE: lshr i64 %{{.*}}, 56 -; 
BE-NOT: extractelement diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg new file mode 100644 index 0000000000000..15af315f104fc --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if 'PowerPC' not in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll index 312eebac56423..3d4a8aab6c8ea 100644 --- a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll +++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll @@ -1,7 +1,15 @@ -; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes='vector-combine,dce' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE define i64 @g(<8 x i8> %v) { +; BE-LABEL: @g( +; BE-NEXT: [[TMP1:%.*]] = freeze <8 x i8> [[V:%.*]] +; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 +; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] +; BE-NEXT: ret i64 [[SUM]] +; %z = zext <8 x i8> %v to <8 x i64> %e0 = extractelement <8 x i64> %z, i32 0 %e7 = extractelement <8 x i64> %z, i32 7 @@ -9,15 +17,3 @@ define i64 @g(<8 x i8> %v) { ret i64 %sum } -; LE-LABEL: @g( -; LE: bitcast <8 x i8> %{{.*}} to i64 -; LE: lshr i64 %{{.*}}, 56 -; LE: and i64 %{{.*}}, 255 -; LE-NOT: extractelement - -; BE-LABEL: @g( -; BE: bitcast <8 x i8> %{{.*}} to i64 -; BE: and i64 %{{.*}}, 255 -; BE: lshr i64 %{{.*}}, 56 -; BE-NOT: extractelement - From d4a1421d2c8e8a2f2a0c7a2b20bd7608b6042c42 Mon Sep 17 00:00:00 2001 From: Uyiosa Iyekekpolor Date: Fri, 12 Sep 2025 13:08:59 -0400 Subject: [PATCH 4/4] 
remove dce pass from tests --- .../AArch64/scalarize-ext-extract-endian.ll | 10 ++++++++-- .../VectorCombine/PowerPC/scalarize-ext-extract.ll | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll index ad93d2071a858..9796faf2e6feb 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/scalarize-ext-extract-endian.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE -; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE +; RUN: opt -passes='vector-combine' -S -mtriple=aarch64_be-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=BE define i64 @g(<8 x i8> %v) { ; LE-LABEL: @g( @@ -8,6 +8,9 @@ define i64 @g(<8 x i8> %v) { ; LE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 ; LE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 56 ; LE-NEXT: [[TMP4:%.*]] = and i64 [[TMP2]], 255 +; LE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; LE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; LE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 ; LE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] ; LE-NEXT: ret i64 [[SUM]] ; @@ -16,6 +19,9 @@ define i64 @g(<8 x i8> %v) { ; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 ; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 ; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; BE-NEXT: [[E7:%.*]] = 
extractelement <8 x i64> [[Z]], i32 7 ; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] ; BE-NEXT: ret i64 [[SUM]] ; diff --git a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll index 3d4a8aab6c8ea..a9b719920c341 100644 --- a/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll +++ b/llvm/test/Transforms/VectorCombine/PowerPC/scalarize-ext-extract.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='vector-combine,dce' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE +; RUN: opt -passes='vector-combine' -S -mtriple=powerpc64-ibm-aix-xcoff %s -o - | FileCheck %s --check-prefix=BE define i64 @g(<8 x i8> %v) { ; BE-LABEL: @g( @@ -7,6 +7,9 @@ define i64 @g(<8 x i8> %v) { ; BE-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 ; BE-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 ; BE-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 56 +; BE-NEXT: [[Z:%.*]] = zext <8 x i8> [[V]] to <8 x i64> +; BE-NEXT: [[E0:%.*]] = extractelement <8 x i64> [[Z]], i32 0 +; BE-NEXT: [[E7:%.*]] = extractelement <8 x i64> [[Z]], i32 7 ; BE-NEXT: [[SUM:%.*]] = add i64 [[TMP4]], [[TMP3]] ; BE-NEXT: ret i64 [[SUM]] ;