Skip to content

Conversation

@cofibrant
Copy link
Contributor

@cofibrant cofibrant commented Nov 20, 2025

Fixes a crash occuring in the AArch64 GlobalISel legaliser pass when legalising a G_SHL of vectors. The crash occured because the legalisation rule modifying the scalar type was being applied after the rule modifying the number of elements of the vectors, while the action padding out vector inputs to G_SHL (and other shifts) assumes the scalar types already agree. The fix treats shifts separately from other binary operators and special cases the treatment of the shift argument.

Fixes #168224

@llvmbot
Copy link
Member

llvmbot commented Nov 20, 2025

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: Nathan Corbyn (cofibrant)

Changes

Fixes a crash occuring in the AArch64 GlobalISel legaliser pass when legalising a G_SHL of vectors. The crash occured because the legalisation rule modifying the scalar type was being applied after the rule modifying the number of elements of the vectors, while the action padding out vector inputs to G_SHL (and other shifts) assumes the scalar types already agree.

Fixes #168224


Full diff: https://github.com/llvm/llvm-project/pull/168848.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+34-33)
  • (added) llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll (+13)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a88817c9d2d19..efd525bbbdabd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -216,15 +216,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .widenScalarToNextPow2(0)
       .clampScalar(1, s32, s64)
       .clampScalar(0, s32, s64)
+      .minScalarSameAs(1, 0)
+      .minScalarEltSameAsIf(isVector(0), 1, 0)
+      .maxScalarEltSameAsIf(isVector(0), 1, 0)
       .clampNumElements(0, v8s8, v16s8)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0)
-      .minScalarSameAs(1, 0)
-      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
-      .minScalarEltSameAsIf(isVector(0), 1, 0)
-      .maxScalarEltSameAsIf(isVector(0), 1, 0);
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
 
   getActionDefinitionsBuilder(G_PTR_ADD)
       .legalFor({{p0, s64}, {v2p0, v2s64}})
@@ -467,29 +467,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder(G_FMAD).lower();
 
   for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
-    auto &Actions =  getActionDefinitionsBuilder(Op);
+    auto &Actions = getActionDefinitionsBuilder(Op);
 
     if (Op == G_SEXTLOAD)
-      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
+      Actions.lowerIf(
+          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
 
     // Atomics have zero extending behavior.
     Actions
-      .legalForTypesWithMemDesc({{s32, p0, s8, 8},
-                                 {s32, p0, s16, 8},
-                                 {s32, p0, s32, 8},
-                                 {s64, p0, s8, 2},
-                                 {s64, p0, s16, 2},
-                                 {s64, p0, s32, 4},
-                                 {s64, p0, s64, 8},
-                                 {p0, p0, s64, 8},
-                                 {v2s32, p0, s64, 8}})
-      .widenScalarToNextPow2(0)
-      .clampScalar(0, s32, s64)
-      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
-      //       how to do that yet.
-      .unsupportedIfMemSizeNotPow2()
-      // Lower anything left over into G_*EXT and G_LOAD
-      .lower();
+        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
+                                   {s32, p0, s16, 8},
+                                   {s32, p0, s32, 8},
+                                   {s64, p0, s8, 2},
+                                   {s64, p0, s16, 2},
+                                   {s64, p0, s32, 4},
+                                   {s64, p0, s64, 8},
+                                   {p0, p0, s64, 8},
+                                   {v2s32, p0, s64, 8}})
+        .widenScalarToNextPow2(0)
+        .clampScalar(0, s32, s64)
+        // TODO: We could support sum-of-pow2's but the lowering code doesn't
+        // know
+        //       how to do that yet.
+        .unsupportedIfMemSizeNotPow2()
+        // Lower anything left over into G_*EXT and G_LOAD
+        .lower();
   }
 
   auto IsPtrVecPred = [=](const LegalityQuery &Query) {
@@ -982,9 +984,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   // Control-flow
   getActionDefinitionsBuilder(G_BR).alwaysLegal();
-  getActionDefinitionsBuilder(G_BRCOND)
-    .legalFor({s32})
-    .clampScalar(0, s32, s32);
+  getActionDefinitionsBuilder(G_BRCOND).legalFor({s32}).clampScalar(0, s32,
+                                                                    s32);
   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
 
   getActionDefinitionsBuilder(G_SELECT)
@@ -1053,8 +1054,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .widenScalarToNextPow2(0, /*Min*/ 8);
 
   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
-      .lowerIf(
-          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
+      .lowerIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
 
   bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
 
@@ -1606,7 +1606,7 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   // Don't modify an intrinsic call.
   if (GlobalOp.isSymbol())
     return true;
-  const auto* GV = GlobalOp.getGlobal();
+  const auto *GV = GlobalOp.getGlobal();
   if (GV->isThreadLocal())
     return true; // Don't want to modify TLS vars.
 
@@ -1680,10 +1680,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
   switch (IntrinsicID) {
   case Intrinsic::vacopy: {
     unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
-    unsigned VaListSize =
-      (ST->isTargetDarwin() || ST->isTargetWindows())
-          ? PtrSize
-          : ST->isTargetILP32() ? 20 : 32;
+    unsigned VaListSize = (ST->isTargetDarwin() || ST->isTargetWindows())
+                              ? PtrSize
+                          : ST->isTargetILP32() ? 20
+                                                : 32;
 
     MachineFunction &MF = *MI.getMF();
     auto Val = MF.getRegInfo().createGenericVirtualRegister(
@@ -2122,7 +2122,8 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
   // v8s16,v4s32,v2s64 -> v16i8
   LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
   if (Ty.isScalar()) {
-    assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
+    assert((Size == 32 || Size == 64 || Size == 128) &&
+           "Expected only 32, 64, or 128 bit scalars!");
     if (Size == 32) {
       Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
     }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll
new file mode 100644
index 0000000000000..8848fb215c55d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-vector-shl-crash.ll
@@ -0,0 +1,13 @@
+; RUN: llc -global-isel -o - %s | FileCheck %s
+
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here.
+
+define <2 x i8> @test() {
+entry:
+  %zeroes = zext <2 x i1> zeroinitializer to <2 x i32>
+  %ones = shl <2 x i32> splat (i32 1), %zeroes
+  %ones.trunc = trunc <2 x i32> %ones to <2 x i8>
+  ret <2 x i8> %ones.trunc
+}

@cofibrant cofibrant requested review from aemerson and arsenm November 20, 2025 10:45
@cofibrant cofibrant force-pushed the users/cofibrant/issue-168224 branch 2 times, most recently from d6bbb10 to 65d1305 Compare November 20, 2025 10:50
@cofibrant cofibrant force-pushed the users/cofibrant/issue-168224 branch from 65d1305 to 7683b54 Compare November 20, 2025 11:10
@github-actions
Copy link

github-actions bot commented Nov 20, 2025

🐧 Linux x64 Test Results

  • 186793 tests passed
  • 4892 tests skipped

.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.minScalarEltSameAsIf(isVector(0), 1, 0)
.maxScalarEltSameAsIf(isVector(0), 1, 0);
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fine but the rules should probably not be so fragile as to crash if you order them wrong

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a code path where an action can fail with UnableToLegalize but we continue to apply other rules before returning to retry the failing rule after observing a change? If so I can add a check somewhere sensible for a more robust fix.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, once we hit Unable the whole process will abort.

Copy link
Contributor Author

@cofibrant cofibrant Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, not sure I have a better fix... Basically the action responsible for padding vectors with more undef elements only accepts a single type to expand to, and when applied to binary operations naïvely uses this type for both input operands:

case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR: {
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorSrc(MI, MoreTy, 2);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}

In other words, it assumes the inputs already agree on their scalar type. One option, I suppose, would be to have this code infer the number of elements from MoreTy, but inherit the scalar element types from each operand for each call to moreElementsVector*(). What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably should have validation that the reported rule makes sense for the given operation as part of the rule parsing. The failure ideally wouldn't be deferred all the way to the application

@davemgreen
Copy link
Collaborator

Im not sure if the combine we have for trunc(shift) -> shift(trunk) has been very useful and has led to a number of issues, both correctness and performance.

But I think with how we have it at the moment, the moreElementsVectorSrc(MI, MoreTy, 2); line is incorrect and needs to adjust the type it uses so that it has the same number of lanes as MoreTy but the elements of the existing type. The casts need to do work in the same way.

@cofibrant
Copy link
Contributor Author

cofibrant commented Nov 26, 2025

But I think with how we have it at the moment, the moreElementsVectorSrc(MI, MoreTy, 2); line is incorrect and needs to adjust the type it uses so that it has the same number of lanes as MoreTy but the elements of the existing type. The casts need to do work in the same way.

I tend to agree. However, I was looking into updating the patch this afternoon to reflect this and the solutions seemed very ugly. In particular, it wasn't clear to me that similar affordances shouldn't be made for other generic opcodes.

What I'd really like to say is that the LLT given alongside a FewerElements/MoreElements legalisation action should always have a trivial scalar type (i.e., use the LLT to encode the demanded number of elements only), but this would demand quite a lot of refactoring and is probably not worth it.

@davemgreen
Copy link
Collaborator

I tend to agree. However, I was looking into updating the patch this afternoon to reflect this and the solutions seemed very ugly. In particular, it wasn't clear to me that similar affordances shouldn't be made for other generic opcodes.

What I'd really like to say is that the LLT given alongside a FewerElements/MoreElements legalisation action should always have a trivial scalar type (i.e., use the LLT to encode the demanded number of elements only), but this would demand quite a lot of refactoring and is probably not worth it.

Shifts, as currently defined, can have different types for the dst/first-src operand and the shift amount operand. This is helpful later in the pipeline when we want to canonicalize all shift amounts to i32/i64, to allow the tablegen patterns to match. I'm not sure its as useful early on as it leads to issues like this and has led to perf issues elsewhere. But as we currently have it, it doesn't sound too bad to have the FewerElements/MoreElements code correctly handle shifts with two types. That is how they are defined and I think can be limited to shifts.

@cofibrant cofibrant requested a review from arsenm December 1, 2025 15:49
@cofibrant
Copy link
Contributor Author

Shifts, as currently defined, can have different types for the dst/first-src operand and the shift amount operand. This is helpful later in the pipeline when we want to canonicalize all shift amounts to i32/i64, to allow the tablegen patterns to match. I'm not sure its as useful early on as it leads to issues like this and has led to perf issues elsewhere. But as we currently have it, it doesn't sound too bad to have the FewerElements/MoreElements code correctly handle shifts with two types. That is how they are defined and I think can be limited to shifts.

Thanks Dave, this makes a lot of sense. I've updated the PR to reflect the suggestions in your comment. It would be great to get your opinion as and when you have some time.

@cofibrant cofibrant requested a review from davemgreen December 1, 2025 17:54
Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks - LGTM

@cofibrant cofibrant merged commit b5f7058 into llvm:main Dec 2, 2025
10 checks passed
@cofibrant cofibrant deleted the users/cofibrant/issue-168224 branch December 2, 2025 07:57
@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 2, 2025

LLVM Buildbot has detected a new failure on builder sanitizer-x86_64-linux-android running on sanitizer-buildbot-android while building llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/186/builds/14407

Here is the relevant piece of the build log for the reference
Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
[       OK ] AddressSanitizer.AtoiAndFriendsOOBTest (2275 ms)
[ RUN      ] AddressSanitizer.HasFeatureAddressSanitizerTest
[       OK ] AddressSanitizer.HasFeatureAddressSanitizerTest (0 ms)
[ RUN      ] AddressSanitizer.CallocReturnsZeroMem
[       OK ] AddressSanitizer.CallocReturnsZeroMem (12 ms)
[ DISABLED ] AddressSanitizer.DISABLED_TSDTest
[ RUN      ] AddressSanitizer.IgnoreTest
[       OK ] AddressSanitizer.IgnoreTest (0 ms)
[ RUN      ] AddressSanitizer.SignalTest
[       OK ] AddressSanitizer.SignalTest (179 ms)
[ RUN      ] AddressSanitizer.ReallocTest
[       OK ] AddressSanitizer.ReallocTest (47 ms)
[ RUN      ] AddressSanitizer.WrongFreeTest
[       OK ] AddressSanitizer.WrongFreeTest (125 ms)
[ RUN      ] AddressSanitizer.LongJmpTest
[       OK ] AddressSanitizer.LongJmpTest (0 ms)
[ RUN      ] AddressSanitizer.ThreadStackReuseTest
[       OK ] AddressSanitizer.ThreadStackReuseTest (1 ms)
[ DISABLED ] AddressSanitizer.DISABLED_MemIntrinsicUnalignedAccessTest
[ DISABLED ] AddressSanitizer.DISABLED_LargeFunctionSymbolizeTest
[ DISABLED ] AddressSanitizer.DISABLED_MallocFreeUnwindAndSymbolizeTest
[ RUN      ] AddressSanitizer.UseThenFreeThenUseTest
[       OK ] AddressSanitizer.UseThenFreeThenUseTest (125 ms)
[ RUN      ] AddressSanitizer.FileNameInGlobalReportTest
[       OK ] AddressSanitizer.FileNameInGlobalReportTest (115 ms)
[ DISABLED ] AddressSanitizer.DISABLED_StressStackReuseAndExceptionsTest
[ RUN      ] AddressSanitizer.MlockTest
[       OK ] AddressSanitizer.MlockTest (0 ms)
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadedTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowIn
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowLeft
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowRight
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFHigh
[ DISABLED ] AddressSanitizer.DISABLED_DemoOOM
[ DISABLED ] AddressSanitizer.DISABLED_DemoDoubleFreeTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoNullDerefTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoFunctionStaticTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoTooMuchMemoryTest
[ RUN      ] AddressSanitizer.LongDoubleNegativeTest
[       OK ] AddressSanitizer.LongDoubleNegativeTest (0 ms)
[----------] 19 tests from AddressSanitizer (27858 ms total)

[----------] Global test environment tear-down
[==========] 22 tests from 2 test suites ran. (27872 ms total)
[  PASSED  ] 22 tests.

  YOU HAVE 1 DISABLED TEST

Step 35 (run instrumented asan tests [aarch64/bluejay-userdebug/TQ3A.230805.001]) failure: run instrumented asan tests [aarch64/bluejay-userdebug/TQ3A.230805.001] (failure)
...
[ RUN      ] AddressSanitizer.HasFeatureAddressSanitizerTest
[       OK ] AddressSanitizer.HasFeatureAddressSanitizerTest (0 ms)
[ RUN      ] AddressSanitizer.CallocReturnsZeroMem
[       OK ] AddressSanitizer.CallocReturnsZeroMem (12 ms)
[ DISABLED ] AddressSanitizer.DISABLED_TSDTest
[ RUN      ] AddressSanitizer.IgnoreTest
[       OK ] AddressSanitizer.IgnoreTest (0 ms)
[ RUN      ] AddressSanitizer.SignalTest
[       OK ] AddressSanitizer.SignalTest (179 ms)
[ RUN      ] AddressSanitizer.ReallocTest
[       OK ] AddressSanitizer.ReallocTest (47 ms)
[ RUN      ] AddressSanitizer.WrongFreeTest
[       OK ] AddressSanitizer.WrongFreeTest (125 ms)
[ RUN      ] AddressSanitizer.LongJmpTest
[       OK ] AddressSanitizer.LongJmpTest (0 ms)
[ RUN      ] AddressSanitizer.ThreadStackReuseTest
[       OK ] AddressSanitizer.ThreadStackReuseTest (1 ms)
[ DISABLED ] AddressSanitizer.DISABLED_MemIntrinsicUnalignedAccessTest
[ DISABLED ] AddressSanitizer.DISABLED_LargeFunctionSymbolizeTest
[ DISABLED ] AddressSanitizer.DISABLED_MallocFreeUnwindAndSymbolizeTest
[ RUN      ] AddressSanitizer.UseThenFreeThenUseTest
[       OK ] AddressSanitizer.UseThenFreeThenUseTest (125 ms)
[ RUN      ] AddressSanitizer.FileNameInGlobalReportTest
[       OK ] AddressSanitizer.FileNameInGlobalReportTest (115 ms)
[ DISABLED ] AddressSanitizer.DISABLED_StressStackReuseAndExceptionsTest
[ RUN      ] AddressSanitizer.MlockTest
[       OK ] AddressSanitizer.MlockTest (0 ms)
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadedTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowIn
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowLeft
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowRight
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFHigh
[ DISABLED ] AddressSanitizer.DISABLED_DemoOOM
[ DISABLED ] AddressSanitizer.DISABLED_DemoDoubleFreeTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoNullDerefTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoFunctionStaticTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoTooMuchMemoryTest
[ RUN      ] AddressSanitizer.LongDoubleNegativeTest
[       OK ] AddressSanitizer.LongDoubleNegativeTest (0 ms)
[----------] 19 tests from AddressSanitizer (27858 ms total)

[----------] Global test environment tear-down
[==========] 22 tests from 2 test suites ran. (27872 ms total)
[  PASSED  ] 22 tests.

  YOU HAVE 1 DISABLED TEST
program finished with exit code 0
elapsedTime=9270.377435

kcloudy0717 pushed a commit to kcloudy0717/llvm-project that referenced this pull request Dec 4, 2025
honeygoyal pushed a commit to honeygoyal/llvm-project that referenced this pull request Dec 9, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[AArch64][GISel] llc crashed at -O1/O2/O3: Assertion `(ResTy.getElementType() == Op0Ty.getElementType()) && "Different vector element types"' failed.

6 participants