[VectorCombine] foldSelectShuffle - early-out cases where the max vector register width isn't large enough (llvm#157430)

RKSimon · web-flow · commit ad3a0ae9e15f · 2025-09-08T12:04:23.000Z
Technically this could happen with vector units that can't handle all legal scalar widths - but it's good enough to use a generic crash test without specifying a suitable target. Fixes llvm#157335
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3903,6 +3903,8 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
   unsigned MaxVectorSize =
       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
   unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
+  if (MaxElementsInVector == 0)
+    return false;
   // When there are multiple shufflevector operations on the same input,
   // especially when the vector length is larger than the register size,
   // identical shuffle patterns may occur across different groups of elements.
diff --git a/llvm/test/Transforms/VectorCombine/pr157335.ll b/llvm/test/Transforms/VectorCombine/pr157335.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=vector-combine -S %s | FileCheck %s
+
+define <2 x double> @PR157335() {
+; CHECK-LABEL: @PR157335(
+; CHECK-NEXT:    [[V0:%.*]] = fmul <2 x double> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[V1:%.*]] = fmul <2 x double> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[V2:%.*]] = fsub <2 x double> [[V0]], [[V1]]
+; CHECK-NEXT:    [[V3:%.*]] = fadd <2 x double> [[V0]], [[V1]]
+; CHECK-NEXT:    [[V4:%.*]] = shufflevector <2 x double> [[V2]], <2 x double> [[V3]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x double> [[V4]]
+;
+  %v0 = fmul <2 x double> zeroinitializer, zeroinitializer
+  %v1 = fmul <2 x double> zeroinitializer, zeroinitializer
+  %v2 = fsub <2 x double> %v0, %v1
+  %v3 = fadd <2 x double> %v0, %v1
+  %v4 = shufflevector <2 x double> %v2, <2 x double> %v3, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %v4
+}