[SLP]Fix the minbitwidth analysis for slternate opcodes

alexey-bataev · alexey-bataev · commit 964c7711f438 · 2025-10-31T15:25:13.000-07:00
If the laternate operation is more stricter than the main operation, we cannot rely on the analysis of the main operation. In such case, better to avoid doing the analysis at all, since it may affect the overall result and lead to incorrect optimization Fixes llvm#165878
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22134,6 +22134,27 @@ bool BoUpSLP::collectValuesToDemote(
         {VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(),
          VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()});
 
+  if (E.isAltShuffle()) {
+    // Combining these opcodes may lead to incorrect analysis, skip for now.
+    auto IsDangerousOpcode = [](unsigned Opcode) {
+      switch (Opcode) {
+      case Instruction::Shl:
+      case Instruction::AShr:
+      case Instruction::LShr:
+      case Instruction::UDiv:
+      case Instruction::SDiv:
+      case Instruction::URem:
+      case Instruction::SRem:
+        return true;
+      default:
+        break;
+      }
+      return false;
+    };
+    if (IsDangerousOpcode(E.getAltOpcode()))
+      return FinalAnalysis();
+  }
+
   switch (E.getOpcode()) {
 
   // We can always demote truncations and extensions. Since truncations can
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll
@@ -6,14 +6,12 @@ define float @test(i8 %0) {
 ; CHECK-SAME: i8 [[TMP0:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> <i8 poison, i8 0>, i8 [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i16> [[TMP2]], <i16 2, i16 27>
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i16> [[TMP2]], <i16 2, i16 27>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> [[TMP4]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP9]] to i32
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 2, i32 27>
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], <i32 2, i32 27>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    switch i32 [[TMP8]], label %[[EXIT:.*]] [
 ; CHECK-NEXT:      i32 0, label %[[EXIT]]