[InstCombine] Fold out-of-range bits for squaring signed integers (#153484)

Aethezz · web-flow · commit ef1539c1d4e2 · 2025-09-05T16:48:57.000+02:00
Fixes an issue where bits next to the sign bit were not constant-folded when squaring a sign- or zero-extended small integer. Added logic to detect when both operands of a multiplication are the same extended value, allowing InstCombine to mark bits above the maximum possible square as known zero. This enables correct folding of (x * x) & (1 << N) to 0 when N is out of range. Proof: https://alive2.llvm.org/ce/z/YGou44 Fixes #152061
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
@@ -413,6 +413,18 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
         isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
   Known = KnownBits::mul(Known, Known2, SelfMultiply);
 
+  if (SelfMultiply) {
+    unsigned SignBits = ComputeNumSignBits(Op0, DemandedElts, Q, Depth + 1);
+    unsigned TyBits = Op0->getType()->getScalarSizeInBits();
+    unsigned OutValidBits = 2 * (TyBits - SignBits + 1);
+
+    if (OutValidBits < TyBits) {
+      APInt KnownZeroMask =
+          APInt::getHighBitsSet(TyBits, TyBits - OutValidBits + 1);
+      Known.Zero |= KnownZeroMask;
+    }
+  }
+
   // Only make use of no-wrap flags if we failed to compute the sign bit
   // directly.  This matters if the multiplication always overflows, in
   // which case we prefer to follow the result of the direct computation,
diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll
@@ -423,3 +423,41 @@ define i64 @smear_set_bit_different_dest_type_wider_dst(i32 %x) {
   %s = sext i8 %a to i64
   ret i64 %s
 }
+
+; Test known bits for (sext i8 x) * (sext i8 x)
+
+define i1 @sext_square_bit30(i8 noundef %x) {
+; CHECK-LABEL: @sext_square_bit30(
+; CHECK-NEXT:    ret i1 false
+;
+  %sx = sext i8 %x to i32
+  %mul = mul nsw i32 %sx, %sx
+  %and = and i32 %mul, 1073741824 ; 1 << 30
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @sext_square_bit15(i8 noundef %x) {
+; CHECK-LABEL: @sext_square_bit15(
+; CHECK-NEXT:    ret i1 false
+;
+  %sx = sext i8 %x to i32
+  %mul = mul nsw i32 %sx, %sx
+  %and = and i32 %mul, 32768 ; 1 << 15
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @sext_square_bit14(i8 noundef %x) {
+; CHECK-LABEL: @sext_square_bit14(
+; CHECK-NEXT:    [[SX:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[SX]], [[SX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp samesign ugt i32 [[MUL]], 16383
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sx = sext i8 %x to i32
+  %mul = mul nsw i32 %sx, %sx
+  %and = and i32 %mul, 16384 ; 1 << 14
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}