Skip to content

Commit ef1539c

Browse files
authored
[InstCombine] Fold out-of-range bits for squaring signed integers (#153484)
Fixes an issue where bits next to the sign bit were not constant-folded when squaring a sign- or zero-extended small integer. Added logic to detect when both operands of a multiplication are the same extended value, allowing InstCombine to mark the bits above the maximum possible square as known zero. This enables correct folding of (x * x) & (1 << N) to 0 when bit N lies above the largest value the square can take. Proof: <https://alive2.llvm.org/ce/z/YGou44>. Fixes #152061.
1 parent caf5fb1 commit ef1539c

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,18 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
413413
isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
414414
Known = KnownBits::mul(Known, Known2, SelfMultiply);
415415

416+
if (SelfMultiply) {
417+
unsigned SignBits = ComputeNumSignBits(Op0, DemandedElts, Q, Depth + 1);
418+
unsigned TyBits = Op0->getType()->getScalarSizeInBits();
419+
unsigned OutValidBits = 2 * (TyBits - SignBits + 1);
420+
421+
if (OutValidBits < TyBits) {
422+
APInt KnownZeroMask =
423+
APInt::getHighBitsSet(TyBits, TyBits - OutValidBits + 1);
424+
Known.Zero |= KnownZeroMask;
425+
}
426+
}
427+
416428
// Only make use of no-wrap flags if we failed to compute the sign bit
417429
// directly. This matters if the multiplication always overflows, in
418430
// which case we prefer to follow the result of the direct computation,

llvm/test/Transforms/InstCombine/sext.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,3 +423,41 @@ define i64 @smear_set_bit_different_dest_type_wider_dst(i32 %x) {
423423
%s = sext i8 %a to i64
424424
ret i64 %s
425425
}
426+
427+
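; Test known bits for (sext i8 x) * (sext i8 x): the square is at most 2^14 (from x = -128), so bits 15 and above are known zero while bit 14 is not.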
428+
429+
define i1 @sext_square_bit30(i8 noundef %x) {
430+
; CHECK-LABEL: @sext_square_bit30(
431+
; CHECK-NEXT: ret i1 false
432+
;
433+
%sx = sext i8 %x to i32
434+
%mul = mul nsw i32 %sx, %sx
435+
%and = and i32 %mul, 1073741824 ; 1 << 30
436+
%cmp = icmp ne i32 %and, 0
437+
ret i1 %cmp
438+
}
439+
440+
define i1 @sext_square_bit15(i8 noundef %x) {
441+
; CHECK-LABEL: @sext_square_bit15(
442+
; CHECK-NEXT: ret i1 false
443+
;
444+
%sx = sext i8 %x to i32
445+
%mul = mul nsw i32 %sx, %sx
446+
%and = and i32 %mul, 32768 ; 1 << 15
447+
%cmp = icmp ne i32 %and, 0
448+
ret i1 %cmp
449+
}
450+
451+
define i1 @sext_square_bit14(i8 noundef %x) {
452+
; CHECK-LABEL: @sext_square_bit14(
453+
; CHECK-NEXT: [[SX:%.*]] = sext i8 [[X:%.*]] to i32
454+
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[SX]], [[SX]]
455+
; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt i32 [[MUL]], 16383
456+
; CHECK-NEXT: ret i1 [[CMP]]
457+
;
458+
%sx = sext i8 %x to i32
459+
%mul = mul nsw i32 %sx, %sx
460+
%and = and i32 %mul, 16384 ; 1 << 14
461+
%cmp = icmp ne i32 %and, 0
462+
ret i1 %cmp
463+
}

0 commit comments

Comments
 (0)