Skip to content

Commit 30728eb

Browse files
[Reland][ValueTracking] Improve Bitcast handling to match SDAG (#145223)
Fixes #125228 --------- Co-authored-by: Simon Pilgrim <[email protected]>
1 parent 96c8b9e commit 30728eb

File tree

4 files changed

+60
-18
lines changed

4 files changed

+60
-18
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,6 +1351,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
13511351
isa<ScalableVectorType>(I->getType()))
13521352
break;
13531353

1354+
unsigned NumElts = DemandedElts.getBitWidth();
1355+
bool IsLE = Q.DL.isLittleEndian();
13541356
// Look through a cast from narrow vector elements to wider type.
13551357
// Examples: v4i32 -> v2i64, v3i8 -> i24
13561358
unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
@@ -1369,7 +1371,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
13691371
//
13701372
// The known bits of each sub-element are then inserted into place
13711373
// (dependent on endian) to form the full result of known bits.
1372-
unsigned NumElts = DemandedElts.getBitWidth();
13731374
unsigned SubScale = BitWidth / SubBitWidth;
13741375
APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
13751376
for (unsigned i = 0; i != NumElts; ++i) {
@@ -1381,10 +1382,32 @@ static void computeKnownBitsFromOperator(const Operator *I,
13811382
for (unsigned i = 0; i != SubScale; ++i) {
13821383
computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
13831384
Depth + 1);
1384-
unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1385+
unsigned ShiftElt = IsLE ? i : SubScale - 1 - i;
13851386
Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
13861387
}
13871388
}
1389+
// Look through a cast from wider vector elements to narrow type.
1390+
// Examples: v2i64 -> v4i32
1391+
if (SubBitWidth % BitWidth == 0) {
1392+
unsigned SubScale = SubBitWidth / BitWidth;
1393+
KnownBits KnownSrc(SubBitWidth);
1394+
APInt SubDemandedElts =
1395+
APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
1396+
computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q,
1397+
Depth + 1);
1398+
1399+
Known.Zero.setAllBits();
1400+
Known.One.setAllBits();
1401+
for (unsigned i = 0; i != NumElts; ++i) {
1402+
if (DemandedElts[i]) {
1403+
unsigned Shifts = IsLE ? i : NumElts - 1 - i;
1404+
unsigned Offset = (Shifts % SubScale) * BitWidth;
1405+
Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset));
1406+
if (Known.isUnknown())
1407+
break;
1408+
}
1409+
}
1410+
}
13881411
break;
13891412
}
13901413
case Instruction::SExt: {

llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3732,7 +3732,6 @@ define <4 x i64> @test_avx2_psrl_0() {
37323732
ret <4 x i64> %16
37333733
}
37343734

3735-
; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds.
37363735
define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
37373736
; CHECK-LABEL: @PR125228(
37383737
; CHECK-NEXT: [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63)
@@ -3741,7 +3740,8 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
37413740
; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8>
37423741
; CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
37433742
; CHECK-NEXT: [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
3744-
; CHECK-NEXT: [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]])
3743+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer
3744+
; CHECK-NEXT: [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]]
37453745
; CHECK-NEXT: [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> <i32 0, i32 3>
37463746
; CHECK-NEXT: ret <2 x i64> [[SHUFP_UNCASTED]]
37473747
;

llvm/test/Transforms/InstCombine/bitcast-known-bits.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg
1212
; CHECK-NEXT: [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16>
1313
; CHECK-NEXT: [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2)
1414
; CHECK-NEXT: [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8>
15-
; CHECK-NEXT: [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52)
16-
; CHECK-NEXT: ret <16 x i8> [[AND7]]
15+
; CHECK-NEXT: ret <16 x i8> [[BITCAST6]]
1716
;
1817
%and = and <16 x i8> %arg1, splat (i8 3)
1918
%and3 = and <16 x i8> %arg2, splat (i8 48)
@@ -33,8 +32,7 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg) {
3332
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
3433
; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
3534
; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
36-
; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
37-
; CHECK-NEXT: ret <16 x i8> [[AND3]]
35+
; CHECK-NEXT: ret <16 x i8> [[BITCAST2]]
3836
;
3937
%and = and <16 x i8> %arg, splat (i8 15)
4038
%shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -53,8 +51,7 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg) {
5351
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
5452
; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
5553
; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
56-
; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
57-
; CHECK-NEXT: ret <16 x i8> [[AND3]]
54+
; CHECK-NEXT: ret <16 x i8> [[BITCAST2]]
5855
;
5956
%and = and <16 x i8> %arg, splat (i8 15)
6057
%shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -70,8 +67,7 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg) {
7067
; CHECK-SAME: <8 x i16> [[ARG:%.*]]) {
7168
; CHECK-NEXT: [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15)
7269
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8>
73-
; CHECK-NEXT: [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1)
74-
; CHECK-NEXT: ret <16 x i8> [[AND]]
70+
; CHECK-NEXT: ret <16 x i8> [[BITCAST1]]
7571
;
7672
%lshr = lshr <8 x i16> %arg, splat (i16 15)
7773
%bitcast1 = bitcast <8 x i16> %lshr to <16 x i8>
@@ -88,7 +84,8 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i
8884
; CHECK-NEXT: [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]])
8985
; CHECK-NEXT: [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32>
9086
; CHECK-NEXT: [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0
91-
; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]])
87+
; CHECK-NEXT: [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]]
88+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0
9289
; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
9390
;
9491
%call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1)
@@ -110,11 +107,7 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> %
110107
; CHECK-NEXT: [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8>
111108
; CHECK-NEXT: [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]]
112109
; CHECK-NEXT: [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]]
113-
; CHECK-NEXT: [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16>
114-
; CHECK-NEXT: [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8)
115-
; CHECK-NEXT: [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8>
116-
; CHECK-NEXT: [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]]
117-
; CHECK-NEXT: ret <16 x i8> [[ADD13]]
110+
; CHECK-NEXT: ret <16 x i8> [[ADD14]]
118111
;
119112
%shl6 = shl <8 x i16> %arg2, splat (i16 8)
120113
%bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8>

llvm/test/Transforms/InstSimplify/shift-knownbits.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,3 +499,29 @@ define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> %
499499
%r = shl <1 x i64> %v2, %b
500500
ret <1 x i64> %r
501501
}
502+
503+
; Test that verifies correct handling of known bits when bitcasting from a vector with fewer,
504+
; wider elements to one with more, narrower elements (e.g., <2 x i32> to <8 x i8>). Previously,
505+
; only the subscale portion (e.g., 4 elements) was checked instead of the full demanded vector
506+
; width (8 elements), leading to incorrect known bits and removal of the `ashr` instruction.
507+
508+
define <8 x i8> @bitcast_knownbits_subscale_miscompile(i32 %x) {
509+
; CHECK-LABEL: @bitcast_knownbits_subscale_miscompile(
510+
; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[X:%.*]], -256
511+
; CHECK-NEXT: [[SETBITS:%.*]] = or i32 [[MASKED]], -16777216
512+
; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i32> poison, i32 [[SETBITS]], i32 0
513+
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
514+
; CHECK-NEXT: [[VEC:%.*]] = bitcast <2 x i32> [[SPLAT]] to <8 x i8>
515+
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i8> [[VEC]], <8 x i8> zeroinitializer, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 0, i32 0, i32 0, i32 0>
516+
; CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i8> [[SHUF]], splat (i8 1)
517+
; CHECK-NEXT: ret <8 x i8> [[SHR]]
518+
;
519+
%masked = and i32 %x, u0xFFFFFF00
520+
%setbits = or i32 %masked, u0xFF000000
521+
%insert = insertelement <2 x i32> poison, i32 %setbits, i32 0
522+
%splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> splat (i32 0)
523+
%vec = bitcast <2 x i32> %splat to <8 x i8>
524+
%shuf = shufflevector <8 x i8> %vec, <8 x i8> zeroinitializer, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 0, i32 0, i32 0, i32 0>
525+
%shr = ashr <8 x i8> %shuf, splat (i8 1)
526+
ret <8 x i8> %shr
527+
}

0 commit comments

Comments
 (0)