Skip to content

Commit 33b2c26

Browse files
[X86] Fix assertion in AVX512 setcc combine due to invalid APInt mask width (#155775)
The AVX512 setcc combine in X86ISelLowering (`combineAVX512SetCCToKMOV`) was calling `APInt::getLowBitsSet` with a low-bit count (`Len`) that could exceed the bit width of the broadcast scalar operand (`BroadcastOpVT.getSizeInBits()`), leading to assertion failures. This patch replaces `Len` with the number of defined (non-undef) elements at the start of the constant pool vector, computed as `UndefElts.countTrailingZeros()`, and bails out of the combine if that count is still larger than the scalar bit width. This ensures the generated mask is valid and avoids crashes when the constant pool contains more elements than the scalar bit width can represent. Fixes #155762.
1 parent 6dd67f8 commit 33b2c26

File tree

2 files changed

+58
-1
lines changed

2 files changed

+58
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56278,7 +56278,13 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
5627856278

5627956279
SDValue Masked = BroadcastOp;
5628056280
if (N != 0) {
56281-
APInt Mask = APInt::getLowBitsSet(BroadcastOpVT.getSizeInBits(), Len);
56281+
unsigned BroadcastOpBitWidth = BroadcastOpVT.getSizeInBits();
56282+
unsigned NumDefinedElts = UndefElts.countTrailingZeros();
56283+
56284+
if (NumDefinedElts > BroadcastOpBitWidth)
56285+
return SDValue();
56286+
56287+
APInt Mask = APInt::getLowBitsSet(BroadcastOpBitWidth, NumDefinedElts);
5628256288
SDValue ShiftedValue = DAG.getNode(ISD::SRL, DL, BroadcastOpVT, BroadcastOp,
5628356289
DAG.getConstant(N, DL, BroadcastOpVT));
5628456290
Masked = DAG.getNode(ISD::AND, DL, BroadcastOpVT, ShiftedValue,

llvm/test/CodeGen/X86/kmov.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,57 @@ define <8 x i1> @invert_i8_mask_extract_8(i8 %mask) {
143143
ret <8 x i1> %cmp.45
144144
}
145145

146+
define <8 x i1> @i8_mask_extract_7(i8 %mask) {
147+
; X64-AVX512-LABEL: i8_mask_extract_7:
148+
; X64-AVX512: # %bb.0:
149+
; X64-AVX512-NEXT: shrb %dil
150+
; X64-AVX512-NEXT: movzbl %dil, %eax
151+
; X64-AVX512-NEXT: kmovd %eax, %k0
152+
; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0
153+
; X64-AVX512-NEXT: retq
154+
;
155+
; X64-KNL-LABEL: i8_mask_extract_7:
156+
; X64-KNL: # %bb.0:
157+
; X64-KNL-NEXT: vmovd %edi, %xmm0
158+
; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0
159+
; X64-KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,4,8,16,32,64,128,0,2,4,8,16,32,64,128,0]
160+
; X64-KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
161+
; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
162+
; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
163+
; X64-KNL-NEXT: retq
164+
%.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0
165+
%.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer
166+
%1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison>
167+
%cmp.45 = icmp ne <8 x i8> %1, zeroinitializer
168+
ret <8 x i1> %cmp.45
169+
}
170+
171+
define <8 x i1> @invert_i8_mask_extract_7(i8 %mask) {
172+
; X64-AVX512-LABEL: invert_i8_mask_extract_7:
173+
; X64-AVX512: # %bb.0:
174+
; X64-AVX512-NEXT: shrb %dil
175+
; X64-AVX512-NEXT: movzbl %dil, %eax
176+
; X64-AVX512-NEXT: kmovd %eax, %k0
177+
; X64-AVX512-NEXT: knotb %k0, %k0
178+
; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0
179+
; X64-AVX512-NEXT: retq
180+
;
181+
; X64-KNL-LABEL: invert_i8_mask_extract_7:
182+
; X64-KNL: # %bb.0:
183+
; X64-KNL-NEXT: vmovd %edi, %xmm0
184+
; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0
185+
; X64-KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
186+
; X64-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
187+
; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
188+
; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
189+
; X64-KNL-NEXT: retq
190+
%.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0
191+
%.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer
192+
%1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison>
193+
%cmp.45 = icmp eq <8 x i8> %1, zeroinitializer
194+
ret <8 x i1> %cmp.45
195+
}
196+
146197
define <4 x i1> @i16_mask_extract_4(i16 %mask) {
147198
; X64-AVX512-LABEL: i16_mask_extract_4:
148199
; X64-AVX512: # %bb.0:

0 commit comments

Comments
 (0)