Skip to content

Commit 1a644bc

Browse files
abhishek-kaushik22 authored and tstellar committed
[X86] Fix assertion in AVX512 setcc combine due to invalid APInt mask width (llvm#155775)
The AVX512 setcc combine in X86ISelLowering was calling `APInt::getLowBitsSet` with a mask width (`Len`) that could exceed the bit width of the broadcasted scalar operand (`BroadcastOpVT.getSizeInBits()`), leading to assertion failures. This patch replaces `Len` with the number of defined (non-undef) elements in the constant pool vector. This ensures the generated mask is valid and avoids crashes when the constant pool contains more elements than the scalar bit width can represent.

Fixes llvm#155762

(cherry picked from commit 33b2c26)
1 parent 2c8cb31 commit 1a644bc

File tree

2 files changed

+58
-1
lines changed

2 files changed

+58
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56212,7 +56212,13 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
5621256212

5621356213
SDValue Masked = BroadcastOp;
5621456214
if (N != 0) {
56215-
APInt Mask = APInt::getLowBitsSet(BroadcastOpVT.getSizeInBits(), Len);
56215+
unsigned BroadcastOpBitWidth = BroadcastOpVT.getSizeInBits();
56216+
unsigned NumDefinedElts = UndefElts.countTrailingZeros();
56217+
56218+
if (NumDefinedElts > BroadcastOpBitWidth)
56219+
return SDValue();
56220+
56221+
APInt Mask = APInt::getLowBitsSet(BroadcastOpBitWidth, NumDefinedElts);
5621656222
SDValue ShiftedValue = DAG.getNode(ISD::SRL, DL, BroadcastOpVT, BroadcastOp,
5621756223
DAG.getConstant(N, DL, BroadcastOpVT));
5621856224
Masked = DAG.getNode(ISD::AND, DL, BroadcastOpVT, ShiftedValue,

llvm/test/CodeGen/X86/kmov.ll

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,57 @@ define <8 x i1> @invert_i8_mask_extract_8(i8 %mask) {
143143
ret <8 x i1> %cmp.45
144144
}
145145

146+
define <8 x i1> @i8_mask_extract_7(i8 %mask) {
147+
; X64-AVX512-LABEL: i8_mask_extract_7:
148+
; X64-AVX512: # %bb.0:
149+
; X64-AVX512-NEXT: shrb %dil
150+
; X64-AVX512-NEXT: movzbl %dil, %eax
151+
; X64-AVX512-NEXT: kmovd %eax, %k0
152+
; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0
153+
; X64-AVX512-NEXT: retq
154+
;
155+
; X64-KNL-LABEL: i8_mask_extract_7:
156+
; X64-KNL: # %bb.0:
157+
; X64-KNL-NEXT: vmovd %edi, %xmm0
158+
; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0
159+
; X64-KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,4,8,16,32,64,128,0,2,4,8,16,32,64,128,0]
160+
; X64-KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
161+
; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
162+
; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
163+
; X64-KNL-NEXT: retq
164+
%.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0
165+
%.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer
166+
%1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison>
167+
%cmp.45 = icmp ne <8 x i8> %1, zeroinitializer
168+
ret <8 x i1> %cmp.45
169+
}
170+
171+
define <8 x i1> @invert_i8_mask_extract_7(i8 %mask) {
172+
; X64-AVX512-LABEL: invert_i8_mask_extract_7:
173+
; X64-AVX512: # %bb.0:
174+
; X64-AVX512-NEXT: shrb %dil
175+
; X64-AVX512-NEXT: movzbl %dil, %eax
176+
; X64-AVX512-NEXT: kmovd %eax, %k0
177+
; X64-AVX512-NEXT: knotb %k0, %k0
178+
; X64-AVX512-NEXT: vpmovm2w %k0, %xmm0
179+
; X64-AVX512-NEXT: retq
180+
;
181+
; X64-KNL-LABEL: invert_i8_mask_extract_7:
182+
; X64-KNL: # %bb.0:
183+
; X64-KNL-NEXT: vmovd %edi, %xmm0
184+
; X64-KNL-NEXT: vpbroadcastb %xmm0, %xmm0
185+
; X64-KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
186+
; X64-KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
187+
; X64-KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
188+
; X64-KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
189+
; X64-KNL-NEXT: retq
190+
%.splatinsert = insertelement <8 x i8> poison, i8 %mask, i64 0
191+
%.splat = shufflevector <8 x i8> %.splatinsert, <8 x i8> poison, <8 x i32> zeroinitializer
192+
%1 = and <8 x i8> %.splat, <i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 poison>
193+
%cmp.45 = icmp eq <8 x i8> %1, zeroinitializer
194+
ret <8 x i1> %cmp.45
195+
}
196+
146197
define <4 x i1> @i16_mask_extract_4(i16 %mask) {
147198
; X64-AVX512-LABEL: i16_mask_extract_4:
148199
; X64-AVX512: # %bb.0:

0 commit comments

Comments
 (0)