Skip to content

Commit 615ebce

Browse files
fix: prevent constant folding in tests
1 parent 74e6184 commit 615ebce

File tree

1 file changed

+66
-14
lines changed

1 file changed

+66
-14
lines changed

llvm/test/CodeGen/X86/avx512-mask-set-opt.ll

Lines changed: 66 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -135,24 +135,76 @@ define <16 x float> @gather_lower(ptr %base, <16 x i32> %ind, i16 %mask) {
135135
ret <16 x float> %res
136136
}
137137

138-
; Test case 5: v32i1 mask via bitconvert, lower 16 bits set (tests bitconvert pattern)
139-
define <32 x i16> @mask_v32i1_lower16(<32 x i16> %a, <32 x i16> %b) {
140-
; AVX512-LABEL: mask_v32i1_lower16:
141-
; AVX512: # %bb.0:
142-
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
143-
; AVX512-NEXT: retq
144-
%mask = bitcast i32 65535 to <32 x i1>
138+
; Test case 5: v32i1 mask via bitconvert combined with dynamic condition.
139+
; Ensures lower 16 lanes force the KSET1W path without folding into a shuffle.
140+
define <32 x i16> @mask_v32i1_lower16(<32 x i16> %a, <32 x i16> %b,
141+
<32 x i16> %c, <32 x i16> %d) {
142+
; AVX512F-LABEL: mask_v32i1_lower16:
143+
; AVX512F: vextracti64x4
144+
; AVX512F: vpcmpgtw
145+
; AVX512F: vpternlogd
146+
; AVX512F: vinserti64x4
147+
; AVX512F: vpternlogq
148+
;
149+
; AVX512DQ-LABEL: mask_v32i1_lower16:
150+
; AVX512DQ: vextracti64x4
151+
; AVX512DQ: vpcmpgtw
152+
; AVX512DQ: vpternlogd
153+
; AVX512DQ: vinserti64x4
154+
; AVX512DQ: vpternlogq
155+
;
156+
; AVX512BW-LABEL: mask_v32i1_lower16:
157+
; AVX512BW: movl $65535, %eax
158+
; AVX512BW: kmovd %eax, %k0
159+
; AVX512BW: vpcmpgtw %zmm3, %zmm2, %k1
160+
; AVX512BW: kord %k0, %k1, %k1
161+
; AVX512BW: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
162+
;
163+
; AVX512DQBW-LABEL: mask_v32i1_lower16:
164+
; AVX512DQBW: kxnorw %k0, %k0, %k0
165+
; AVX512DQBW: vpcmpgtw %zmm3, %zmm2, %k1
166+
; AVX512DQBW: kord %k0, %k1, %k1
167+
; AVX512DQBW: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
168+
%mask0 = bitcast i32 65535 to <32 x i1>
169+
%mask1 = icmp sgt <32 x i16> %c, %d
170+
%mask = or <32 x i1> %mask0, %mask1
145171
%res = select <32 x i1> %mask, <32 x i16> %a, <32 x i16> %b
146172
ret <32 x i16> %res
147173
}
148174

149-
; Test case 6: v64i1 mask via bitconvert, lower 32 bits set (tests bitconvert pattern)
150-
define <64 x i8> @mask_v64i1_lower32(<64 x i8> %a, <64 x i8> %b) {
151-
; AVX512-LABEL: mask_v64i1_lower32:
152-
; AVX512: # %bb.0:
153-
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
154-
; AVX512-NEXT: retq
155-
%mask = bitcast i64 4294967295 to <64 x i1>
175+
; Test case 6: v64i1 mask via bitconvert combined with dynamic condition.
176+
; Verifies the KSET1D submask pattern survives past SelectionDAG combines.
177+
define <64 x i8> @mask_v64i1_lower32(<64 x i8> %a, <64 x i8> %b,
178+
<64 x i8> %c, <64 x i8> %d) {
179+
; AVX512F-LABEL: mask_v64i1_lower32:
180+
; AVX512F: vextracti64x4
181+
; AVX512F: vpcmpgtb
182+
; AVX512F: vpternlogd
183+
; AVX512F: vinserti64x4
184+
; AVX512F: vpternlogq
185+
;
186+
; AVX512DQ-LABEL: mask_v64i1_lower32:
187+
; AVX512DQ: vextracti64x4
188+
; AVX512DQ: vpcmpgtb
189+
; AVX512DQ: vpternlogd
190+
; AVX512DQ: vinserti64x4
191+
; AVX512DQ: vpternlogq
192+
;
193+
; AVX512BW-LABEL: mask_v64i1_lower32:
194+
; AVX512BW: movl $4294967295, %eax
195+
; AVX512BW: kmovq %rax, %k0
196+
; AVX512BW: vpcmpgtb %zmm3, %zmm2, %k1
197+
; AVX512BW: korq %k0, %k1, %k1
198+
; AVX512BW: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
199+
;
200+
; AVX512DQBW-LABEL: mask_v64i1_lower32:
201+
; AVX512DQBW: kxnord %k0, %k0, %k0
202+
; AVX512DQBW: vpcmpgtb %zmm3, %zmm2, %k1
203+
; AVX512DQBW: korq %k0, %k1, %k1
204+
; AVX512DQBW: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
205+
%mask0 = bitcast i64 4294967295 to <64 x i1>
206+
%mask1 = icmp sgt <64 x i8> %c, %d
207+
%mask = or <64 x i1> %mask0, %mask1
156208
%res = select <64 x i1> %mask, <64 x i8> %a, <64 x i8> %b
157209
ret <64 x i8> %res
158210
}

0 commit comments

Comments (0)