@@ -1211,3 +1211,59 @@ define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i
12111211 %out = call <3 x i3 > @llvm.experimental.vector.compress (<3 x i3 > %vec , <3 x i1 > %mask , <3 x i3 > undef )
12121212 ret <3 x i3 > %out
12131213}
1214+
1215+ define <4 x i32 > @test_compress_v4i32_zero_passthru (<4 x i32 > %vec , <4 x i1 > %mask ) { ; compress of <4 x i32> under a <4 x i1> mask with an all-zero passthru; NOTE(review): the check lines below look tool-generated - regenerate rather than hand-edit (confirm against the file header)
1216+ ; AVX2-LABEL: test_compress_v4i32_zero_passthru:
1217+ ; AVX2: # %bb.0:
1218+ ; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
1219+ ; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
1220+ ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
1221+ ; AVX2-NEXT: vmovaps %xmm2, -{{[0-9]+}}(%rsp)
1222+ ; AVX2-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
1223+ ; AVX2-NEXT: vmovd %xmm1, %eax
1224+ ; AVX2-NEXT: andl $1, %eax
1225+ ; AVX2-NEXT: vextractps $1, %xmm0, -24(%rsp,%rax,4)
1226+ ; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
1227+ ; AVX2-NEXT: andl $1, %ecx
1228+ ; AVX2-NEXT: addq %rax, %rcx
1229+ ; AVX2-NEXT: vextractps $2, %xmm0, -24(%rsp,%rcx,4)
1230+ ; AVX2-NEXT: vpextrd $2, %xmm1, %eax
1231+ ; AVX2-NEXT: andl $1, %eax
1232+ ; AVX2-NEXT: addq %rcx, %rax
1233+ ; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
1234+ ; AVX2-NEXT: andl $1, %ecx
1235+ ; AVX2-NEXT: addq %rax, %rcx
1236+ ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
1237+ ; AVX2-NEXT: andl $3, %eax
1238+ ; AVX2-NEXT: vextractps $3, %xmm0, -24(%rsp,%rax,4)
1239+ ; AVX2-NEXT: xorl %eax, %eax
1240+ ; AVX2-NEXT: cmpq $3, %rcx
1241+ ; AVX2-NEXT: movl $3, %edx
1242+ ; AVX2-NEXT: cmovbq %rcx, %rdx
1243+ ; AVX2-NEXT: vextractps $3, %xmm0, %ecx
1244+ ; AVX2-NEXT: cmovbel %eax, %ecx
1245+ ; AVX2-NEXT: movl %ecx, -24(%rsp,%rdx,4)
1246+ ; AVX2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1247+ ; AVX2-NEXT: retq
1248+ ;
1249+ ; AVX512F-LABEL: test_compress_v4i32_zero_passthru:
1250+ ; AVX512F: # %bb.0:
1251+ ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1252+ ; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
1253+ ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1254+ ; AVX512F-NEXT: kshiftlw $12, %k0, %k0
1255+ ; AVX512F-NEXT: kshiftrw $12, %k0, %k1
1256+ ; AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
1257+ ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1258+ ; AVX512F-NEXT: vzeroupper
1259+ ; AVX512F-NEXT: retq
1260+ ;
1261+ ; AVX512VL-LABEL: test_compress_v4i32_zero_passthru:
1262+ ; AVX512VL: # %bb.0:
1263+ ; AVX512VL-NEXT: vpslld $31, %xmm1, %xmm1
1264+ ; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
1265+ ; AVX512VL-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z}
1266+ ; AVX512VL-NEXT: retq
1267+ %out = call <4 x i32 > @llvm.experimental.vector.compress (<4 x i32 > %vec , <4 x i1 > %mask , <4 x i32 > zeroinitializer ) ; passthru operand is zeroinitializer; the AVX-512 expectations above use the zero-masked ({z}) form of vpcompressd, while the AVX2 expectations zero a stack slot and store selected lanes into it
1268+ ret <4 x i32 > %out
1269+ }
0 commit comments