Skip to content

Commit 496b174

Browse files
goldsteinn authored and tru committed
[X86] Add tests for incorrectly optimizing out shuffle used in movmsk; PR67287
(cherry picked from commit 65a576e)
1 parent f50c638 commit 496b174

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed

llvm/test/CodeGen/X86/movmsk-cmp.ll

Lines changed: 137 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4454,3 +4454,140 @@ define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
44544454
%r = select i1 %u, i32 42, i32 99
44554455
ret i32 %r
44564456
}
4457+
4458+
; Regression test for PR67287 (shuffle feeding a movmsk-style reduction).
; A <2 x i1> compare result is used twice: once through a shufflevector + or
; that folds lane 1 into lane 0, and once through a direct extractelement of
; lane 0. The combined bit picks between a block that stores i16 0 to null
; and the fall-through block; every path returns 0.
; NOTE(review): the assertions below pin the codegen produced when this test
; was added. The commit title describes the shuffle as being incorrectly
; optimized out, so they presumably record the miscompile rather than the
; desired output (for instance, the SSE2 sequence loads the constant 3 into
; eax and immediately branches on it) - confirm against the follow-up fix.
define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
4459+
; SSE2-LABEL: pr67287:
4460+
; SSE2: # %bb.0: # %entry
4461+
; SSE2-NEXT: movl $3, %eax
4462+
; SSE2-NEXT: testl %eax, %eax
4463+
; SSE2-NEXT: jne .LBB97_2
4464+
; SSE2-NEXT: # %bb.1: # %entry
4465+
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4466+
; SSE2-NEXT: pxor %xmm1, %xmm1
4467+
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
4468+
; SSE2-NEXT: movd %xmm1, %eax
4469+
; SSE2-NEXT: testb $1, %al
4470+
; SSE2-NEXT: jne .LBB97_2
4471+
; SSE2-NEXT: # %bb.3: # %middle.block
4472+
; SSE2-NEXT: xorl %eax, %eax
4473+
; SSE2-NEXT: retq
4474+
; SSE2-NEXT: .LBB97_2:
4475+
; SSE2-NEXT: movw $0, 0
4476+
; SSE2-NEXT: xorl %eax, %eax
4477+
; SSE2-NEXT: retq
4478+
;
4479+
; SSE41-LABEL: pr67287:
4480+
; SSE41: # %bb.0: # %entry
4481+
; SSE41-NEXT: pxor %xmm1, %xmm1
4482+
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
4483+
; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
4484+
; SSE41-NEXT: movmskpd %xmm0, %eax
4485+
; SSE41-NEXT: testl %eax, %eax
4486+
; SSE41-NEXT: jne .LBB97_2
4487+
; SSE41-NEXT: # %bb.1: # %entry
4488+
; SSE41-NEXT: movd %xmm0, %eax
4489+
; SSE41-NEXT: testb $1, %al
4490+
; SSE41-NEXT: jne .LBB97_2
4491+
; SSE41-NEXT: # %bb.3: # %middle.block
4492+
; SSE41-NEXT: xorl %eax, %eax
4493+
; SSE41-NEXT: retq
4494+
; SSE41-NEXT: .LBB97_2:
4495+
; SSE41-NEXT: movw $0, 0
4496+
; SSE41-NEXT: xorl %eax, %eax
4497+
; SSE41-NEXT: retq
4498+
;
4499+
; AVX1-LABEL: pr67287:
4500+
; AVX1: # %bb.0: # %entry
4501+
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
4502+
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
4503+
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4504+
; AVX1-NEXT: vtestpd %xmm0, %xmm0
4505+
; AVX1-NEXT: jne .LBB97_2
4506+
; AVX1-NEXT: # %bb.1: # %entry
4507+
; AVX1-NEXT: vmovd %xmm0, %eax
4508+
; AVX1-NEXT: testb $1, %al
4509+
; AVX1-NEXT: jne .LBB97_2
4510+
; AVX1-NEXT: # %bb.3: # %middle.block
4511+
; AVX1-NEXT: xorl %eax, %eax
4512+
; AVX1-NEXT: retq
4513+
; AVX1-NEXT: .LBB97_2:
4514+
; AVX1-NEXT: movw $0, 0
4515+
; AVX1-NEXT: xorl %eax, %eax
4516+
; AVX1-NEXT: retq
4517+
;
4518+
; AVX2-LABEL: pr67287:
4519+
; AVX2: # %bb.0: # %entry
4520+
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
4521+
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4522+
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4523+
; AVX2-NEXT: vtestpd %xmm0, %xmm0
4524+
; AVX2-NEXT: jne .LBB97_2
4525+
; AVX2-NEXT: # %bb.1: # %entry
4526+
; AVX2-NEXT: vmovd %xmm0, %eax
4527+
; AVX2-NEXT: testb $1, %al
4528+
; AVX2-NEXT: jne .LBB97_2
4529+
; AVX2-NEXT: # %bb.3: # %middle.block
4530+
; AVX2-NEXT: xorl %eax, %eax
4531+
; AVX2-NEXT: retq
4532+
; AVX2-NEXT: .LBB97_2:
4533+
; AVX2-NEXT: movw $0, 0
4534+
; AVX2-NEXT: xorl %eax, %eax
4535+
; AVX2-NEXT: retq
4536+
;
4537+
; KNL-LABEL: pr67287:
4538+
; KNL: # %bb.0: # %entry
4539+
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
4540+
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4541+
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
4542+
; KNL-NEXT: kmovw %k0, %eax
4543+
; KNL-NEXT: testb $3, %al
4544+
; KNL-NEXT: jne .LBB97_2
4545+
; KNL-NEXT: # %bb.1: # %entry
4546+
; KNL-NEXT: kmovw %k0, %eax
4547+
; KNL-NEXT: testb $1, %al
4548+
; KNL-NEXT: jne .LBB97_2
4549+
; KNL-NEXT: # %bb.3: # %middle.block
4550+
; KNL-NEXT: xorl %eax, %eax
4551+
; KNL-NEXT: vzeroupper
4552+
; KNL-NEXT: retq
4553+
; KNL-NEXT: .LBB97_2:
4554+
; KNL-NEXT: movw $0, 0
4555+
; KNL-NEXT: xorl %eax, %eax
4556+
; KNL-NEXT: vzeroupper
4557+
; KNL-NEXT: retq
4558+
;
4559+
; SKX-LABEL: pr67287:
4560+
; SKX: # %bb.0: # %entry
4561+
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
4562+
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
4563+
; SKX-NEXT: vptestnmq %xmm0, %xmm0, %k0
4564+
; SKX-NEXT: kortestb %k0, %k0
4565+
; SKX-NEXT: jne .LBB97_2
4566+
; SKX-NEXT: # %bb.1: # %entry
4567+
; SKX-NEXT: kmovd %k0, %eax
4568+
; SKX-NEXT: testb $1, %al
4569+
; SKX-NEXT: jne .LBB97_2
4570+
; SKX-NEXT: # %bb.3: # %middle.block
4571+
; SKX-NEXT: xorl %eax, %eax
4572+
; SKX-NEXT: retq
4573+
; SKX-NEXT: .LBB97_2:
4574+
; SKX-NEXT: movw $0, 0
4575+
; SKX-NEXT: xorl %eax, %eax
4576+
; SKX-NEXT: retq
4577+
entry:
4578+
; Keep only the low 32 bits of each i64 lane, then test each lane for zero.
%0 = and <2 x i64> %broadcast.splatinsert25, <i64 4294967295, i64 4294967295>
4579+
%1 = icmp eq <2 x i64> %0, zeroinitializer
4580+
; Move lane 1 of the compare into lane 0 (lane 1 of the result is poison) so
; the `or` below combines both compare lanes in lane 0.
%shift = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 poison>
4581+
%2 = or <2 x i1> %1, %shift
4582+
; %3 is lane 0 of the shuffled reduction; %4 is lane 0 of the raw compare.
%3 = extractelement <2 x i1> %2, i64 0
4583+
%4 = extractelement <2 x i1> %1, i64 0
4584+
; Take the storing block when either extracted bit is set.
%5 = or i1 %3, %4
4585+
br i1 %5, label %6, label %middle.block
4586+
4587+
6: ; preds = %entry
4588+
store i16 0, ptr null, align 2
4589+
br label %middle.block
4590+
4591+
middle.block: ; preds = %6, %entry
4592+
ret i32 0
4593+
}

0 commit comments

Comments
 (0)