Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21878,9 +21878,11 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
}

In = DAG.getBitcast(MVT::i16, In);
In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
getZeroVector(MVT::v4i32, Subtarget, DAG, DL), In,
DAG.getVectorIdxConstant(0, DL));
In = DAG.getBitcast(MVT::v8i16, In);
SDValue Res;
if (IsStrict) {
Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, DL, {MVT::v4f32, MVT::Other},
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512-insert-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2166,7 +2166,7 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
; KNL-NEXT: setb %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; KNL-NEXT: vpsrld $16, %xmm0, %xmm0
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
; KNL-NEXT: vucomiss %xmm2, %xmm0
; KNL-NEXT: setb %al
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/avx512-vec-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1443,8 +1443,7 @@ define void @half_vec_compare(ptr %x, ptr %y) {
; KNL: ## %bb.0: ## %entry
; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
; KNL-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
; KNL-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
; KNL-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
Expand All @@ -1470,8 +1469,7 @@ define void @half_vec_compare(ptr %x, ptr %y) {
; AVX512BW: ## %bb.0: ## %entry
; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
; AVX512BW-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
; AVX512BW-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
;
; AVX512-LABEL: v_test_canonicalize__half:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: movzwl (%rdi), %eax
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX512-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not zeroing out v8i16[1] element anymore? Just v8i16[2-7].

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, limit it to non-strict FP only.

; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512-NEXT: retq
Expand Down Expand Up @@ -144,12 +144,12 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT: vmovd %eax, %xmm2
; AVX512-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3,4,5,6,7]
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -228,21 +228,21 @@ define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind
; AVX512-LABEL: v_test_canonicalize_v2half:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm2
; AVX512-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX512-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; AVX512-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512-NEXT: vmulss %xmm1, %xmm3, %xmm3
; AVX512-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm3, %xmm3
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; AVX512-NEXT: vmovd %xmm0, (%rdi)
; AVX512-NEXT: retq
entry:
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/fminimum-fmaximum.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1854,9 +1854,9 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
; AVX512-NEXT: cmovpl %ecx, %r8d
; AVX512-NEXT: movl $0, %r11d
; AVX512-NEXT: cmoval %ecx, %r11d
; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7]
; AVX512-NEXT: vpsrlq $48, %xmm1, %xmm2
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm3
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512-NEXT: vucomiss %xmm2, %xmm3
; AVX512-NEXT: movl $0, %r10d
Expand All @@ -1872,9 +1872,9 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
; AVX512-NEXT: cmovpl %ecx, %ebx
; AVX512-NEXT: movl $0, %r14d
; AVX512-NEXT: cmoval %ecx, %r14d
; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7]
; AVX512-NEXT: vpsrld $16, %xmm1, %xmm2
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm3
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512-NEXT: vucomiss %xmm2, %xmm3
; AVX512-NEXT: movl $0, %r15d
Expand Down Expand Up @@ -1916,7 +1916,7 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
; AVX512-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3
; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; AVX512-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
; AVX512-NEXT: vpsrld $16, %xmm2, %xmm3
; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512-NEXT: vucomiss %xmm4, %xmm3
Expand All @@ -1937,7 +1937,7 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
; AVX512-NEXT: cmovnel %eax, %edx
; AVX512-NEXT: cmovpl %eax, %edx
; AVX512-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; AVX512-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[3,3,3,3,4,5,6,7]
; AVX512-NEXT: vpsrlq $48, %xmm2, %xmm5
; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
; AVX512-NEXT: vucomiss %xmm4, %xmm5
; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
Expand Down
Loading