From 798189e004badbcdcab907663cf7831afe06d575 Mon Sep 17 00:00:00 2001 From: "Wang, Phoebe" Date: Thu, 5 Jun 2025 20:18:12 +0800 Subject: [PATCH] [X86][FP16] Widen UI2FP for FP16 when VLX not enabled Fixes: https://godbolt.org/z/5vc8oMhxz --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 +++++-- llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll | 35 ++++++++++++-------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9cff3d76913ab..760119bc62604 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20361,10 +20361,16 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL, if (VT == MVT::v8f64) return Op; - assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) && + assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64 || + VT == MVT::v8f16) && "Unexpected VT!"); - MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; - MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; + MVT WideVT = VT == MVT::v8f16 ? MVT::v16f16 : MVT::v16f32; + MVT WideIntVT = MVT::v16i32; + if (VT == MVT::v4f64) { + WideVT = MVT::v8f64; + WideIntVT = MVT::v8i32; + } + // Need to concat with zero vector for strict fp to avoid spurious // exceptions. 
SDValue Tmp = diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll index 38c833ee89bc0..26947b5eb3022 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll @@ -8,8 +8,8 @@ define <2 x half> @vector_sint64ToHalf(<2 x i64> %int64) { ; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %fp16 = sitofp <2 x i64> %int64 to <2 x half> - ret <2 x half> %fp16 + %fp16 = sitofp <2 x i64> %int64 to <2 x half> + ret <2 x half> %fp16 } define <4 x half> @vector_sint32ToHalf(<4 x i32> %int32) { @@ -27,8 +27,8 @@ define <4 x half> @vector_sint32ToHalf(<4 x i32> %int32) { ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; CHECK-NEXT: retq - %fp16 = sitofp <4 x i32> %int32 to <4 x half> - ret <4 x half> %fp16 + %fp16 = sitofp <4 x i32> %int32 to <4 x half> + ret <4 x half> %fp16 } define <8 x half> @vector_sint16ToHalf(<8 x i16> %int16) { @@ -66,8 +66,8 @@ define <8 x half> @vector_sint16ToHalf(<8 x i16> %int16) { ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq - %fp16 = sitofp <8 x i16> %int16 to <8 x half> - ret <8 x half> %fp16 + %fp16 = sitofp <8 x i16> %int16 to <8 x half> + ret <8 x half> %fp16 } define <2 x half> @vector_uint64ToHalf(<2 x i64> %int64) { @@ -77,14 +77,21 @@ define <2 x half> @vector_uint64ToHalf(<2 x i64> %int64) { ; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %fp16 = uitofp <2 x i64> %int64 to <2 x half> - ret <2 x half> %fp16 + %fp16 = uitofp <2 x i64> %int64 to <2 x half> + ret <2 x half> %fp16 } -; define <4 x half> @vector_uint32ToHalf(<4 x i32> %int32) { -; %fp16 = uitofp <4 x i32> %int32 to <4 x half> -; ret <4 x half> %fp16 -; } +define <4 x half> 
@vector_uint32ToHalf(<4 x i32> %int32) { +; CHECK-LABEL: vector_uint32ToHalf: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0 +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %fp16 = uitofp <4 x i32> %int32 to <4 x half> + ret <4 x half> %fp16 +} define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) { ; CHECK-LABEL: vector_uint16ToHalf: @@ -113,6 +120,6 @@ define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) { ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq - %fp16 = uitofp <8 x i16> %int16 to <8 x half> - ret <8 x half> %fp16 + %fp16 = uitofp <8 x i16> %int16 to <8 x half> + ret <8 x half> %fp16 }