diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5cd6561914364..70f9485c3e5b4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -919,6 +919,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal); } + if (Subtarget->hasCvtPkF16F32Inst()) + setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom); + setTargetDAGCombine({ISD::ADD, ISD::UADDO_CARRY, ISD::SUB, @@ -6899,10 +6902,16 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG, SDValue Op, SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + + if (DstVT == MVT::v2f16) { + assert(Subtarget->hasCvtPkF16F32Inst() && "support v_cvt_pk_f16_f32"); + return SrcVT == MVT::v2f32 ? Op : SDValue(); + } + if (SrcVT.getScalarType() != MVT::f64) return Op; - EVT DstVT = Op.getValueType(); SDLoc DL(Op); if (DstVT == MVT::f16) { // TODO: Handle strictfp diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll similarity index 93% rename from llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll rename to llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll index 5a5e39489d888..e5815e96fbe33 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll @@ -12,6 +12,20 @@ define <2 x half> @v_test_cvt_v2f32_v2f16(<2 x float> %src) { ret <2 x half> %res } +define half @fptrunc_v2f32_v2f16_then_extract(<2 x float> %src) { +; GFX950-LABEL: fptrunc_v2f32_v2f16_then_extract: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1 +; GFX950-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX950-NEXT: s_setpc_b64 s[30:31] + %vec_half = fptrunc <2 x float> %src to <2 x half> + %first = extractelement <2 x half> %vec_half, i64 1 + %second = extractelement <2 x half> %vec_half, i64 0 + %res = fadd half %first, %second + ret half %res +} + define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) { ; GFX950-SDAG-LABEL: v_test_cvt_v2f64_v2f16: ; GFX950-SDAG: ; %bb.0: