From d39e8640cec12e8f8a956dbd0551d7c465fee000 Mon Sep 17 00:00:00 2001
From: Paul Walker
Date: Fri, 28 Feb 2025 13:31:52 +0000
Subject: [PATCH] [LLVM][SVE] Implement isel for fptoi half/float/double to
 i1.

Also adds an assert that SVE support for strict_fp fp<->int operations
is missing.

The added costs are to maintain the existing values expected by
Analysis/CostModel/AArch64/sve-cast.ll.

NOTE: This PR omits bfloat support because it is broken for all result
types. This will be fixed in a follow-up PR.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  16 ++
 .../AArch64/AArch64TargetTransformInfo.cpp    |   6 +
 llvm/test/CodeGen/AArch64/sve-fcvt.ll         | 228 ++++++++++++++++++
 3 files changed, 250 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7a471662ea075..aab55bf725d98 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1567,6 +1567,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
       // There are no legal MVT::nxv16f## based types.
       if (VT != MVT::nxv16i1) {
+        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+        setOperationAction(ISD::FP_TO_UINT, VT, Custom);
         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
         setOperationAction(ISD::UINT_TO_FP, VT, Custom);
       }
@@ -4726,7 +4728,18 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
   EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
   EVT VT = Op.getValueType();
 
+  assert(!(IsStrict && VT.isScalableVector()) &&
+         "Unimplemented SVE support for STRICT_FP_to_INT!");
+
   if (VT.isScalableVector()) {
+    if (VT.getVectorElementType() == MVT::i1) {
+      SDLoc DL(Op);
+      EVT CvtVT = getPromotedVTForPredicate(VT);
+      SDValue Cvt = DAG.getNode(Op.getOpcode(), DL, CvtVT, Op.getOperand(0));
+      SDValue Zero = DAG.getConstant(0, DL, CvtVT);
+      return DAG.getSetCC(DL, VT, Cvt, Zero, ISD::SETNE);
+    }
+
     unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
                           ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
                           : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
@@ -5032,6 +5045,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
   unsigned Opc = Op.getOpcode();
   bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
 
+  assert(!(IsStrict && VT.isScalableVector()) &&
+         "Unimplemented SVE support for ISD::STRICT_INT_TO_FP!");
+
   if (VT.isScalableVector()) {
     if (InVT.getVectorElementType() == MVT::i1) {
       // We can't directly extend an SVE predicate; extend it first.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1008be32e5bfa..670bb98988297 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3017,20 +3017,24 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 1},
       {ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 1},
       {ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 1},
+      {ISD::FP_TO_SINT, MVT::nxv2i1, MVT::nxv2f64, 1},
       {ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1},
       {ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 1},
       {ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 1},
       {ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 1},
+      {ISD::FP_TO_UINT, MVT::nxv2i1, MVT::nxv2f64, 1},
 
       // Complex, from nxv4f32.
       {ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f32, 4},
       {ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1},
       {ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 1},
       {ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 1},
+      {ISD::FP_TO_SINT, MVT::nxv4i1, MVT::nxv4f32, 1},
       {ISD::FP_TO_UINT, MVT::nxv4i64, MVT::nxv4f32, 4},
       {ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1},
       {ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 1},
       {ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 1},
+      {ISD::FP_TO_UINT, MVT::nxv4i1, MVT::nxv4f32, 1},
 
       // Complex, from nxv8f64. Illegal -> illegal conversions not required.
       {ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 7},
@@ -3057,10 +3061,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f16, 4},
       {ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f16, 1},
       {ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f16, 1},
+      {ISD::FP_TO_SINT, MVT::nxv8i1, MVT::nxv8f16, 1},
       {ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f16, 10},
       {ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f16, 4},
       {ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f16, 1},
       {ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f16, 1},
+      {ISD::FP_TO_UINT, MVT::nxv8i1, MVT::nxv8f16, 1},
 
       // Complex, from nxv4f16.
       {ISD::FP_TO_SINT, MVT::nxv4i64, MVT::nxv4f16, 4},
diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
index fc5128fffad36..1d000e4fd5d53 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -113,6 +113,120 @@ define <vscale x 2 x float> @fcvts_nxv2f64(<vscale x 2 x double> %a) {
 ; FP_TO_SINT
 ;
 
+define <vscale x 2 x i1> @fcvtzs_nxv2f16_to_nxv2i1(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzs_nxv2f16_to_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x half> %a to <vscale x 2 x i1>
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzs_nxv2f32_to_nxv2i1(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzs_nxv2f32_to_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x float> %a to <vscale x 2 x i1>
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzs_nxv2f64_to_nxv2i1(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzs_nxv2f64_to_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 2 x double> %a to <vscale x 2 x i1>
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzs_nxv4f16_to_nxv4i1(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzs_nxv4f16_to_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x half> %a to <vscale x 4 x i1>
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzs_nxv4f32_to_nxv4i1(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzs_nxv4f32_to_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i1>
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzs_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
+; CHECK-LABEL: fcvtzs_nxv4f64_to_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i1>
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzs_nxv8f16_to_nxv8i1(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fcvtzs_nxv8f16_to_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 8 x half> %a to <vscale x 8 x i1>
+  ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzs_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
+; CHECK-LABEL: fcvtzs_nxv8f32_to_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzs z1.s, p0/m, z1.s
+; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 8 x float> %a to <vscale x 8 x i1>
+  ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzs_nxv8f64_to_nxv8i1(<vscale x 8 x double> %a) {
+; CHECK-LABEL: fcvtzs_nxv8f64_to_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    cmpne p1.d, p0/z, z3.d, #0
+; CHECK-NEXT:    cmpne p2.d, p0/z, z2.d, #0
+; CHECK-NEXT:    cmpne p3.d, p0/z, z1.d, #0
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    uzp1 p1.s, p2.s, p1.s
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p3.s
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
+  %res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i1>
+  ret <vscale x 8 x i1> %res
+}
+
 define <vscale x 2 x i16> @fcvtzs_h_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK-LABEL: fcvtzs_h_nxv2f16:
 ; CHECK:       // %bb.0:
@@ -277,6 +391,120 @@ define <vscale x 2 x i64> @fcvtzs_d_nxv2f64(<vscale x 2 x double> %a) {
 ; FP_TO_UINT
 ;
 
+define <vscale x 2 x i1> @fcvtzu_nxv2f16_to_nxv2i1(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcvtzu_nxv2f16_to_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x half> %a to <vscale x 2 x i1>
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzu_nxv2f32_to_nxv2i1(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcvtzu_nxv2f32_to_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x float> %a to <vscale x 2 x i1>
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @fcvtzu_nxv2f64_to_nxv2i1(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcvtzu_nxv2f64_to_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 2 x double> %a to <vscale x 2 x i1>
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzu_nxv4f16_to_nxv4i1(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcvtzu_nxv4f16_to_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x half> %a to <vscale x 4 x i1>
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzu_nxv4f32_to_nxv4i1(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcvtzu_nxv4f32_to_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i1>
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @fcvtzu_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
+; CHECK-LABEL: fcvtzu_nxv4f64_to_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i1>
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzu_nxv8f16_to_nxv8i1(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fcvtzu_nxv8f16_to_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 8 x half> %a to <vscale x 8 x i1>
+  ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzu_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
+; CHECK-LABEL: fcvtzu_nxv8f32_to_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvtzu z1.s, p0/m, z1.s
+; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 8 x float> %a to <vscale x 8 x i1>
+  ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 8 x i1> @fcvtzu_nxv8f64_to_nxv8i1(<vscale x 8 x double> %a) {
+; CHECK-LABEL: fcvtzu_nxv8f64_to_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvtzu z3.d, p0/m, z3.d
+; CHECK-NEXT:    fcvtzu z2.d, p0/m, z2.d
+; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    cmpne p1.d, p0/z, z3.d, #0
+; CHECK-NEXT:    cmpne p2.d, p0/z, z2.d, #0
+; CHECK-NEXT:    cmpne p3.d, p0/z, z1.d, #0
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    uzp1 p1.s, p2.s, p1.s
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p3.s
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
+  %res = fptoui <vscale x 8 x double> %a to <vscale x 8 x i1>
+  ret <vscale x 8 x i1> %res
+}
+
 ; NOTE: Using fcvtzs is safe as fptoui overflow is considered poison and a
 ; 64bit signed value encompasses the entire range of a 16bit unsigned value
 define <vscale x 2 x i16> @fcvtzu_h_nxv2f16(<vscale x 2 x half> %a) {