diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index aefbbe2534be2..18a9ec7c61319 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1289,6 +1289,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); + + // f16 -> i16 conversion intrinsics need custom lowering + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); } else { // when AArch64 doesn't have fullfp16 support, promote the input // to i32 first. @@ -28238,6 +28241,16 @@ void AArch64TargetLowering::ReplaceNodeResults( Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } + case Intrinsic::aarch64_neon_fcvtzs: { + if (VT.getScalarType() != MVT::i16) + return; + + SDLoc DL(N); + auto CVT = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, VT, N->getOperand(1), + DAG.getValueType(MVT::i16)); + Results.push_back(CVT); + return; + } } } case ISD::READ_REGISTER: { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index ee34a85a5b507..300a4ee0beeac 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2278,6 +2278,23 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { } return false; } + case TargetOpcode::G_INTRINSIC: { + unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID(); + switch (IntrinID) { + default: + break; + case Intrinsic::aarch64_neon_fcvtzs: { + const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + if (DstTy != LLT::scalar(16)) + return false; + // Remove the no longer needed intrinsic ID operand + I.removeOperand(1); + I.setDesc(TII.get(TargetOpcode::G_FPTOSI_SAT)); + return true; + } 
+ } + return false; + } default: return false; } diff --git a/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll b/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll new file mode 100644 index 0000000000000..955ee2e4b319f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fp16_s16_intrinsic_scalar.ll @@ -0,0 +1,20 @@ +; Test fp16 -> s16 conversion intrinsics which require special handling to ensure correct behaviour. +; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK-SD + +declare i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half) + +define i16 @fcvtzs_intrinsic_i16(half %a) { +; CHECK-SD-LABEL: fcvtzs_intrinsic_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcvtzs w8, h0 +; CHECK-SD-NEXT: mov w9, #32767 +; CHECK-SD-NEXT: cmp w8, w9 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-32768 +; CHECK-SD-NEXT: cmn w8, #8, lsl #12 +; CHECK-SD-NEXT: csel +; CHECK-SD-NEXT: ret +entry: + %fcvt = tail call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a) + ret i16 %fcvt +}