15 changes: 15 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -268,6 +268,7 @@ static bool isMergePassthruOpcode(unsigned Opc) {
case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
+case AArch64ISD::FCVTX_MERGE_PASSTHRU:
case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
case AArch64ISD::FSQRT_MERGE_PASSTHRU:
@@ -2622,6 +2623,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
+MAKE_CASE(AArch64ISD::FCVTX_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
@@ -4363,6 +4365,19 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
// Set the quiet bit.
if (!DAG.isKnownNeverSNaN(SrcVal))
NaN = DAG.getNode(ISD::OR, DL, I32, Narrow, ImmV(0x400000));
+} else if (SrcVT == MVT::nxv2f64 &&
+           (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
+  // Round to float without introducing rounding errors and try again.
+  SDValue Pg = getPredicateForVector(DAG, DL, MVT::nxv2f32);
+  Narrow = DAG.getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU, DL, MVT::nxv2f32,
+                       Pg, SrcVal, DAG.getUNDEF(MVT::nxv2f32));
+
+  SmallVector<SDValue, 3> NewOps;
+  if (IsStrict)
+    NewOps.push_back(Op.getOperand(0));
+  NewOps.push_back(Narrow);
+  NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1));
+  return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());
} else
return SDValue();

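A note on the new else-if above: lowering an nxv2f64 -> nxv2bf16 fptrunc as two round-to-nearest steps (f64 -> f32, then f32 -> bf16) can double-round to the wrong result. FCVTX instead converts f64 -> f32 with round-to-odd, which preserves the inexactness of the first step, so the second rounding always lands on the correctly rounded bf16 value. A minimal scalar sketch of both paths, illustrative only and not part of the patch (function names are mine; NaN quieting is omitted here and covered by the tests below):

```cpp
// Illustrative only -- not part of this patch. Shows why the FCVTX
// (round-to-odd) step makes the two-step f64->f32->bf16 truncation safe.
#include <bit>
#include <cstdint>
#include <cstdio>

// Emulates FCVTX on one lane: convert f64->f32, and if the conversion was
// inexact, force the least significant mantissa bit of the result to 1.
static float roundToOdd(double D) {
  float F = static_cast<float>(D); // default round-to-nearest-even
  if (static_cast<double>(F) != D) // inexact: set the "odd" sticky bit
    F = std::bit_cast<float>(std::bit_cast<uint32_t>(F) | 1u);
  return F;
}

// f32->bf16 round-to-nearest-even: the add-0x7fff-plus-lsb trick that the
// NOBF16 codegen below uses. NaN quieting (fcmuo + orr 0x400000) omitted.
static uint16_t floatToBF16(float F) {
  uint32_t Bits = std::bit_cast<uint32_t>(F);
  Bits += 0x7fff + ((Bits >> 16) & 1);
  return static_cast<uint16_t>(Bits >> 16);
}

int main() {
  // 1 + 2^-8 + 2^-30: the correctly rounded bf16 is 1 + 2^-7 (0x3f81),
  // but naive double rounding collapses the value to 1.0 (0x3f80).
  double D = 0x1.01000004p0;
  printf("naive double rounding: 0x%04x\n",
         (unsigned)floatToBF16(static_cast<float>(D)));
  printf("via round-to-odd:      0x%04x\n",
         (unsigned)floatToBF16(roundToOdd(D)));
  return 0;
}
```

Built with any C++20 compiler, this prints 0x3f80 for the naive path and 0x3f81, the correctly rounded answer, via round-to-odd.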
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -158,6 +158,7 @@ enum NodeType : unsigned {
FP_EXTEND_MERGE_PASSTHRU,
UINT_TO_FP_MERGE_PASSTHRU,
SINT_TO_FP_MERGE_PASSTHRU,
+FCVTX_MERGE_PASSTHRU,
FCVTZU_MERGE_PASSTHRU,
FCVTZS_MERGE_PASSTHRU,
SIGN_EXTEND_INREG_MERGE_PASSTHRU,
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -357,6 +357,7 @@ def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64fcvtx_mt : SDNode<"AArch64ISD::FCVTX_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>;

@@ -3779,7 +3780,7 @@ let Predicates = [HasSVE2orSME, UseExperimentalZeroingPseudos] in {
let Predicates = [HasSVE2orSME] in {
// SVE2 floating-point convert precision
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">;
-defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">;
+defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx", AArch64fcvtx_mt>;
defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">;
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">;

10 changes: 7 additions & 3 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -188,10 +188,14 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
(hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
}

-/// Returns true if the target has access to either the full range of SVE instructions,
-/// or the streaming-compatible subset of SVE instructions.
+/// Returns true if the target has access to the streaming-compatible subset
+/// of SVE instructions.
+bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); }
+
+/// Returns true if the target has access to either the full range of SVE
+/// instructions, or the streaming-compatible subset of SVE instructions.
bool isSVEorStreamingSVEAvailable() const {
-return hasSVE() || (hasSME() && isStreaming());
+return hasSVE() || isStreamingSVEAvailable();
}

unsigned getMinVectorRegisterBitWidth() const {
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3059,9 +3059,11 @@ multiclass sve2_fp_un_pred_zeroing_hsd<SDPatternOperator op> {
def : SVE_1_Op_PassthruZero_Pat<nxv2i64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_ZERO)>;
}

-multiclass sve2_fp_convert_down_odd_rounding<string asm, string op> {
+multiclass sve2_fp_convert_down_odd_rounding<string asm, string op, SDPatternOperator ir_op = null_frag> {
def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>;

def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+def : SVE_1_Op_Passthru_Pat<nxv2f32, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}

//===----------------------------------------------------------------------===//
201 changes: 201 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
@@ -0,0 +1,201 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=NOBF16
; RUN: llc -mattr=+sve2 --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=NOBF16NNAN
; RUN: llc -mattr=+sve2,+bf16 < %s | FileCheck %s --check-prefixes=BF16
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=BF16

target triple = "aarch64-unknown-linux-gnu"

define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16(<vscale x 2 x double> %a) {
; NOBF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
; NOBF16: // %bb.0:
; NOBF16-NEXT: ptrue p0.d
; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff
; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
; NOBF16-NEXT: lsr z2.s, z0.s, #16
; NOBF16-NEXT: add z1.s, z0.s, z1.s
; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
; NOBF16-NEXT: orr z0.s, z0.s, #0x400000
; NOBF16-NEXT: and z2.s, z2.s, #0x1
; NOBF16-NEXT: add z1.s, z2.s, z1.s
; NOBF16-NEXT: sel z0.s, p0, z0.s, z1.s
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: ret
;
; NOBF16NNAN-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
; NOBF16NNAN: // %bb.0:
; NOBF16NNAN-NEXT: ptrue p0.d
; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff
; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16
; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s
; NOBF16NNAN-NEXT: and z2.s, z2.s, #0x1
; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s
; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
; NOBF16NNAN-NEXT: ret
;
; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
; BF16-NEXT: ret
%res = fptrunc <vscale x 2 x double> %a to <vscale x 2 x bfloat>
ret <vscale x 2 x bfloat> %res
}

define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double> %a) {
; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
; NOBF16: // %bb.0:
; NOBF16-NEXT: ptrue p0.d
; NOBF16-NEXT: mov z2.s, #32767 // =0x7fff
; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d
; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
; NOBF16-NEXT: lsr z3.s, z1.s, #16
; NOBF16-NEXT: lsr z4.s, z0.s, #16
; NOBF16-NEXT: add z5.s, z1.s, z2.s
; NOBF16-NEXT: add z2.s, z0.s, z2.s
; NOBF16-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s
; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
; NOBF16-NEXT: orr z1.s, z1.s, #0x400000
; NOBF16-NEXT: orr z0.s, z0.s, #0x400000
; NOBF16-NEXT: and z3.s, z3.s, #0x1
; NOBF16-NEXT: and z4.s, z4.s, #0x1
; NOBF16-NEXT: add z3.s, z3.s, z5.s
; NOBF16-NEXT: add z2.s, z4.s, z2.s
; NOBF16-NEXT: sel z1.s, p1, z1.s, z3.s
; NOBF16-NEXT: sel z0.s, p0, z0.s, z2.s
; NOBF16-NEXT: lsr z1.s, z1.s, #16
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
; NOBF16-NEXT: ret
;
; NOBF16NNAN-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
; NOBF16NNAN: // %bb.0:
; NOBF16NNAN-NEXT: ptrue p0.d
; NOBF16NNAN-NEXT: mov z2.s, #32767 // =0x7fff
; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d
; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
; NOBF16NNAN-NEXT: lsr z3.s, z1.s, #16
; NOBF16NNAN-NEXT: lsr z4.s, z0.s, #16
; NOBF16NNAN-NEXT: add z1.s, z1.s, z2.s
; NOBF16NNAN-NEXT: add z0.s, z0.s, z2.s
; NOBF16NNAN-NEXT: and z3.s, z3.s, #0x1
; NOBF16NNAN-NEXT: and z4.s, z4.s, #0x1
; NOBF16NNAN-NEXT: add z1.s, z3.s, z1.s
; NOBF16NNAN-NEXT: add z0.s, z4.s, z0.s
; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s
; NOBF16NNAN-NEXT: ret
;
; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
; BF16-NEXT: fcvtx z1.s, p0/m, z1.d
; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
; BF16-NEXT: uzp1 z0.s, z0.s, z1.s
; BF16-NEXT: ret
%res = fptrunc <vscale x 4 x double> %a to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %res
}

define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double> %a) {
; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
; NOBF16: // %bb.0:
; NOBF16-NEXT: ptrue p0.d
; NOBF16-NEXT: mov z4.s, #32767 // =0x7fff
; NOBF16-NEXT: fcvtx z3.s, p0/m, z3.d
; NOBF16-NEXT: fcvtx z2.s, p0/m, z2.d
; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d
; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
; NOBF16-NEXT: lsr z5.s, z3.s, #16
; NOBF16-NEXT: lsr z6.s, z2.s, #16
; NOBF16-NEXT: lsr z7.s, z1.s, #16
; NOBF16-NEXT: lsr z24.s, z0.s, #16
; NOBF16-NEXT: add z25.s, z3.s, z4.s
; NOBF16-NEXT: add z26.s, z2.s, z4.s
; NOBF16-NEXT: add z27.s, z1.s, z4.s
; NOBF16-NEXT: add z4.s, z0.s, z4.s
; NOBF16-NEXT: fcmuo p1.s, p0/z, z3.s, z3.s
; NOBF16-NEXT: and z5.s, z5.s, #0x1
; NOBF16-NEXT: and z6.s, z6.s, #0x1
; NOBF16-NEXT: and z7.s, z7.s, #0x1
; NOBF16-NEXT: and z24.s, z24.s, #0x1
; NOBF16-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s
; NOBF16-NEXT: fcmuo p3.s, p0/z, z1.s, z1.s
; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
; NOBF16-NEXT: orr z3.s, z3.s, #0x400000
; NOBF16-NEXT: orr z2.s, z2.s, #0x400000
; NOBF16-NEXT: add z5.s, z5.s, z25.s
; NOBF16-NEXT: add z6.s, z6.s, z26.s
; NOBF16-NEXT: add z7.s, z7.s, z27.s
; NOBF16-NEXT: add z4.s, z24.s, z4.s
; NOBF16-NEXT: orr z1.s, z1.s, #0x400000
; NOBF16-NEXT: orr z0.s, z0.s, #0x400000
; NOBF16-NEXT: sel z3.s, p1, z3.s, z5.s
; NOBF16-NEXT: sel z2.s, p2, z2.s, z6.s
; NOBF16-NEXT: sel z1.s, p3, z1.s, z7.s
; NOBF16-NEXT: sel z0.s, p0, z0.s, z4.s
; NOBF16-NEXT: lsr z3.s, z3.s, #16
; NOBF16-NEXT: lsr z2.s, z2.s, #16
; NOBF16-NEXT: lsr z1.s, z1.s, #16
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: uzp1 z2.s, z2.s, z3.s
; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
; NOBF16-NEXT: uzp1 z0.h, z0.h, z2.h
; NOBF16-NEXT: ret
;
; NOBF16NNAN-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
; NOBF16NNAN: // %bb.0:
; NOBF16NNAN-NEXT: ptrue p0.d
; NOBF16NNAN-NEXT: mov z4.s, #32767 // =0x7fff
; NOBF16NNAN-NEXT: fcvtx z3.s, p0/m, z3.d
; NOBF16NNAN-NEXT: fcvtx z2.s, p0/m, z2.d
; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d
; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
; NOBF16NNAN-NEXT: lsr z5.s, z3.s, #16
; NOBF16NNAN-NEXT: lsr z6.s, z2.s, #16
; NOBF16NNAN-NEXT: lsr z7.s, z1.s, #16
; NOBF16NNAN-NEXT: lsr z24.s, z0.s, #16
; NOBF16NNAN-NEXT: add z3.s, z3.s, z4.s
; NOBF16NNAN-NEXT: add z2.s, z2.s, z4.s
; NOBF16NNAN-NEXT: add z1.s, z1.s, z4.s
; NOBF16NNAN-NEXT: add z0.s, z0.s, z4.s
; NOBF16NNAN-NEXT: and z5.s, z5.s, #0x1
; NOBF16NNAN-NEXT: and z6.s, z6.s, #0x1
; NOBF16NNAN-NEXT: and z7.s, z7.s, #0x1
; NOBF16NNAN-NEXT: and z24.s, z24.s, #0x1
; NOBF16NNAN-NEXT: add z3.s, z5.s, z3.s
; NOBF16NNAN-NEXT: add z2.s, z6.s, z2.s
; NOBF16NNAN-NEXT: add z1.s, z7.s, z1.s
; NOBF16NNAN-NEXT: add z0.s, z24.s, z0.s
; NOBF16NNAN-NEXT: lsr z3.s, z3.s, #16
; NOBF16NNAN-NEXT: lsr z2.s, z2.s, #16
; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
; NOBF16NNAN-NEXT: uzp1 z2.s, z2.s, z3.s
; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s
; NOBF16NNAN-NEXT: uzp1 z0.h, z0.h, z2.h
; NOBF16NNAN-NEXT: ret
;
; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
; BF16-NEXT: fcvtx z3.s, p0/m, z3.d
; BF16-NEXT: fcvtx z2.s, p0/m, z2.d
; BF16-NEXT: fcvtx z1.s, p0/m, z1.d
; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
; BF16-NEXT: bfcvt z3.h, p0/m, z3.s
; BF16-NEXT: bfcvt z2.h, p0/m, z2.s
; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
; BF16-NEXT: uzp1 z2.s, z2.s, z3.s
; BF16-NEXT: uzp1 z0.s, z0.s, z1.s
; BF16-NEXT: uzp1 z0.h, z0.h, z2.h
; BF16-NEXT: ret
%res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %res
}
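An aside on the NOBF16 sequences above: the mov #32767 / lsr #16 / and #0x1 / add chain implements f32 -> bf16 round-to-nearest-even entirely in integer arithmetic, and the fcmuo / orr #0x400000 / sel triple replaces the rounded result with a quieted NaN whenever a lane compares unordered with itself; the NNAN run line drops exactly that triple. A scalar model of one lane, again illustrative rather than taken from the patch:

```cpp
// Illustrative scalar model of one NOBF16 lane: round-to-nearest-even
// plus the quiet-NaN fixup that --enable-no-nans-fp-math removes.
#include <bit>
#include <cstdint>
#include <cstdio>

static uint16_t floatToBF16WithNaN(float F) {
  uint32_t Bits = std::bit_cast<uint32_t>(F);
  // mov #32767 / lsr #16 / and #0x1 / add: round to nearest, ties to even.
  uint32_t Rounded = Bits + 0x7fff + ((Bits >> 16) & 1);
  // fcmuo is true iff the lane is NaN; orr #0x400000 sets the quiet bit;
  // sel picks the quieted value over the rounded one.
  uint32_t Quieted = Bits | 0x400000;
  return static_cast<uint16_t>((F != F ? Quieted : Rounded) >> 16);
}

int main() {
  float SNaN = std::bit_cast<float>(0x7f800001u); // signaling NaN, payload 1
  printf("bf16(SNaN) = 0x%04x\n", (unsigned)floatToBF16WithNaN(SNaN));
  return 0; // prints 0x7fc0: the quiet bit is set, the NaN is preserved
}
```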