1 change: 1 addition & 0 deletions clang/docs/LanguageExtensions.rst
@@ -1009,6 +1009,7 @@ to ``float``; see below for more information on this emulation.
* 64-bit ARM (AArch64)
* RISC-V
* X86 (when SSE2 is available)
* LoongArch

(For X86, SSE2 is available on 64-bit and all recent 32-bit processors.)
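With LoongArch on this list, ``__bf16`` becomes available as an arithmetic type on the target. A minimal sketch of a translation unit that should now compile for a loongarch64 triple (hypothetical example, not part of the patch; the function name and build line are illustrative only):

// bf16_demo.c -- hypothetical usage example.
// Build sketch: clang --target=loongarch64-unknown-linux-gnu -O2 -c bf16_demo.c
__bf16 scale(__bf16 x) {
  // Arithmetic on __bf16 is emulated by promoting to float and truncating
  // back, as described in the documentation above.
  return x * (__bf16)2.0f;
}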

5 changes: 5 additions & 0 deletions clang/lib/Basic/Targets/LoongArch.h
@@ -49,6 +49,9 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
HasFeatureLD_SEQ_SA = false;
HasFeatureDiv32 = false;
HasFeatureSCQ = false;
BFloat16Width = 16;
BFloat16Align = 16;
BFloat16Format = &llvm::APFloat::BFloat();
LongDoubleWidth = 128;
LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::IEEEquad();
@@ -99,6 +102,8 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {

bool hasBitIntType() const override { return true; }

bool hasBFloat16Type() const override { return true; }

bool useFP16ConversionIntrinsics() const override { return false; }

bool handleTargetFeatures(std::vector<std::string> &Features,
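The new target fields give ``__bf16`` the expected layout on LoongArch: 16 bits wide, 16-bit aligned, with BFloat semantics. A hedged sanity check of what that implies in C (illustrative only, not taken from the patch's tests):

// Assumes a loongarch32/loongarch64 target with this change applied.
_Static_assert(sizeof(__bf16) == 2, "__bf16 occupies 2 bytes");
_Static_assert(_Alignof(__bf16) == 2, "__bf16 is 2-byte aligned");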
532 changes: 532 additions & 0 deletions clang/test/CodeGen/LoongArch/bfloat-abi.c

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions clang/test/CodeGen/LoongArch/bfloat-mangle.cpp
@@ -0,0 +1,12 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple loongarch32 -emit-llvm -o - %s | FileCheck %s

// CHECK-LABEL: define dso_local void @_Z3fooDF16b
// CHECK-SAME: (bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
// CHECK-NEXT: ret void
//
void foo(__bf16 b) {}
54 changes: 50 additions & 4 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -182,6 +182,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasBasicF()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -203,6 +205,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);

if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f32, Legal);
@@ -221,6 +226,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasBasicD()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
@@ -243,6 +250,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);

if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -497,6 +507,10 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerFP_TO_FP16(Op, DAG);
case ISD::FP16_TO_FP:
return lowerFP16_TO_FP(Op, DAG);
case ISD::FP_TO_BF16:
return lowerFP_TO_BF16(Op, DAG);
case ISD::BF16_TO_FP:
return lowerBF16_TO_FP(Op, DAG);
}
return SDValue();
}
@@ -2283,6 +2297,36 @@ SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
return Res;
}

SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
SDLoc DL(Op);
MakeLibCallOptions CallOptions;
RTLIB::Libcall LC =
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
SDValue Res =
makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
if (Subtarget.is64Bit())
return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
return DAG.getBitcast(MVT::i32, Res);
}

SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
Op = DAG.getNode(
ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
DL, MVT::f32, Op)
: DAG.getBitcast(MVT::f32, Op);
if (VT != MVT::f32)
return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
return Res;
}
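The two new lowerings split the work: FP_TO_BF16 defers to the ``__truncsfbf2``/``__truncdfbf2`` libcalls (correct rounding to bf16 is non-trivial), while BF16_TO_FP is expanded inline, since a bfloat16 is exactly the upper 16 bits of an IEEE-754 binary32. A small C model of that widening step (an illustration of the idea under that assumption, not the compiler code itself; the helper name is made up):

#include <stdint.h>
#include <string.h>

// Shift the 16-bit bf16 pattern into the high half of a 32-bit word and
// reinterpret it as float -- the same SHL-by-16 plus bitcast/MOVGR2FR.W
// sequence the lowering emits. The conversion is exact.
static float bf16_bits_to_float(uint16_t bits) {
  uint32_t word = (uint32_t)bits << 16;
  float result;
  memcpy(&result, &word, sizeof result);
  return result;
}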

static bool isConstantOrUndef(const SDValue Op) {
if (Op->isUndef())
return true;
@@ -7714,8 +7758,9 @@ bool LoongArchTargetLowering::splitValueIntoRegisterParts(
bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();

if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
// Cast the f16 to i16, extend to i32, pad with ones to make a float
if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
// Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
// nan, and cast to f32.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
@@ -7734,10 +7779,11 @@ SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();

if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
SDValue Val = Parts[0];

// Cast the f32 to i32, truncate to i16, and cast back to f16.
// Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
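For the ABI register copies, a [b]f16 value travelling in an f32 register is NaN-boxed: the 16 significant bits sit in the low half, the upper bits are filled with ones, and joinRegisterPartsIntoValue simply truncates them back off. A rough model of the boxed bit pattern (an assumption for illustration, not the DAG code; the helper name is hypothetical):

#include <stdint.h>

// NaN-box a 16-bit half/bfloat pattern into the i32 image of an f32
// argument register: low 16 bits carry the value, high 16 bits are ones,
// so the resulting f32 bit pattern is a NaN.
static uint32_t nan_box_half(uint16_t half_bits) {
  return 0xFFFF0000u | (uint32_t)half_bits;
}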
2 changes: 2 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -363,6 +363,8 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBF16_TO_FP(SDValue Op, SelectionDAG &DAG) const;

bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
172 changes: 172 additions & 0 deletions llvm/test/CodeGen/LoongArch/bf16-promote.ll
@@ -0,0 +1,172 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=loongarch64 -mattr=+d -target-abi=lp64d < %s | FileCheck --check-prefixes=CHECK,LA64 %s
; RUN: llc -mtriple=loongarch32 -mattr=+d -target-abi=ilp32d < %s | FileCheck --check-prefixes=CHECK,LA32 %s

define void @test_load_store(ptr %p, ptr %q) nounwind {
; CHECK-LABEL: test_load_store:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: ret
%a = load bfloat, ptr %p
store bfloat %a, ptr %q
ret void
}

define float @test_fpextend_float(ptr %p) nounwind {
; LA64-LABEL: test_fpextend_float:
; LA64: # %bb.0:
; LA64-NEXT: ld.hu $a0, $a0, 0
; LA64-NEXT: slli.d $a0, $a0, 16
; LA64-NEXT: movgr2fr.w $fa0, $a0
; LA64-NEXT: ret
;
; LA32-LABEL: test_fpextend_float:
; LA32: # %bb.0:
; LA32-NEXT: ld.hu $a0, $a0, 0
; LA32-NEXT: slli.w $a0, $a0, 16
; LA32-NEXT: movgr2fr.w $fa0, $a0
; LA32-NEXT: ret
%a = load bfloat, ptr %p
%r = fpext bfloat %a to float
ret float %r
}

define double @test_fpextend_double(ptr %p) nounwind {
; LA64-LABEL: test_fpextend_double:
; LA64: # %bb.0:
; LA64-NEXT: ld.hu $a0, $a0, 0
; LA64-NEXT: slli.d $a0, $a0, 16
; LA64-NEXT: movgr2fr.w $fa0, $a0
; LA64-NEXT: fcvt.d.s $fa0, $fa0
; LA64-NEXT: ret
;
; LA32-LABEL: test_fpextend_double:
; LA32: # %bb.0:
; LA32-NEXT: ld.hu $a0, $a0, 0
; LA32-NEXT: slli.w $a0, $a0, 16
; LA32-NEXT: movgr2fr.w $fa0, $a0
; LA32-NEXT: fcvt.d.s $fa0, $fa0
; LA32-NEXT: ret
%a = load bfloat, ptr %p
%r = fpext bfloat %a to double
ret double %r
}

define void @test_fptrunc_float(float %f, ptr %p) nounwind {
; LA64-LABEL: test_fptrunc_float:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: move $fp, $a0
; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfbf2)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: movfr2gr.s $a0, $fa0
; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
;
; LA32-LABEL: test_fptrunc_float:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: move $fp, $a0
; LA32-NEXT: bl __truncsfbf2
; LA32-NEXT: movfr2gr.s $a0, $fa0
; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
%a = fptrunc float %f to bfloat
store bfloat %a, ptr %p
ret void
}

define void @test_fptrunc_double(double %d, ptr %p) nounwind {
; LA64-LABEL: test_fptrunc_double:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: move $fp, $a0
; LA64-NEXT: pcaddu18i $ra, %call36(__truncdfbf2)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: movfr2gr.s $a0, $fa0
; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
;
; LA32-LABEL: test_fptrunc_double:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: move $fp, $a0
; LA32-NEXT: bl __truncdfbf2
; LA32-NEXT: movfr2gr.s $a0, $fa0
; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
%a = fptrunc double %d to bfloat
store bfloat %a, ptr %p
ret void
}

define void @test_fadd(ptr %p, ptr %q) nounwind {
; LA64-LABEL: test_fadd:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: ld.hu $a1, $a1, 0
; LA64-NEXT: move $fp, $a0
; LA64-NEXT: ld.hu $a0, $a0, 0
; LA64-NEXT: slli.d $a1, $a1, 16
; LA64-NEXT: movgr2fr.w $fa0, $a1
; LA64-NEXT: slli.d $a0, $a0, 16
; LA64-NEXT: movgr2fr.w $fa1, $a0
; LA64-NEXT: fadd.s $fa0, $fa1, $fa0
; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfbf2)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: movfr2gr.s $a0, $fa0
; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
;
; LA32-LABEL: test_fadd:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: ld.hu $a1, $a1, 0
; LA32-NEXT: move $fp, $a0
; LA32-NEXT: ld.hu $a0, $a0, 0
; LA32-NEXT: slli.w $a1, $a1, 16
; LA32-NEXT: movgr2fr.w $fa0, $a1
; LA32-NEXT: slli.w $a0, $a0, 16
; LA32-NEXT: movgr2fr.w $fa1, $a0
; LA32-NEXT: fadd.s $fa0, $fa1, $fa0
; LA32-NEXT: bl __truncsfbf2
; LA32-NEXT: movfr2gr.s $a0, $fa0
; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
%a = load bfloat, ptr %p
%b = load bfloat, ptr %q
%r = fadd bfloat %a, %b
store bfloat %r, ptr %p
ret void
}