Skip to content

Commit 8d8a955

Browse files
committed
[NVPTX] Add mix precision arith intrinsics
This change adds mixed-precision "add" and "sub" operations.
1 parent 497382e commit 8d8a955

File tree

3 files changed

+602
-0
lines changed

3 files changed

+602
-0
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,6 +1306,48 @@ let TargetPrefix = "nvvm" in {
13061306
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
13071307
[IntrNoMem, IntrSpeculatable, Commutative]>;
13081308

1309+
// Mixed-precision add: (half | bfloat16) + float -> float, per rounding/sat mode
1310+
foreach rnd = ["rn", "rz", "rm", "rp"] in {
1311+
foreach sat = ["", "_sat"] in {
1312+
// half first operand, float second operand, float result
1313+
def int_nvvm_add_#rnd#sat#_h_f
1314+
: ClangBuiltin<"__nvvm_add_"#rnd#sat#"_h_f">,
1315+
DefaultAttrsIntrinsic<[llvm_float_ty],
1316+
[llvm_half_ty, llvm_float_ty],
1317+
[IntrNoMem, IntrSpeculatable]>;
1318+
1319+
// bfloat16 first operand, float second operand, float result
1320+
def int_nvvm_add_#rnd#sat#_bf_f
1321+
: ClangBuiltin<"__nvvm_add_"#rnd#sat#"_bf_f">,
1322+
DefaultAttrsIntrinsic<[llvm_float_ty],
1323+
[llvm_bfloat_ty, llvm_float_ty],
1324+
[IntrNoMem, IntrSpeculatable]>;
1325+
}
1326+
}
1327+
1328+
//
1329+
// Sub
1330+
//
1331+
1332+
// Mixed-precision sub: (half | bfloat16) - float -> float, per rounding/sat mode
1333+
foreach rnd = ["rn", "rz", "rm", "rp"] in {
1334+
foreach sat = ["", "_sat"] in {
1335+
// half first operand, float second operand, float result
1336+
def int_nvvm_sub_#rnd#sat#_h_f
1337+
: ClangBuiltin<"__nvvm_sub_"#rnd#sat#"_h_f">,
1338+
DefaultAttrsIntrinsic<[llvm_float_ty],
1339+
[llvm_half_ty, llvm_float_ty],
1340+
[IntrNoMem, IntrSpeculatable]>;
1341+
1342+
// bfloat16 first operand, float second operand, float result
1343+
def int_nvvm_sub_#rnd#sat#_bf_f
1344+
: ClangBuiltin<"__nvvm_sub_"#rnd#sat#"_bf_f">,
1345+
DefaultAttrsIntrinsic<[llvm_float_ty],
1346+
[llvm_bfloat_ty, llvm_float_ty],
1347+
[IntrNoMem, IntrSpeculatable]>;
1348+
}
1349+
}
1350+
13091351
//
13101352
// Dot Product
13111353
//

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1656,6 +1656,51 @@ def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
16561656
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
16571657
Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
16581658

1659+
// Select int_nvvm_add_<rnd>[_sat]_{h,bf}_f to "add.<rnd>[.sat].f32.{f16,bf16}"
1660+
foreach rnd = ["rn", "rz", "rm", "rp"] in {
1661+
foreach sat = ["", "_sat"] in {
1662+
// .f16 variant; !subst rewrites "_sat" to ".sat" for the asm mnemonic
1663+
def INT_NVVM_ADD_#!toupper(rnd#sat)#_H_F
1664+
: F_MATH_2<"add."#rnd#!subst("_", ".",
1665+
sat)#".f32.f16 \t$dst, $src0, $src1;",
1666+
Float32Regs, Int16Regs, Float32Regs,
1667+
!cast<Intrinsic>("int_nvvm_add_"#rnd#sat#"_h_f"),
1668+
[hasPTX<86>, hasSM<100>]>;
1669+
1670+
// .bf16 variant; same register classes and predicates as the .f16 one
1671+
def INT_NVVM_ADD_#!toupper(rnd#sat)#_BF_F
1672+
: F_MATH_2<"add."#rnd#!subst("_", ".",
1673+
sat)#".f32.bf16 \t$dst, $src0, $src1;",
1674+
Float32Regs, Int16Regs, Float32Regs,
1675+
!cast<Intrinsic>("int_nvvm_add_"#rnd#sat#"_bf_f"),
1676+
[hasPTX<86>, hasSM<100>]>;
1677+
}
1678+
}
1679+
1680+
//
1681+
// Sub
1682+
//
1683+
// Select int_nvvm_sub_<rnd>[_sat]_{h,bf}_f to "sub.<rnd>[.sat].f32.{f16,bf16}"
1684+
foreach rnd = ["rn", "rz", "rm", "rp"] in {
1685+
foreach sat = ["", "_sat"] in {
1686+
// .f16 variant; !subst rewrites "_sat" to ".sat" for the asm mnemonic
1687+
def INT_NVVM_SUB_#!toupper(rnd#sat)#_H_F
1688+
: F_MATH_2<"sub."#rnd#!subst("_", ".",
1689+
sat)#".f32.f16 \t$dst, $src0, $src1;",
1690+
Float32Regs, Int16Regs, Float32Regs,
1691+
!cast<Intrinsic>("int_nvvm_sub_"#rnd#sat#"_h_f"),
1692+
[hasPTX<86>, hasSM<100>]>;
1693+
1694+
// .bf16 variant; same register classes and predicates as the .f16 one
1695+
def INT_NVVM_SUB_#!toupper(rnd#sat)#_BF_F
1696+
: F_MATH_2<"sub."#rnd#!subst("_", ".",
1697+
sat)#".f32.bf16 \t$dst, $src0, $src1;",
1698+
Float32Regs, Int16Regs, Float32Regs,
1699+
!cast<Intrinsic>("int_nvvm_sub_"#rnd#sat#"_bf_f"),
1700+
[hasPTX<86>, hasSM<100>]>;
1701+
}
1702+
}
1703+
16591704
//
16601705
// BFIND
16611706
//

0 commit comments

Comments
 (0)