Skip to content

Commit d54f68e

Browse files
committed
[NVPTX] Fix PTX and SM conditions for narrow FP conversions
This change fixes the PTX and SM conditions for narrow FP conversion intrinsics. It also adds the `AnyPred` helper class to make it easier to combine multiple predicates with OR.
1 parent 35a95fe commit d54f68e

File tree

2 files changed

+40
-28
lines changed

2 files changed

+40
-28
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ def PrmtMode : Operand<i32> {
101101
// NVPTX Instruction Predicate Definitions
102102
//===----------------------------------------------------------------------===//
103103

104+
// AnyPred - helper class that ORs multiple predicates together; an empty list yields a predicate that is always false.
105+
class AnyPred<list<Predicate> predicates> : Predicate<""> {
106+
let CondString = !foldl("false", predicates, acc, pred,
107+
acc # " || (" # pred.CondString # ")");
108+
}
109+
104110
// Checks the PTX version together with family-specific and architecture-specific SM versions.
105111
// For example, sm_100{f/a} and any future variants in the same family will match
106112
// for any PTX version greater than or equal to `PTXVersion`.

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2008,34 +2008,40 @@ def : Pat<(int_nvvm_ull2d_rp i64:$a), (CVT_f64_u64 $a, CvtRP)>;
20082008
def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), (CVT_f16_f32 $a, CvtRN_FTZ)>;
20092009
def : Pat<(int_nvvm_f2h_rn f32:$a), (CVT_f16_f32 $a, CvtRN)>;
20102010

2011-
def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
2012-
(CVT_e4m3x2_f32 $a, $b, CvtRN)>;
2013-
def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
2014-
(CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
2015-
def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
2016-
(CVT_e5m2x2_f32 $a, $b, CvtRN)>;
2017-
def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
2018-
(CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;
2019-
2020-
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
2021-
(CVT_e4m3x2_f16x2 $a, CvtRN)>;
2022-
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
2023-
(CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
2024-
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
2025-
(CVT_e5m2x2_f16x2 $a, CvtRN)>;
2026-
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
2027-
(CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;
2028-
2029-
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
2030-
(CVT_f16x2_e4m3x2 $a, CvtRN)>;
2031-
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
2032-
(CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
2033-
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
2034-
(CVT_f16x2_e5m2x2 $a, CvtRN)>;
2035-
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
2036-
(CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
2037-
2038-
let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in {
2011+
let Predicates = [hasPTX<81>, hasSM<89>] in {
2012+
def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
2013+
(CVT_e4m3x2_f32 $a, $b, CvtRN)>;
2014+
def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
2015+
(CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
2016+
def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
2017+
(CVT_e5m2x2_f32 $a, $b, CvtRN)>;
2018+
def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
2019+
(CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;
2020+
2021+
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
2022+
(CVT_e4m3x2_f16x2 $a, CvtRN)>;
2023+
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
2024+
(CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
2025+
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
2026+
(CVT_e5m2x2_f16x2 $a, CvtRN)>;
2027+
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
2028+
(CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;
2029+
2030+
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
2031+
(CVT_f16x2_e4m3x2 $a, CvtRN)>;
2032+
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
2033+
(CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
2034+
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
2035+
(CVT_f16x2_e5m2x2 $a, CvtRN)>;
2036+
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
2037+
(CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
2038+
}
2039+
2040+
let Predicates = [AnyPred<[
2041+
PTXWithFamilySMs<90, [100, 110, 120]>,
2042+
PTXWithFamilySMs<88, [100, 101, 120]>,
2043+
PTXWithAccelSMs<86, [100, 101, 120]>
2044+
]>] in {
20392045
def : Pat<(int_nvvm_ff_to_e2m3x2_rn_satfinite f32:$a, f32:$b),
20402046
(CVT_e2m3x2_f32_sf $a, $b, CvtRN)>;
20412047
def : Pat<(int_nvvm_ff_to_e2m3x2_rn_relu_satfinite f32:$a, f32:$b),

0 commit comments

Comments
 (0)