@@ -151,9 +151,6 @@ def doRsqrtOpt : Predicate<"doRsqrtOpt()">;
151151
// Selection predicates: each def wraps a C++ condition string; instruction
// definitions opt into one via Requires<[...]>.
152152def doMulWide : Predicate<"doMulWide">;
153153
// Removed by this change: the two f32 division-level predicates compared
// getDivF32Level() (no argument) against the raw values 0 and 1. The same
// change adds the fdiv_approx / fdiv_full PatFrags, which call
// getDivF32Level(N) and compare against NVPTX::DivPrecisionLevel instead.
154- def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
155- def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
156-
// f32 sqrt precision predicates (unchanged context): the two conditions are
// exact negations of each other.
157154def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
158155def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
159156
@@ -1119,26 +1116,19 @@ def INEG64 :
11191116//-----------------------------------
11201117
11211118// Constant 1.0f
// Leaf that matches an FP immediate only when it has IEEE single semantics
// and its value converts to exactly 1.0f.
// This change renames FloatConst1 (a PatLeaf over fpimm that reads
// N->getValueAPF()) to f32imm_1 (an FPImmLeaf<f32> whose predicate body reads
// Imm); the two checks in the predicate body are otherwise the same.
1122- def FloatConst1 : PatLeaf<(fpimm) , [{
1123- return &N->getValueAPF() .getSemantics() == &llvm::APFloat::IEEEsingle() &&
1124- N->getValueAPF() .convertToFloat() == 1.0f;
1119+ def f32imm_1 : FPImmLeaf<f32 , [{
1120+ return &Imm .getSemantics() == &llvm::APFloat::IEEEsingle() &&
1121+ Imm .convertToFloat() == 1.0f;
11251122}]>;
11261123// Constant 1.0 (double)
// Leaf that matches an FP immediate only when it has IEEE double semantics
// and its value converts to exactly 1.0.
// This change renames DoubleConst1 (PatLeaf over fpimm, reading
// N->getValueAPF()) to f64imm_1 (FPImmLeaf<f64>, reading Imm).
1127- def DoubleConst1 : PatLeaf<(fpimm) , [{
1128- return &N->getValueAPF() .getSemantics() == &llvm::APFloat::IEEEdouble() &&
1129- N->getValueAPF() .convertToDouble() == 1.0;
1124+ def f64imm_1 : FPImmLeaf<f64 , [{
1125+ return &Imm .getSemantics() == &llvm::APFloat::IEEEdouble() &&
1126+ Imm .convertToDouble() == 1.0;
11301127}]>;
11311128// Constant -1.0 (double)
// Leaf that matches an FP immediate only when it has IEEE double semantics
// and its value converts to exactly -1.0.
// This change renames DoubleConstNeg1 (PatLeaf over fpimm) to f64imm_neg1
// (FPImmLeaf<f64>, reading Imm).
1132- def DoubleConstNeg1 : PatLeaf<(fpimm), [{
1133- return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
1134- N->getValueAPF().convertToDouble() == -1.0;
1135- }]>;
1136-
1137-
// Removed by this change: NegDoubleConst built a negated f64 target constant
// from the matched immediate. Its only use visible in this diff fed the
// immediate operand of FDIV641r; the renamed FRCP64r no longer takes an
// immediate operand, so that use is removed as well.
1138- // Constant -X -> X (double)
1139- def NegDoubleConst : SDNodeXForm<fpimm, [{
1140- return CurDAG->getTargetConstantFP(-(N->getValueAPF()),
1141- SDLoc(N), MVT::f64);
1129+ def f64imm_neg1 : FPImmLeaf<f64, [{
1130+ return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
1131+ Imm.convertToDouble() == -1.0;
11421132}]>;
11431133
11441134defm FADD : F3_fma_component<"add", fadd>;
@@ -1189,11 +1179,11 @@ def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
11891179//
11901180// F64 division
11911181//
// f64 reciprocal: selects rcp.rn.f64 for (fdiv 1.0, $b).
// This change renames FDIV641r to FRCP64r and drops the f64imm:$a input
// operand, which the asm string never printed (it only uses $dst and $b).
// The matched constant is now the operand-less leaf f64imm_1 instead of
// DoubleConst1:$a.
1192- def FDIV641r :
1182+ def FRCP64r :
11931183 NVPTXInst<(outs Float64Regs:$dst),
1194- (ins f64imm:$a, Float64Regs:$b),
1184+ (ins Float64Regs:$b),
11951185 "rcp.rn.f64 \t$dst, $b;",
1196- [(set f64:$dst, (fdiv DoubleConst1:$a , f64:$b))]>;
1186+ [(set f64:$dst, (fdiv f64imm_1 , f64:$b))]>;
11971187def FDIV64rr :
11981188 NVPTXInst<(outs Float64Regs:$dst),
11991189 (ins Float64Regs:$a, Float64Regs:$b),
@@ -1207,109 +1197,114 @@ def FDIV64ri :
12071197
12081198// fdiv will be converted to rcp
12091199// (fdiv -1.0, X), i.e. fneg (fdiv 1.0, X) => fneg (rcp.rn X)
// The pattern matches a division whose numerator is the constant -1.0 and
// emits the f64 reciprocal followed by FNEGf64. The old form passed the
// negated constant into FDIV641r via NegDoubleConst; FRCP64r takes only $b,
// so the constant no longer appears in the output.
1210- def : Pat<(fdiv DoubleConstNeg1:$a , f64:$b),
1211- (FNEGf64 (FDIV641r (NegDoubleConst node:$a), $b))>;
1200+ def : Pat<(fdiv f64imm_neg1 , f64:$b),
1201+ (FNEGf64 (FRCP64r $b))>;
12121202
12131203//
12141204// F32 Approximate reciprocal
12151205//
// Diff note: the removed header "def FDIV321r_ftz :" below pairs with the
// added "def FRCP32_approx_r_ftz :" further down; the new fdiv_approx
// fragment is inserted between them.
1216- def FDIV321r_ftz :
1206+
// fdiv_approx: an fdiv node that matches only when getDivF32Level(N) reports
// NVPTX::DivPrecisionLevel::Approx for the node being selected. It replaces
// the do_DIVF32_APPROX entry in the Requires lists of the users below.
1207+ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
1208+ (fdiv node:$a, node:$b), [{
1209+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::Approx;
1210+ }]>;
1211+
1212+
// Approximate f32 reciprocal, .ftz form: selects rcp.approx.ftz.f32 for
// (fdiv_approx 1.0f, $b) when doF32FTZ holds. The f32imm:$a operand is
// dropped; the asm string only prints $dst and $b.
1213+ def FRCP32_approx_r_ftz :
12171214 NVPTXInst<(outs Float32Regs:$dst),
1218- (ins f32imm:$a, Float32Regs:$b),
1215+ (ins Float32Regs:$b),
12191216 "rcp.approx.ftz.f32 \t$dst, $b;",
1220- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1221- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1222- def FDIV321r :
1217+ [(set f32:$dst, (fdiv_approx f32imm_1 , f32:$b))]>,
1218+ Requires<[doF32FTZ]>;
// Approximate f32 reciprocal, non-.ftz form: same pattern with no Requires
// list, emitting rcp.approx.f32.
1219+ def FRCP32_approx_r :
12231220 NVPTXInst<(outs Float32Regs:$dst),
1224- (ins f32imm:$a, Float32Regs:$b),
1221+ (ins Float32Regs:$b),
12251222 "rcp.approx.f32 \t$dst, $b;",
1226- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1227- Requires<[do_DIVF32_APPROX]>;
1223+ [(set f32:$dst, (fdiv_approx f32imm_1 , f32:$b))]>;
1224+
12281225//
12291226// F32 Approximate division
12301227//
// Approximate f32 division, register/register, .ftz form: emits
// div.approx.ftz.f32. The precision-level gate moves from the removed
// do_DIVF32_APPROX predicate into the fdiv_approx fragment; doF32FTZ is
// still required.
12311228def FDIV32approxrr_ftz :
12321229 NVPTXInst<(outs Float32Regs:$dst),
12331230 (ins Float32Regs:$a, Float32Regs:$b),
12341231 "div.approx.ftz.f32 \t$dst, $a, $b;",
1235- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1236- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1232+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
1233+ Requires<[doF32FTZ]>;
// Approximate f32 division, register/immediate, .ftz form: the divisor $b is
// an f32imm operand matched with fpimm. Emits div.approx.ftz.f32; gated by
// the fdiv_approx fragment plus doF32FTZ.
12371234def FDIV32approxri_ftz :
12381235 NVPTXInst<(outs Float32Regs:$dst),
12391236 (ins Float32Regs:$a, f32imm:$b),
12401237 "div.approx.ftz.f32 \t$dst, $a, $b;",
1241- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1242- Requires<[do_DIVF32_APPROX, doF32FTZ]>;
1238+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
1239+ Requires<[doF32FTZ]>;
// Approximate f32 division, register/register, non-.ftz form: emits
// div.approx.f32. With do_DIVF32_APPROX removed, the definition has no
// Requires list; the fdiv_approx fragment carries the precision-level check.
12431240def FDIV32approxrr :
12441241 NVPTXInst<(outs Float32Regs:$dst),
12451242 (ins Float32Regs:$a, Float32Regs:$b),
12461243 "div.approx.f32 \t$dst, $a, $b;",
1247- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1248- Requires<[do_DIVF32_APPROX]>;
1244+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
// Approximate f32 division, register/immediate, non-.ftz form: emits
// div.approx.f32 with an f32imm divisor. No Requires list remains; the
// fdiv_approx fragment carries the precision-level check.
12491245def FDIV32approxri :
12501246 NVPTXInst<(outs Float32Regs:$dst),
12511247 (ins Float32Regs:$a, f32imm:$b),
12521248 "div.approx.f32 \t$dst, $a, $b;",
1253- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1254- Requires<[do_DIVF32_APPROX]>;
1249+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
12551250//
12561251// F32 Semi-accurate reciprocal
12571252//
12581253// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
12591254//
// Removed by this change: two dedicated instruction definitions that emitted
// the same rcp.approx[.ftz].f32 asm as the approximate-reciprocal
// instructions, but were gated on do_DIVF32_FULL. They are replaced by the
// two anonymous patterns at the end of this block, which reuse
// FRCP32_approx_r_ftz / FRCP32_approx_r.
1260- def FDIV321r_approx_ftz :
1261- NVPTXInst<(outs Float32Regs:$dst),
1262- (ins f32imm:$a, Float32Regs:$b),
1263- "rcp.approx.ftz.f32 \t$dst, $b;",
1264- [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>,
1265- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1266- def FDIV321r_approx :
1267- NVPTXInst<(outs Float32Regs:$dst),
1268- (ins f32imm:$a, Float32Regs:$b),
1269- "rcp.approx.f32 \t$dst, $b;",
1270- [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>,
1271- Requires<[do_DIVF32_FULL]>;
1255+
// fdiv_full: an fdiv node that matches only when getDivF32Level(N) reports
// NVPTX::DivPrecisionLevel::Full for the node being selected.
1256+ def fdiv_full : PatFrag<(ops node:$a, node:$b),
1257+ (fdiv node:$a, node:$b), [{
1258+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::Full;
1259+ }]>;
1260+
1261+
// (fdiv_full 1.0f, $b) => rcp.approx.ftz.f32 when doF32FTZ holds.
// NOTE(review): this pattern and the next match the same DAG; presumably the
// .ftz one wins under doF32FTZ because it is defined first. Confirm.
1262+ def : Pat<(fdiv_full f32imm_1, f32:$b),
1263+ (FRCP32_approx_r_ftz $b)>,
1264+ Requires<[doF32FTZ]>;
1265+
// (fdiv_full 1.0f, $b) => rcp.approx.f32, with no Requires list.
1266+ def : Pat<(fdiv_full f32imm_1, f32:$b),
1267+ (FRCP32_approx_r $b)>;
1268+
12721269//
12731270// F32 Semi-accurate division
12741271//
// Semi-accurate f32 division, register/register, .ftz form: emits
// div.full.ftz.f32. The precision-level gate moves from the removed
// do_DIVF32_FULL predicate into the fdiv_full fragment; doF32FTZ is still
// required. The numerator type in the pattern also changes from Float32Regs
// to f32.
12751272def FDIV32rr_ftz :
12761273 NVPTXInst<(outs Float32Regs:$dst),
12771274 (ins Float32Regs:$a, Float32Regs:$b),
12781275 "div.full.ftz.f32 \t$dst, $a, $b;",
1279- [(set f32:$dst, (fdiv Float32Regs :$a, f32:$b))]>,
1280- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1276+ [(set f32:$dst, (fdiv_full f32 :$a, f32:$b))]>,
1277+ Requires<[doF32FTZ]>;
// Semi-accurate f32 division, register/immediate, .ftz form: emits
// div.full.ftz.f32 with an f32imm divisor matched via fpimm. Gated by the
// fdiv_full fragment plus doF32FTZ.
12811278def FDIV32ri_ftz :
12821279 NVPTXInst<(outs Float32Regs:$dst),
12831280 (ins Float32Regs:$a, f32imm:$b),
12841281 "div.full.ftz.f32 \t$dst, $a, $b;",
1285- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1286- Requires<[do_DIVF32_FULL, doF32FTZ]>;
1282+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
1283+ Requires<[doF32FTZ]>;
// Semi-accurate f32 division, register/register, non-.ftz form: emits
// div.full.f32. With do_DIVF32_FULL removed, the definition has no Requires
// list; the fdiv_full fragment carries the precision-level check.
12871284def FDIV32rr :
12881285 NVPTXInst<(outs Float32Regs:$dst),
12891286 (ins Float32Regs:$a, Float32Regs:$b),
12901287 "div.full.f32 \t$dst, $a, $b;",
1291- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1292- Requires<[do_DIVF32_FULL]>;
1288+ [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
// Semi-accurate f32 division, register/immediate, non-.ftz form: emits
// div.full.f32 with an f32imm divisor. No Requires list remains; the
// fdiv_full fragment carries the precision-level check.
12931289def FDIV32ri :
12941290 NVPTXInst<(outs Float32Regs:$dst),
12951291 (ins Float32Regs:$a, f32imm:$b),
12961292 "div.full.f32 \t$dst, $a, $b;",
1297- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1298- Requires<[do_DIVF32_FULL]>;
1293+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
12991294//
13001295// F32 Accurate reciprocal
13011296//
// Accurate f32 reciprocal, .ftz form: selects rcp.rn.ftz.f32 for
// (fdiv 1.0f, $b) when doF32FTZ holds. Unlike the approximate and
// semi-accurate forms, the pattern uses plain fdiv with no precision-level
// fragment. This change renames FDIV321r_prec_ftz to FRCP32r_prec_ftz and
// drops the f32imm:$a operand, which the asm string never printed.
1302- def FDIV321r_prec_ftz :
1297+ def FRCP32r_prec_ftz :
13031298 NVPTXInst<(outs Float32Regs:$dst),
1304- (ins f32imm:$a, Float32Regs:$b),
1299+ (ins Float32Regs:$b),
13051300 "rcp.rn.ftz.f32 \t$dst, $b;",
1306- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>,
1301+ [(set f32:$dst, (fdiv f32imm_1 , f32:$b))]>,
13071302 Requires<[doF32FTZ]>;
// Accurate f32 reciprocal, non-.ftz form: selects rcp.rn.f32 for
// (fdiv 1.0f, $b) with no Requires list and plain fdiv in the pattern.
// This change renames FDIV321r_prec to FRCP32r_prec and drops the f32imm:$a
// operand, which the asm string never printed.
1308- def FDIV321r_prec :
1303+ def FRCP32r_prec :
13091304 NVPTXInst<(outs Float32Regs:$dst),
1310- (ins f32imm:$a, Float32Regs:$b),
1305+ (ins Float32Regs:$b),
13111306 "rcp.rn.f32 \t$dst, $b;",
1312- [(set f32:$dst, (fdiv FloatConst1:$a , f32:$b))]>;
1307+ [(set f32:$dst, (fdiv f32imm_1 , f32:$b))]>;
13131308//
13141309// F32 Accurate division
13151310//
0 commit comments