@@ -1222,20 +1222,20 @@ def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
12221222// F64 division
12231223//
// FRCP64r: f64 reciprocal. Pattern matches (fdiv f64imm_1, $b) and the
// mnemonic is rcp.rn.f64.
// Diff: base class NVPTXInst -> BasicNVPTXInst; the asm string is cut down to
// the bare mnemonic (the "\t$dst, $b; " operand text is no longer written out).
// NOTE(review): presumably BasicNVPTXInst derives the operand list from
// outs/ins -- confirm against its definition, which is not in this hunk.
12241224def FRCP64r :
1225- NVPTXInst <(outs Float64Regs:$dst),
1226- (ins Float64Regs:$b),
1227- "rcp.rn.f64 \t$dst, $b; ",
1228- [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
1225+ BasicNVPTXInst <(outs Float64Regs:$dst),
1226+ (ins Float64Regs:$b),
1227+ "rcp.rn.f64",
1228+ [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
// FDIV64rr: f64 division, register / register. Pattern matches
// (fdiv f64:$a, f64:$b); mnemonic is div.rn.f64.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
12291229def FDIV64rr :
1230- NVPTXInst <(outs Float64Regs:$dst),
1231- (ins Float64Regs:$a, Float64Regs:$b),
1232- "div.rn.f64 \t$dst, $a, $b; ",
1233- [(set f64:$dst, (fdiv f64:$a, f64:$b))]>;
1230+ BasicNVPTXInst <(outs Float64Regs:$dst),
1231+ (ins Float64Regs:$a, Float64Regs:$b),
1232+ "div.rn.f64",
1233+ [(set f64:$dst, (fdiv f64:$a, f64:$b))]>;
// FDIV64ri: f64 division, register / immediate. The divisor is declared as
// f64imm:$b in `ins` and matched with fpimm:$b in the pattern; mnemonic is
// div.rn.f64.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
12341234def FDIV64ri :
1235- NVPTXInst <(outs Float64Regs:$dst),
1236- (ins Float64Regs:$a, f64imm:$b),
1237- "div.rn.f64 \t$dst, $a, $b; ",
1238- [(set f64:$dst, (fdiv f64:$a, fpimm:$b))]>;
1235+ BasicNVPTXInst <(outs Float64Regs:$dst),
1236+ (ins Float64Regs:$a, f64imm:$b),
1237+ "div.rn.f64",
1238+ [(set f64:$dst, (fdiv f64:$a, fpimm:$b))]>;
12391239
12401240// fdiv will be converted to rcp
12411241// fneg (fdiv 1.0, X) => fneg (rcp.rn X)
@@ -1253,42 +1253,42 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
12531253
12541254
// FRCP32_approx_r_ftz: f32 reciprocal via rcp.approx.ftz.f32. Pattern matches
// (fdiv_approx f32imm_1, $b) and is gated by Requires<[doF32FTZ]>.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. Pattern and predicate are unchanged.
12551255def FRCP32_approx_r_ftz :
1256- NVPTXInst <(outs Float32Regs:$dst),
1257- (ins Float32Regs:$b),
1258- "rcp.approx.ftz.f32 \t$dst, $b; ",
1259- [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>,
1260- Requires<[doF32FTZ]>;
1256+ BasicNVPTXInst <(outs Float32Regs:$dst),
1257+ (ins Float32Regs:$b),
1258+ "rcp.approx.ftz.f32",
1259+ [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>,
1260+ Requires<[doF32FTZ]>;
// FRCP32_approx_r: f32 reciprocal via rcp.approx.f32. Same pattern as
// FRCP32_approx_r_ftz, (fdiv_approx f32imm_1, $b), but with no Requires
// predicate and no .ftz in the mnemonic.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only.
12611261def FRCP32_approx_r :
1262- NVPTXInst <(outs Float32Regs:$dst),
1263- (ins Float32Regs:$b),
1264- "rcp.approx.f32 \t$dst, $b; ",
1265- [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
1262+ BasicNVPTXInst <(outs Float32Regs:$dst),
1263+ (ins Float32Regs:$b),
1264+ "rcp.approx.f32",
1265+ [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
12661266
12671267//
12681268// F32 Approximate division
12691269//
// FDIV32approxrr_ftz: f32 division, register / register, via
// div.approx.ftz.f32. Pattern matches (fdiv_approx f32:$a, f32:$b) and is
// gated by Requires<[doF32FTZ]>.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. Pattern and predicate are unchanged.
12701270def FDIV32approxrr_ftz :
1271- NVPTXInst <(outs Float32Regs:$dst),
1272- (ins Float32Regs:$a, Float32Regs:$b),
1273- "div.approx.ftz.f32 \t$dst, $a, $b; ",
1274- [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
1275- Requires<[doF32FTZ]>;
1271+ BasicNVPTXInst <(outs Float32Regs:$dst),
1272+ (ins Float32Regs:$a, Float32Regs:$b),
1273+ "div.approx.ftz.f32",
1274+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
1275+ Requires<[doF32FTZ]>;
// FDIV32approxri_ftz: f32 division, register / immediate, via
// div.approx.ftz.f32. The divisor is f32imm:$b in `ins`, matched as fpimm:$b
// under fdiv_approx; gated by Requires<[doF32FTZ]>.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. Pattern and predicate are unchanged.
12761276def FDIV32approxri_ftz :
1277- NVPTXInst <(outs Float32Regs:$dst),
1278- (ins Float32Regs:$a, f32imm:$b),
1279- "div.approx.ftz.f32 \t$dst, $a, $b; ",
1280- [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
1281- Requires<[doF32FTZ]>;
1277+ BasicNVPTXInst <(outs Float32Regs:$dst),
1278+ (ins Float32Regs:$a, f32imm:$b),
1279+ "div.approx.ftz.f32",
1280+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
1281+ Requires<[doF32FTZ]>;
// FDIV32approxrr: f32 division, register / register, via div.approx.f32.
// Pattern matches (fdiv_approx f32:$a, f32:$b); no Requires predicate.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
12821282def FDIV32approxrr :
1283- NVPTXInst <(outs Float32Regs:$dst),
1284- (ins Float32Regs:$a, Float32Regs:$b),
1285- "div.approx.f32 \t$dst, $a, $b; ",
1286- [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
1283+ BasicNVPTXInst <(outs Float32Regs:$dst),
1284+ (ins Float32Regs:$a, Float32Regs:$b),
1285+ "div.approx.f32",
1286+ [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
// FDIV32approxri: f32 division, register / immediate, via div.approx.f32.
// The divisor is f32imm:$b in `ins`, matched as fpimm:$b under fdiv_approx;
// no Requires predicate.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
12871287def FDIV32approxri :
1288- NVPTXInst <(outs Float32Regs:$dst),
1289- (ins Float32Regs:$a, f32imm:$b),
1290- "div.approx.f32 \t$dst, $a, $b; ",
1291- [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
1288+ BasicNVPTXInst <(outs Float32Regs:$dst),
1289+ (ins Float32Regs:$a, f32imm:$b),
1290+ "div.approx.f32",
1291+ [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
12921292//
12931293// F32 Semi-accurate reciprocal
12941294//
@@ -1312,66 +1312,72 @@ def : Pat<(fdiv_full f32imm_1, f32:$b),
13121312// F32 Semi-accurate division
13131313//
// FDIV32rr_ftz: f32 division, register / register, via div.full.ftz.f32.
// Pattern matches (fdiv_full f32:$a, f32:$b) and is gated by
// Requires<[doF32FTZ]>.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. Pattern and predicate are unchanged.
13141314def FDIV32rr_ftz :
1315- NVPTXInst <(outs Float32Regs:$dst),
1316- (ins Float32Regs:$a, Float32Regs:$b),
1317- "div.full.ftz.f32 \t$dst, $a, $b; ",
1318- [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>,
1319- Requires<[doF32FTZ]>;
1315+ BasicNVPTXInst <(outs Float32Regs:$dst),
1316+ (ins Float32Regs:$a, Float32Regs:$b),
1317+ "div.full.ftz.f32",
1318+ [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>,
1319+ Requires<[doF32FTZ]>;
// FDIV32ri_ftz: f32 division, register / immediate, via div.full.ftz.f32.
// The divisor is f32imm:$b in `ins`, matched as fpimm:$b under fdiv_full;
// gated by Requires<[doF32FTZ]>.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. Pattern and predicate are unchanged.
13201320def FDIV32ri_ftz :
1321- NVPTXInst <(outs Float32Regs:$dst),
1322- (ins Float32Regs:$a, f32imm:$b),
1323- "div.full.ftz.f32 \t$dst, $a, $b; ",
1324- [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
1325- Requires<[doF32FTZ]>;
1321+ BasicNVPTXInst <(outs Float32Regs:$dst),
1322+ (ins Float32Regs:$a, f32imm:$b),
1323+ "div.full.ftz.f32",
1324+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
1325+ Requires<[doF32FTZ]>;
// FDIV32rr: f32 division, register / register, via div.full.f32. Pattern
// matches (fdiv_full f32:$a, f32:$b); no Requires predicate.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
13261326def FDIV32rr :
1327- NVPTXInst <(outs Float32Regs:$dst),
1328- (ins Float32Regs:$a, Float32Regs:$b),
1329- "div.full.f32 \t$dst, $a, $b; ",
1330- [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
1327+ BasicNVPTXInst <(outs Float32Regs:$dst),
1328+ (ins Float32Regs:$a, Float32Regs:$b),
1329+ "div.full.f32",
1330+ [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
// FDIV32ri: f32 division, register / immediate, via div.full.f32. The divisor
// is f32imm:$b in `ins`, matched as fpimm:$b under fdiv_full; no Requires
// predicate.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
13311331def FDIV32ri :
1332- NVPTXInst <(outs Float32Regs:$dst),
1333- (ins Float32Regs:$a, f32imm:$b),
1334- "div.full.f32 \t$dst, $a, $b; ",
1335- [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
1332+ BasicNVPTXInst <(outs Float32Regs:$dst),
1333+ (ins Float32Regs:$a, f32imm:$b),
1334+ "div.full.f32",
1335+ [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
13361336//
13371337// F32 Accurate reciprocal
13381338//
1339+
// fdiv_ftz (new in this diff): PatFrag wrapping (fdiv $a, $b) whose predicate
// accepts the node only when getDivF32Level(N) equals
// NVPTX::DivPrecisionLevel::IEEE754.
// It is used by FRCP32r_prec_ftz, FDIV32rr_prec_ftz and FDIV32ri_prec_ftz,
// each of which also carries Requires<[doF32FTZ]>.
// NOTE(review): the name says "ftz" but the predicate tests the division
// precision level, not a flush-to-zero setting. Confirm against
// getDivF32Level and the other DivPrecisionLevel values that the name is the
// intended one.
1340+ def fdiv_ftz : PatFrag<(ops node:$a, node:$b),
1341+ (fdiv node:$a, node:$b), [{
1342+ return getDivF32Level(N) == NVPTX::DivPrecisionLevel::IEEE754;
1343+ }]>;
1344+
// FRCP32r_prec_ftz: f32 reciprocal via rcp.rn.ftz.f32, gated by
// Requires<[doF32FTZ]>.
// Diff: base class NVPTXInst -> BasicNVPTXInst and asm string reduced to the
// mnemonic only. Unlike most defs in this diff, the selection pattern also
// changes: the old form matched plain (fdiv f32imm_1, $b), the new form
// matches (fdiv_ftz f32imm_1, $b), so it now additionally depends on the
// fdiv_ftz predicate.
13391345def FRCP32r_prec_ftz :
1340- NVPTXInst <(outs Float32Regs:$dst),
1341- (ins Float32Regs:$b),
1342- "rcp.rn.ftz.f32 \t$dst, $b; ",
1343- [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>,
1344- Requires<[doF32FTZ]>;
1346+ BasicNVPTXInst <(outs Float32Regs:$dst),
1347+ (ins Float32Regs:$b),
1348+ "rcp.rn.ftz.f32",
1349+ [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>,
1350+ Requires<[doF32FTZ]>;
// FRCP32r_prec: f32 reciprocal via rcp.rn.f32. Pattern matches plain
// (fdiv f32imm_1, $b) with no Requires predicate.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The pattern stays on plain fdiv (it does not switch to
// fdiv_ftz the way FRCP32r_prec_ftz does).
13451351def FRCP32r_prec :
1346- NVPTXInst <(outs Float32Regs:$dst),
1347- (ins Float32Regs:$b),
1348- "rcp.rn.f32 \t$dst, $b; ",
1349- [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>;
1352+ BasicNVPTXInst <(outs Float32Regs:$dst),
1353+ (ins Float32Regs:$b),
1354+ "rcp.rn.f32",
1355+ [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>;
13501356//
13511357// F32 Accurate division
13521358//
// FDIV32rr_prec_ftz: f32 division, register / register, via div.rn.ftz.f32,
// gated by Requires<[doF32FTZ]>.
// Diff: base class NVPTXInst -> BasicNVPTXInst and asm string reduced to the
// mnemonic only. The selection pattern also changes from plain
// (fdiv f32:$a, f32:$b) to (fdiv_ftz f32:$a, f32:$b), so matching now
// additionally depends on the fdiv_ftz predicate.
13531359def FDIV32rr_prec_ftz :
1354- NVPTXInst <(outs Float32Regs:$dst),
1355- (ins Float32Regs:$a, Float32Regs:$b),
1356- "div.rn.ftz.f32 \t$dst, $a, $b; ",
1357- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>,
1358- Requires<[doF32FTZ]>;
1360+ BasicNVPTXInst <(outs Float32Regs:$dst),
1361+ (ins Float32Regs:$a, Float32Regs:$b),
1362+ "div.rn.ftz.f32",
1363+ [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>,
1364+ Requires<[doF32FTZ]>;
// FDIV32ri_prec_ftz: f32 division, register / immediate, via div.rn.ftz.f32,
// gated by Requires<[doF32FTZ]>. The divisor is f32imm:$b in `ins`, matched
// as fpimm:$b.
// Diff: base class NVPTXInst -> BasicNVPTXInst and asm string reduced to the
// mnemonic only. The selection pattern also changes from plain
// (fdiv f32:$a, fpimm:$b) to (fdiv_ftz f32:$a, fpimm:$b), so matching now
// additionally depends on the fdiv_ftz predicate.
13591365def FDIV32ri_prec_ftz :
1360- NVPTXInst <(outs Float32Regs:$dst),
1361- (ins Float32Regs:$a, f32imm:$b),
1362- "div.rn.ftz.f32 \t$dst, $a, $b; ",
1363- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>,
1364- Requires<[doF32FTZ]>;
1366+ BasicNVPTXInst <(outs Float32Regs:$dst),
1367+ (ins Float32Regs:$a, f32imm:$b),
1368+ "div.rn.ftz.f32",
1369+ [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>,
1370+ Requires<[doF32FTZ]>;
// FDIV32rr_prec: f32 division, register / register, via div.rn.f32. Pattern
// matches plain (fdiv f32:$a, f32:$b) with no Requires predicate.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
13651371def FDIV32rr_prec :
1366- NVPTXInst <(outs Float32Regs:$dst),
1367- (ins Float32Regs:$a, Float32Regs:$b),
1368- "div.rn.f32 \t$dst, $a, $b; ",
1369- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>;
1372+ BasicNVPTXInst <(outs Float32Regs:$dst),
1373+ (ins Float32Regs:$a, Float32Regs:$b),
1374+ "div.rn.f32",
1375+ [(set f32:$dst, (fdiv f32:$a, f32:$b))]>;
// FDIV32ri_prec: f32 division, register / immediate, via div.rn.f32. The
// divisor is f32imm:$b in `ins`, matched as fpimm:$b under plain fdiv; no
// Requires predicate.
// Diff: base class NVPTXInst -> BasicNVPTXInst; asm string reduced to the
// mnemonic only. The selection pattern is unchanged.
13701376def FDIV32ri_prec :
1371- NVPTXInst <(outs Float32Regs:$dst),
1372- (ins Float32Regs:$a, f32imm:$b),
1373- "div.rn.f32 \t$dst, $a, $b; ",
1374- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>;
1377+ BasicNVPTXInst <(outs Float32Regs:$dst),
1378+ (ins Float32Regs:$a, f32imm:$b),
1379+ "div.rn.f32",
1380+ [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>;
13751381
13761382//
13771383// FMA
0 commit comments