@@ -411,7 +411,18 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
411
411
(ins Float32Regs:$a, f32imm:$b),
412
412
op_str # ".f32 \t$dst, $a, $b;",
413
413
[(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>;
414
-
414
// Register-register packed-f32 form with the ".ftz" modifier: the assembly
// string is op_str followed by ".ftz.f32x2". Both inputs and the result are
// declared in Int64Regs, while the selection pattern applies op_pat to v2f32
// values. The record lists doF32FTZ and hasF32x2Instructions in Requires.
+ def f32x2rr_ftz :
415
+ NVPTXInst<(outs Int64Regs:$dst),
416
+ (ins Int64Regs:$a, Int64Regs:$b),
417
+ op_str # ".ftz.f32x2 \t$dst, $a, $b;",
418
+ [(set v2f32:$dst, (op_pat v2f32:$a, v2f32:$b))]>,
419
+ Requires<[doF32FTZ, hasF32x2Instructions]>;
420
// Register-register packed-f32 form without the ".ftz" modifier: the assembly
// string is op_str followed by ".f32x2". Operands and result are declared in
// Int64Regs and the pattern applies op_pat to v2f32 values. Only
// hasF32x2Instructions is listed in Requires (no doF32FTZ here).
+ def f32x2rr :
421
+ NVPTXInst<(outs Int64Regs:$dst),
422
+ (ins Int64Regs:$a, Int64Regs:$b),
423
+ op_str # ".f32x2 \t$dst, $a, $b;",
424
+ [(set v2f32:$dst, (op_pat v2f32:$a, v2f32:$b))]>,
425
+ Requires<[hasF32x2Instructions]>;
415
426
def f16rr_ftz :
416
427
NVPTXInst<(outs Int16Regs:$dst),
417
428
(ins Int16Regs:$a, Int16Regs:$b),
@@ -443,7 +454,6 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
443
454
op_str # ".bf16 \t$dst, $a, $b;",
444
455
[(set bf16:$dst, (op_pat bf16:$a, bf16:$b))]>,
445
456
Requires<[hasBF16Math]>;
446
-
447
457
def bf16x2rr :
448
458
NVPTXInst<(outs Int32Regs:$dst),
449
459
(ins Int32Regs:$a, Int32Regs:$b),
@@ -1335,6 +1345,13 @@ multiclass FMA_BF16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred
1335
1345
Requires<[hasBF16Math, Pred]>;
1336
1346
}
1337
1347
1348
// Instruction class for a packed-f32 fused multiply-add.
//   OpcStr - opcode prefix; ".f32x2" is appended to it to form the mnemonic.
//   Pred   - extra predicate listed in Requires next to hasF32x2Instructions.
// The three inputs ($a, $b, $c) and the result ($res) are declared in
// Int64Regs; the selection pattern matches `fma` on v2f32 values.
+ class FMA_F32x2<string OpcStr, Predicate Pred>
1349
+ : NVPTXInst<(outs Int64Regs:$res),
1350
+ (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
1351
+ OpcStr # ".f32x2 \t$res, $a, $b, $c;",
1352
+ [(set v2f32:$res, (fma v2f32:$a, v2f32:$b, v2f32:$c))]>,
1353
+ Requires<[hasF32x2Instructions, Pred]>;
1354
+
1338
1355
// f16 FMA instantiations of the FMA_F16 multiclass. The "_ftz" records pass
// an opcode string containing ".ftz" together with the doF32FTZ predicate;
// the plain record passes True as its predicate. Scalar forms use f16 with
// Int16Regs, the x2 form uses v2f16 with Int32Regs.
defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>;
1339
1356
defm FMA16 : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>;
1340
1357
defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
@@ -1343,6 +1360,8 @@ defm BFMA16 : FMA_BF16<"fma.rn.bf16", bf16, Int16Regs, True>;
1343
1360
// bf16x2, f32 and f64 FMA instantiations. The f32 scalar forms come in an
// "_ftz" flavour (predicate doF32FTZ) and a plain flavour (predicate True).
defm BFMA16x2 : FMA_BF16<"fma.rn.bf16x2", v2bf16, Int32Regs, True>;
1344
1361
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
1345
1362
defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
1363
// Packed-f32 FMA records built from FMA_F32x2, which appends ".f32x2" to the
// opcode prefix given here. As with the scalar f32 forms, the "_ftz" record
// is gated on doF32FTZ and the plain record passes True.
// NOTE(review): both records share the same selection pattern; presumably
// the predicate decides which one is chosen -- confirm.
+ def FMA32x2_ftz : FMA_F32x2<"fma.rn.ftz", doF32FTZ>;
1364
+ def FMA32x2 : FMA_F32x2<"fma.rn", True>;
1346
1365
defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
1347
1366
1348
1367
// sin/cos
0 commit comments