@@ -1581,27 +1581,50 @@ def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
// Ties the int_nvvm_add_rp_d intrinsic to the PTX string "add.rp.f64" through
// the F_MATH_2 helper (defined elsewhere in this file). All three
// register-class arguments are Float64Regs.
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_add_rp_d>;
15831583
1584- // F32x2 ops (sm_100+)
1585-
1586- def FADD_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1587- (ins Int64Regs:$a, Int64Regs:$b),
1588- "add.rn.f32x2 \t$res, $a, $b;", []>,
1589- Requires<[hasF32x2Instructions]>;
1590-
1591- def FSUB_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1592- (ins Int64Regs:$a, Int64Regs:$b),
1593- "sub.rn.f32x2 \t$res, $a, $b;", []>,
1594- Requires<[hasF32x2Instructions]>;
1595-
1596- def FMUL_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1597- (ins Int64Regs:$a, Int64Regs:$b),
1598- "mul.rn.f32x2 \t$res, $a, $b;", []>,
1599- Requires<[hasF32x2Instructions]>;
1600-
1601- def FMA_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1602- (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
1603- "fma.rn.f32x2 \t$res, $a, $b;", []>,
1604- Requires<[hasF32x2Instructions]>;
1584+ // packed f32 ops (sm_100+)
// Two-source packed-f32 instruction. The result and both sources are
// Int64Regs operands. OpcStr is the mnemonic up to, but not including, the
// ".f32x2" type suffix (e.g. "add.rn"). Pred is listed alongside
// hasF32x2Instructions in the instruction's predicate list. The pattern list
// is empty, so selection has to be provided by separate Pat definitions.
class F32x2Op2<string OpcStr, Predicate Pred>
  : NVPTXInst<(outs Int64Regs:$res),
              (ins Int64Regs:$a, Int64Regs:$b),
              !strconcat(OpcStr, ".f32x2 \t$res, $a, $b;"),
              []>,
    Requires<[hasF32x2Instructions, Pred]>;

// Three-source variant of F32x2Op2; the extra Int64Regs source is $c and is
// printed as the last operand of the assembly string.
class F32x2Op3<string OpcStr, Predicate Pred>
  : NVPTXInst<(outs Int64Regs:$res),
              (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
              !strconcat(OpcStr, ".f32x2 \t$res, $a, $b, $c;"),
              []>,
    Requires<[hasF32x2Instructions, Pred]>;
1595+
// SelectionDAG nodes for the target-specific packed-f32 operations. They use
// the integer type profiles because the patterns that match them operate on
// i64 values. No node properties are attached.
def fadd32x2_nvptx
  : SDNode<"NVPTXISD::FADD_F32X2", SDTIntBinOp, []>;
def fsub32x2_nvptx
  : SDNode<"NVPTXISD::FSUB_F32X2", SDTIntBinOp, []>;
def fmul32x2_nvptx
  : SDNode<"NVPTXISD::FMUL_F32X2", SDTIntBinOp, []>;
def fma32x2_nvptx
  : SDNode<"NVPTXISD::FMA_F32X2", SDTIntTernaryOp, []>;
1600+
// Packed f32 arithmetic without flush-to-zero (selected under doNoF32FTZ).
def FADD32x2 : F32x2Op2<"add.rn", doNoF32FTZ>;
def FSUB32x2 : F32x2Op2<"sub.rn", doNoF32FTZ>;
def FMUL32x2 : F32x2Op2<"mul.rn", doNoF32FTZ>;
def FMA32x2  : F32x2Op3<"fma.rn", doNoF32FTZ>;

// The instruction definitions above carry an empty pattern list, so the
// predicates attached to them do not gate these stand-alone patterns. Repeat
// the full predicate set here so the packed nodes are only selected when the
// subtarget actually provides the f32x2 instructions.
let Predicates = [hasF32x2Instructions, doNoF32FTZ] in {
  def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
            (FADD32x2 $a, $b)>;
  def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
            (FSUB32x2 $a, $b)>;
  def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
            (FMUL32x2 $a, $b)>;
  def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
            (FMA32x2 $a, $b, $c)>;
}
1614+
// Packed f32 arithmetic with the .ftz modifier (selected under doF32FTZ).
def FADD32x2_ftz : F32x2Op2<"add.rn.ftz", doF32FTZ>;
def FSUB32x2_ftz : F32x2Op2<"sub.rn.ftz", doF32FTZ>;
def FMUL32x2_ftz : F32x2Op2<"mul.rn.ftz", doF32FTZ>;
def FMA32x2_ftz  : F32x2Op3<"fma.rn.ftz", doF32FTZ>;

// The instruction definitions above carry an empty pattern list, so the
// predicates attached to them do not gate these stand-alone patterns. Repeat
// the full predicate set here so the packed nodes are only selected when the
// subtarget actually provides the f32x2 instructions.
let Predicates = [hasF32x2Instructions, doF32FTZ] in {
  def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
            (FADD32x2_ftz $a, $b)>;
  def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
            (FSUB32x2_ftz $a, $b)>;
  def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
            (FMUL32x2_ftz $a, $b)>;
  def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
            (FMA32x2_ftz $a, $b, $c)>;
}
16051628
16061629//
16071630// BFIND
0 commit comments