@@ -1582,49 +1582,34 @@ def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
15821582 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
15831583
15841584// packed f32 ops (sm_100+)
1585- class F32x2Op2<string OpcStr, Predicate Pred>
1585+
1586+ def fadd32x2_nvptx : SDNode<"NVPTXISD::FADD_F32X2", SDTIntBinOp>; // two-operand node; selected by FADD32x2 / FADD32x2_ftz
1587+ def fsub32x2_nvptx : SDNode<"NVPTXISD::FSUB_F32X2", SDTIntBinOp>; // two-operand node; selected by FSUB32x2 / FSUB32x2_ftz
1588+ def fmul32x2_nvptx : SDNode<"NVPTXISD::FMUL_F32X2", SDTIntBinOp>; // two-operand node; selected by FMUL32x2 / FMUL32x2_ftz
1589+ def fma32x2_nvptx : SDNode<"NVPTXISD::FMA_F32X2", SDTIntTernaryOp>; // three-operand node; selected by FMA32x2 / FMA32x2_ftz
1590+
1591+ class F32x2Op2<string OpcStr, SDNode Op, Predicate Pred> // two-source packed instruction: OpcStr = mnemonic prefix, Op = DAG node to select, Pred = per-variant predicate
15861592: NVPTXInst<(outs Int64Regs:$res), // NOTE(review): result/operands are i64 registers; presumably each holds two f32 lanes - confirm against the lowering code
15871593 (ins Int64Regs:$a, Int64Regs:$b),
1588- OpcStr # ".f32x2 \t$res, $a, $b;", []>,
1594+ OpcStr # ".f32x2 \t$res, $a, $b;", // asm string: OpcStr is pasted directly in front of ".f32x2", with no separator added
1595+ [(set i64:$res, (Op i64:$a, i64:$b))]>, // selection pattern: Op applied to the two i64 sources
15891596 Requires<[hasF32x2Instructions, Pred]>; // gated on hasF32x2Instructions plus the per-variant Pred
1590- class F32x2Op3<string OpcStr, Predicate Pred>
1597+ class F32x2Op3<string OpcStr, SDNode Op, Predicate Pred> // three-source packed instruction: OpcStr = mnemonic prefix, Op = DAG node to select, Pred = per-variant predicate
15911598: NVPTXInst<(outs Int64Regs:$res), // NOTE(review): result/operands are i64 registers; presumably each holds two f32 lanes - confirm against the lowering code
15921599 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
1593- OpcStr # ".f32x2 \t$res, $a, $b, $c;", []>,
1600+ OpcStr # ".f32x2 \t$res, $a, $b, $c;", // asm string: OpcStr is pasted directly in front of ".f32x2", with no separator added
1601+ [(set i64:$res, (Op i64:$a, i64:$b, i64:$c))]>, // selection pattern: Op applied to the three i64 sources
15941602 Requires<[hasF32x2Instructions, Pred]>; // gated on hasF32x2Instructions plus the per-variant Pred
15951603
1596- def fadd32x2_nvptx : SDNode<"NVPTXISD::FADD_F32X2 ", SDTIntBinOp >;
1597- def fsub32x2_nvptx : SDNode<"NVPTXISD::FSUB_F32X2 ", SDTIntBinOp >;
1598- def fmul32x2_nvptx : SDNode<"NVPTXISD::FMUL_F32X2 ", SDTIntBinOp >;
1599- def fma32x2_nvptx : SDNode<"NVPTXISD::FMA_F32X2 ", SDTIntTernaryOp >;
1604+ def FADD32x2 : F32x2Op2<"add.rn", fadd32x2_nvptx, doNoF32FTZ>; // non-FTZ forms (doNoF32FTZ); mnemonic must not end in a space because it is pasted straight onto ".f32x2"
1605+ def FSUB32x2 : F32x2Op2<"sub.rn", fsub32x2_nvptx, doNoF32FTZ>;
1606+ def FMUL32x2 : F32x2Op2<"mul.rn", fmul32x2_nvptx, doNoF32FTZ>;
1607+ def FMA32x2 : F32x2Op3<"fma.rn", fma32x2_nvptx, doNoF32FTZ>; // three-operand form uses F32x2Op3
16001608
1601- def FADD32x2 : F32x2Op2<"add.rn", doNoF32FTZ>;
1602- def FSUB32x2 : F32x2Op2<"sub.rn", doNoF32FTZ>;
1603- def FMUL32x2 : F32x2Op2<"mul.rn", doNoF32FTZ>;
1604- def FMA32x2 : F32x2Op3<"fma.rn", doNoF32FTZ>;
1605-
1606- def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
1607- (FADD32x2 $a, $b)>, Requires<[doNoF32FTZ]>;
1608- def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
1609- (FSUB32x2 $a, $b)>, Requires<[doNoF32FTZ]>;
1610- def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
1611- (FMUL32x2 $a, $b)>, Requires<[doNoF32FTZ]>;
1612- def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
1613- (FMA32x2 $a, $b, $c)>, Requires<[doNoF32FTZ]>;
1614-
1615- def FADD32x2_ftz : F32x2Op2<"add.rn.ftz", doF32FTZ>;
1616- def FSUB32x2_ftz : F32x2Op2<"sub.rn.ftz", doF32FTZ>;
1617- def FMUL32x2_ftz : F32x2Op2<"mul.rn.ftz", doF32FTZ>;
1618- def FMA32x2_ftz : F32x2Op3<"fma.rn.ftz", doF32FTZ>;
1619-
1620- def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
1621- (FADD32x2_ftz $a, $b)>, Requires<[doF32FTZ]>;
1622- def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
1623- (FSUB32x2_ftz $a, $b)>, Requires<[doF32FTZ]>;
1624- def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
1625- (FMUL32x2_ftz $a, $b)>, Requires<[doF32FTZ]>;
1626- def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
1627- (FMA32x2_ftz $a, $b, $c)>, Requires<[doF32FTZ]>;
1609+ def FADD32x2_ftz : F32x2Op2<"add.rn.ftz", fadd32x2_nvptx, doF32FTZ>; // .ftz forms: same DAG nodes as the non-FTZ defs, but predicated on doF32FTZ instead of doNoF32FTZ
1610+ def FSUB32x2_ftz : F32x2Op2<"sub.rn.ftz", fsub32x2_nvptx, doF32FTZ>;
1611+ def FMUL32x2_ftz : F32x2Op2<"mul.rn.ftz", fmul32x2_nvptx, doF32FTZ>;
1612+ def FMA32x2_ftz : F32x2Op3<"fma.rn.ftz", fma32x2_nvptx, doF32FTZ>; // three-operand form uses F32x2Op3
16281613
16291614//
16301615// BFIND
0 commit comments