@@ -268,7 +268,7 @@ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
268
268
// The instructions are named "<op_str><t.Size>" (e.g. "add.s64").
269
269
multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
270
270
foreach t = [I16RT, I32RT, I64RT] in
271
- defm t.Ty # : I3Inst<op_str # t.Size, op_node, t, commutative>;
271
+ defm t.Size # : I3Inst<op_str # t.Size, op_node, t, commutative>;
272
272
}
273
273
274
274
class I16x2<string OpcStr, SDNode OpNode> :
@@ -757,8 +757,8 @@ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>;
757
757
758
758
defm MULT : I3<"mul.lo.s", mul, commutative = true>;
759
759
760
- defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
761
- defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
760
+ defm MUL_HI_S : I3<"mul.hi.s", mulhs, commutative = true>;
761
+ defm MUL_HI_U : I3<"mul.hi.u", mulhu, commutative = true>;
762
762
763
763
defm SDIV : I3<"div.s", sdiv, commutative = false>;
764
764
defm UDIV : I3<"div.u", udiv, commutative = false>;
@@ -977,7 +977,7 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
977
977
}]>;
978
978
979
979
980
- def FRCP32_approx_r :
980
+ def RCP_APPROX_F32_r :
981
981
BasicFlagsNVPTXInst<(outs B32:$dst),
982
982
(ins B32:$b), (ins FTZFlag:$ftz),
983
983
"rcp.approx$ftz.f32",
@@ -986,12 +986,12 @@ def FRCP32_approx_r :
986
986
//
987
987
// F32 Approximate division
988
988
//
989
- def FDIV32_approx_rr :
989
+ def DIV_APPROX_F32_rr :
990
990
BasicFlagsNVPTXInst<(outs B32:$dst),
991
991
(ins B32:$a, B32:$b), (ins FTZFlag:$ftz),
992
992
"div.approx$ftz.f32",
993
993
[(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
994
- def FDIV32_approx_ri :
994
+ def DIV_APPROX_F32_ri :
995
995
BasicFlagsNVPTXInst<(outs B32:$dst),
996
996
(ins B32:$a, f32imm:$b), (ins FTZFlag:$ftz),
997
997
"div.approx$ftz.f32",
@@ -1009,7 +1009,7 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
1009
1009
1010
1010
1011
1011
def : Pat<(fdiv_full f32imm_1, f32:$b),
1012
- (FRCP32_approx_r $b)>;
1012
+ (RCP_APPROX_F32_r $b)>;
1013
1013
1014
1014
//
1015
1015
// F32 Semi-accurate division
@@ -1475,9 +1475,9 @@ def MmaCode : Operand<i32> {
1475
1475
// Get pointer to local stack.
1476
1476
let hasSideEffects = false in {
1477
1477
def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
1478
- "mov.b32 \t$d, __local_depot$num;", [] >;
1478
+ "mov.b32 \t$d, __local_depot$num;">;
1479
1479
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num),
1480
- "mov.b64 \t$d, __local_depot$num;", [] >;
1480
+ "mov.b64 \t$d, __local_depot$num;">;
1481
1481
}
1482
1482
1483
1483
@@ -1533,9 +1533,9 @@ def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
1533
1533
1534
1534
//---- Copy Frame Index ----
1535
1535
def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
1536
- "add.u32 \t$dst, ${addr:add};", [] >;
1536
+ "add.u32 \t$dst, ${addr:add};">;
1537
1537
def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
1538
- "add.u64 \t$dst, ${addr:add};", [] >;
1538
+ "add.u64 \t$dst, ${addr:add};">;
1539
1539
1540
1540
def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
1541
1541
def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1612,12 +1612,12 @@ foreach is_convergent = [0, 1] in {
1612
1612
NVPTXInst<(outs),
1613
1613
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params,
1614
1614
i32imm:$proto),
1615
- "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", [] >;
1615
+ "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;">;
1616
1616
1617
1617
def CALL_UNI # convergent_suffix :
1618
1618
NVPTXInst<(outs),
1619
1619
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
1620
- "call.uni${rets:RetList} $addr, (${params:ParamList});", [] >;
1620
+ "call.uni${rets:RetList} $addr, (${params:ParamList});">;
1621
1621
}
1622
1622
1623
1623
defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
@@ -1633,10 +1633,10 @@ foreach is_convergent = [0, 1] in {
1633
1633
1634
1634
def DECLARE_PARAM_array :
1635
1635
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
1636
- ".param .align $align .b8 \t$a[$size];", [] >;
1636
+ ".param .align $align .b8 \t$a[$size];">;
1637
1637
def DECLARE_PARAM_scalar :
1638
1638
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
1639
- ".param .b$size \t$a;", [] >;
1639
+ ".param .b$size \t$a;">;
1640
1640
1641
1641
def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
1642
1642
(DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>;
@@ -1709,7 +1709,7 @@ class LD<NVPTXRegClass regclass>
1709
1709
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
1710
1710
i32imm:$fromWidth, ADDR:$addr),
1711
1711
"ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
1712
- "\t$dst, [$addr];", [] >;
1712
+ "\t$dst, [$addr];">;
1713
1713
1714
1714
let mayLoad=1, hasSideEffects=0 in {
1715
1715
def LD_i16 : LD<B16>;
@@ -1724,7 +1724,7 @@ class ST<DAGOperand O>
1724
1724
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
1725
1725
ADDR:$addr),
1726
1726
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
1727
- " \t[$addr], $src;", [] >;
1727
+ " \t[$addr], $src;">;
1728
1728
1729
1729
let mayStore=1, hasSideEffects=0 in {
1730
1730
def ST_i16 : ST<RI16>;
@@ -1741,13 +1741,13 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
1741
1741
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
1742
1742
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
1743
1743
"ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
1744
- "\t{{$dst1, $dst2}}, [$addr];", [] >;
1744
+ "\t{{$dst1, $dst2}}, [$addr];">;
1745
1745
def _v4 : NVPTXInst<
1746
1746
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
1747
1747
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
1748
1748
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
1749
1749
"ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
1750
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", [] >;
1750
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];">;
1751
1751
if support_v8 then
1752
1752
def _v8 : NVPTXInst<
1753
1753
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
@@ -1756,7 +1756,7 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
1756
1756
i32imm:$fromWidth, ADDR:$addr),
1757
1757
"ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
1758
1758
"\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
1759
- "[$addr];", [] >;
1759
+ "[$addr];">;
1760
1760
}
1761
1761
let mayLoad=1, hasSideEffects=0 in {
1762
1762
defm LDV_i16 : LD_VEC<B16>;
@@ -1771,14 +1771,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
1771
1771
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
1772
1772
ADDR:$addr),
1773
1773
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
1774
- "\t[$addr], {{$src1, $src2}};", [] >;
1774
+ "\t[$addr], {{$src1, $src2}};">;
1775
1775
def _v4 : NVPTXInst<
1776
1776
(outs),
1777
1777
(ins O:$src1, O:$src2, O:$src3, O:$src4,
1778
1778
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
1779
1779
ADDR:$addr),
1780
1780
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
1781
- "\t[$addr], {{$src1, $src2, $src3, $src4}};", [] >;
1781
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};">;
1782
1782
if support_v8 then
1783
1783
def _v8 : NVPTXInst<
1784
1784
(outs),
@@ -1788,7 +1788,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
1788
1788
ADDR:$addr),
1789
1789
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth "
1790
1790
"\t[$addr], "
1791
- "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};", [] >;
1791
+ "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};">;
1792
1792
}
1793
1793
1794
1794
let mayStore=1, hasSideEffects=0 in {
@@ -1983,60 +1983,52 @@ let hasSideEffects = false in {
1983
1983
def V4I16toI64 : NVPTXInst<(outs B64:$d),
1984
1984
(ins B16:$s1, B16:$s2,
1985
1985
B16:$s3, B16:$s4),
1986
- "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", [] >;
1986
+ "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};">;
1987
1987
def V2I16toI32 : NVPTXInst<(outs B32:$d),
1988
1988
(ins B16:$s1, B16:$s2),
1989
- "mov.b32 \t$d, {{$s1, $s2}};", [] >;
1989
+ "mov.b32 \t$d, {{$s1, $s2}};">;
1990
1990
def V2I32toI64 : NVPTXInst<(outs B64:$d),
1991
1991
(ins B32:$s1, B32:$s2),
1992
- "mov.b64 \t$d, {{$s1, $s2}};", [] >;
1992
+ "mov.b64 \t$d, {{$s1, $s2}};">;
1993
1993
def V2I64toI128 : NVPTXInst<(outs B128:$d),
1994
1994
(ins B64:$s1, B64:$s2),
1995
- "mov.b128 \t$d, {{$s1, $s2}};", [] >;
1995
+ "mov.b128 \t$d, {{$s1, $s2}};">;
1996
1996
1997
1997
// unpack a larger int register to a set of smaller int registers
1998
1998
def I64toV4I16 : NVPTXInst<(outs B16:$d1, B16:$d2,
1999
1999
B16:$d3, B16:$d4),
2000
2000
(ins B64:$s),
2001
- "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", [] >;
2001
+ "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;">;
2002
2002
def I32toV2I16 : NVPTXInst<(outs B16:$d1, B16:$d2),
2003
2003
(ins B32:$s),
2004
- "mov.b32 \t{{$d1, $d2}}, $s;", [] >;
2004
+ "mov.b32 \t{{$d1, $d2}}, $s;">;
2005
2005
def I64toV2I32 : NVPTXInst<(outs B32:$d1, B32:$d2),
2006
2006
(ins B64:$s),
2007
- "mov.b64 \t{{$d1, $d2}}, $s;", [] >;
2007
+ "mov.b64 \t{{$d1, $d2}}, $s;">;
2008
2008
def I128toV2I64: NVPTXInst<(outs B64:$d1, B64:$d2),
2009
2009
(ins B128:$s),
2010
- "mov.b128 \t{{$d1, $d2}}, $s;", [] >;
2010
+ "mov.b128 \t{{$d1, $d2}}, $s;">;
2011
2011
2012
- def I32toI16H : NVPTXInst<(outs B16:$high),
2013
- (ins B32:$s),
2014
- "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
2015
- []>;
2016
- def I32toI16L : NVPTXInst<(outs B16:$low),
2017
- (ins B32:$s),
2018
- "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
2019
- []>;
2020
- def I64toI32H : NVPTXInst<(outs B32:$high),
2021
- (ins B64:$s),
2022
- "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
2023
- []>;
2024
- def I64toI32L : NVPTXInst<(outs B32:$low),
2025
- (ins B64:$s),
2026
- "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
2027
- []>;
2012
+ def I32toI16H : NVPTXInst<(outs B16:$high), (ins B32:$s),
2013
+ "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}">;
2014
+ def I32toI16L : NVPTXInst<(outs B16:$low), (ins B32:$s),
2015
+ "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}">;
2016
+ def I64toI32H : NVPTXInst<(outs B32:$high), (ins B64:$s),
2017
+ "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}">;
2018
+ def I64toI32L : NVPTXInst<(outs B32:$low), (ins B64:$s),
2019
+ "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}">;
2028
2020
2029
2021
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
2030
2022
// unused high/low part.
2031
2023
let Predicates = [hasPTX<71>] in {
2032
2024
def I32toI16H_Sink : NVPTXInst<(outs B16:$high), (ins B32:$s),
2033
- "mov.b32 \t{{_, $high}}, $s;", [] >;
2025
+ "mov.b32 \t{{_, $high}}, $s;">;
2034
2026
def I32toI16L_Sink : NVPTXInst<(outs B16:$low), (ins B32:$s),
2035
- "mov.b32 \t{{$low, _}}, $s;", [] >;
2027
+ "mov.b32 \t{{$low, _}}, $s;">;
2036
2028
def I64toI32H_Sink : NVPTXInst<(outs B32:$high), (ins B64:$s),
2037
- "mov.b64 \t{{_, $high}}, $s;", [] >;
2029
+ "mov.b64 \t{{_, $high}}, $s;">;
2038
2030
def I64toI32L_Sink : NVPTXInst<(outs B32:$low), (ins B64:$s),
2039
- "mov.b64 \t{{$low, _}}, $s;", [] >;
2031
+ "mov.b64 \t{{$low, _}}, $s;">;
2040
2032
}
2041
2033
}
2042
2034
0 commit comments