@@ -268,7 +268,7 @@ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
268
268
// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
269
269
multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
270
270
foreach t = [I16RT, I32RT, I64RT] in
271
- defm t.Ty # : I3Inst<op_str # t.Size, op_node, t, commutative>;
271
+ defm t.Size # : I3Inst<op_str # t.Size, op_node, t, commutative>;
272
272
}
273
273
274
274
class I16x2<string OpcStr, SDNode OpNode> :
@@ -787,8 +787,8 @@ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>;
787
787
788
788
defm MULT : I3<"mul.lo.s", mul, commutative = true>;
789
789
790
- defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
791
- defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
790
+ defm MUL_HI_S : I3<"mul.hi.s", mulhs, commutative = true>;
791
+ defm MUL_HI_U : I3<"mul.hi.u", mulhu, commutative = true>;
792
792
793
793
defm SDIV : I3<"div.s", sdiv, commutative = false>;
794
794
defm UDIV : I3<"div.u", udiv, commutative = false>;
@@ -1021,7 +1021,7 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
1021
1021
}]>;
1022
1022
1023
1023
1024
- def FRCP32_approx_r :
1024
+ def RCP_APPROX_F32_r :
1025
1025
BasicFlagsNVPTXInst<(outs B32:$dst),
1026
1026
(ins B32:$b), (ins FTZFlag:$ftz),
1027
1027
"rcp.approx$ftz.f32",
@@ -1030,12 +1030,12 @@ def FRCP32_approx_r :
1030
1030
//
1031
1031
// F32 Approximate division
1032
1032
//
1033
- def FDIV32_approx_rr :
1033
+ def DIV_APPROX_F32_rr :
1034
1034
BasicFlagsNVPTXInst<(outs B32:$dst),
1035
1035
(ins B32:$a, B32:$b), (ins FTZFlag:$ftz),
1036
1036
"div.approx$ftz.f32",
1037
1037
[(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
1038
- def FDIV32_approx_ri :
1038
+ def DIV_APPROX_F32_ri :
1039
1039
BasicFlagsNVPTXInst<(outs B32:$dst),
1040
1040
(ins B32:$a, f32imm:$b), (ins FTZFlag:$ftz),
1041
1041
"div.approx$ftz.f32",
@@ -1053,7 +1053,7 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
1053
1053
1054
1054
1055
1055
def : Pat<(fdiv_full f32imm_1, f32:$b),
1056
- (FRCP32_approx_r $b)>;
1056
+ (RCP_APPROX_F32_r $b)>;
1057
1057
1058
1058
//
1059
1059
// F32 Semi-accurate division
@@ -1519,9 +1519,9 @@ def MmaCode : Operand<i32> {
1519
1519
// Get pointer to local stack.
1520
1520
let hasSideEffects = false in {
1521
1521
def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
1522
- "mov.b32 \t$d, __local_depot$num;", [] >;
1522
+ "mov.b32 \t$d, __local_depot$num;">;
1523
1523
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num),
1524
- "mov.b64 \t$d, __local_depot$num;", [] >;
1524
+ "mov.b64 \t$d, __local_depot$num;">;
1525
1525
}
1526
1526
1527
1527
@@ -1577,9 +1577,9 @@ def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
1577
1577
1578
1578
//---- Copy Frame Index ----
1579
1579
def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
1580
- "add.u32 \t$dst, ${addr:add};", [] >;
1580
+ "add.u32 \t$dst, ${addr:add};">;
1581
1581
def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
1582
- "add.u64 \t$dst, ${addr:add};", [] >;
1582
+ "add.u64 \t$dst, ${addr:add};">;
1583
1583
1584
1584
def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
1585
1585
def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1644,12 +1644,12 @@ foreach is_convergent = [0, 1] in {
1644
1644
NVPTXInst<(outs),
1645
1645
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params,
1646
1646
i32imm:$proto),
1647
- "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", [] >;
1647
+ "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;">;
1648
1648
1649
1649
def CALL_UNI # convergent_suffix :
1650
1650
NVPTXInst<(outs),
1651
1651
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
1652
- "call.uni${rets:RetList} $addr, (${params:ParamList});", [] >;
1652
+ "call.uni${rets:RetList} $addr, (${params:ParamList});">;
1653
1653
}
1654
1654
1655
1655
defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
@@ -1665,10 +1665,10 @@ foreach is_convergent = [0, 1] in {
1665
1665
1666
1666
def DECLARE_PARAM_array :
1667
1667
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
1668
- ".param .align $align .b8 \t$a[$size];", [] >;
1668
+ ".param .align $align .b8 \t$a[$size];">;
1669
1669
def DECLARE_PARAM_scalar :
1670
1670
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
1671
- ".param .b$size \t$a;", [] >;
1671
+ ".param .b$size \t$a;">;
1672
1672
1673
1673
def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
1674
1674
(DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>;
@@ -1741,7 +1741,7 @@ class LD<NVPTXRegClass regclass>
1741
1741
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
1742
1742
i32imm:$fromWidth, ADDR:$addr),
1743
1743
"ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
1744
- "\t$dst, [$addr];", [] >;
1744
+ "\t$dst, [$addr];">;
1745
1745
1746
1746
let mayLoad=1, hasSideEffects=0 in {
1747
1747
def LD_i16 : LD<B16>;
@@ -1756,7 +1756,7 @@ class ST<DAGOperand O>
1756
1756
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
1757
1757
ADDR:$addr),
1758
1758
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
1759
- " \t[$addr], $src;", [] >;
1759
+ " \t[$addr], $src;">;
1760
1760
1761
1761
let mayStore=1, hasSideEffects=0 in {
1762
1762
def ST_i16 : ST<RI16>;
@@ -1773,13 +1773,13 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
1773
1773
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
1774
1774
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
1775
1775
"ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
1776
- "\t{{$dst1, $dst2}}, [$addr];", [] >;
1776
+ "\t{{$dst1, $dst2}}, [$addr];">;
1777
1777
def _v4 : NVPTXInst<
1778
1778
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
1779
1779
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
1780
1780
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
1781
1781
"ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
1782
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", [] >;
1782
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];">;
1783
1783
if support_v8 then
1784
1784
def _v8 : NVPTXInst<
1785
1785
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
@@ -1788,7 +1788,7 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
1788
1788
i32imm:$fromWidth, ADDR:$addr),
1789
1789
"ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
1790
1790
"\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
1791
- "[$addr];", [] >;
1791
+ "[$addr];">;
1792
1792
}
1793
1793
let mayLoad=1, hasSideEffects=0 in {
1794
1794
defm LDV_i16 : LD_VEC<B16>;
@@ -1803,14 +1803,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
1803
1803
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
1804
1804
ADDR:$addr),
1805
1805
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
1806
- "\t[$addr], {{$src1, $src2}};", [] >;
1806
+ "\t[$addr], {{$src1, $src2}};">;
1807
1807
def _v4 : NVPTXInst<
1808
1808
(outs),
1809
1809
(ins O:$src1, O:$src2, O:$src3, O:$src4,
1810
1810
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
1811
1811
ADDR:$addr),
1812
1812
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
1813
- "\t[$addr], {{$src1, $src2, $src3, $src4}};", [] >;
1813
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};">;
1814
1814
if support_v8 then
1815
1815
def _v8 : NVPTXInst<
1816
1816
(outs),
@@ -1820,7 +1820,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
1820
1820
ADDR:$addr),
1821
1821
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth "
1822
1822
"\t[$addr], "
1823
- "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};", [] >;
1823
+ "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};">;
1824
1824
}
1825
1825
1826
1826
let mayStore=1, hasSideEffects=0 in {
@@ -2015,60 +2015,52 @@ let hasSideEffects = false in {
2015
2015
def V4I16toI64 : NVPTXInst<(outs B64:$d),
2016
2016
(ins B16:$s1, B16:$s2,
2017
2017
B16:$s3, B16:$s4),
2018
- "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", [] >;
2018
+ "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};">;
2019
2019
def V2I16toI32 : NVPTXInst<(outs B32:$d),
2020
2020
(ins B16:$s1, B16:$s2),
2021
- "mov.b32 \t$d, {{$s1, $s2}};", [] >;
2021
+ "mov.b32 \t$d, {{$s1, $s2}};">;
2022
2022
def V2I32toI64 : NVPTXInst<(outs B64:$d),
2023
2023
(ins B32:$s1, B32:$s2),
2024
- "mov.b64 \t$d, {{$s1, $s2}};", [] >;
2024
+ "mov.b64 \t$d, {{$s1, $s2}};">;
2025
2025
def V2I64toI128 : NVPTXInst<(outs B128:$d),
2026
2026
(ins B64:$s1, B64:$s2),
2027
- "mov.b128 \t$d, {{$s1, $s2}};", [] >;
2027
+ "mov.b128 \t$d, {{$s1, $s2}};">;
2028
2028
2029
2029
// unpack a larger int register to a set of smaller int registers
2030
2030
def I64toV4I16 : NVPTXInst<(outs B16:$d1, B16:$d2,
2031
2031
B16:$d3, B16:$d4),
2032
2032
(ins B64:$s),
2033
- "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", [] >;
2033
+ "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;">;
2034
2034
def I32toV2I16 : NVPTXInst<(outs B16:$d1, B16:$d2),
2035
2035
(ins B32:$s),
2036
- "mov.b32 \t{{$d1, $d2}}, $s;", [] >;
2036
+ "mov.b32 \t{{$d1, $d2}}, $s;">;
2037
2037
def I64toV2I32 : NVPTXInst<(outs B32:$d1, B32:$d2),
2038
2038
(ins B64:$s),
2039
- "mov.b64 \t{{$d1, $d2}}, $s;", [] >;
2039
+ "mov.b64 \t{{$d1, $d2}}, $s;">;
2040
2040
def I128toV2I64: NVPTXInst<(outs B64:$d1, B64:$d2),
2041
2041
(ins B128:$s),
2042
- "mov.b128 \t{{$d1, $d2}}, $s;", [] >;
2042
+ "mov.b128 \t{{$d1, $d2}}, $s;">;
2043
2043
2044
- def I32toI16H : NVPTXInst<(outs B16:$high),
2045
- (ins B32:$s),
2046
- "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
2047
- []>;
2048
- def I32toI16L : NVPTXInst<(outs B16:$low),
2049
- (ins B32:$s),
2050
- "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
2051
- []>;
2052
- def I64toI32H : NVPTXInst<(outs B32:$high),
2053
- (ins B64:$s),
2054
- "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
2055
- []>;
2056
- def I64toI32L : NVPTXInst<(outs B32:$low),
2057
- (ins B64:$s),
2058
- "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
2059
- []>;
2044
+ def I32toI16H : NVPTXInst<(outs B16:$high), (ins B32:$s),
2045
+ "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}">;
2046
+ def I32toI16L : NVPTXInst<(outs B16:$low), (ins B32:$s),
2047
+ "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}">;
2048
+ def I64toI32H : NVPTXInst<(outs B32:$high), (ins B64:$s),
2049
+ "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}">;
2050
+ def I64toI32L : NVPTXInst<(outs B32:$low), (ins B64:$s),
2051
+ "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}">;
2060
2052
2061
2053
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
2062
2054
// unused high/low part.
2063
2055
let Predicates = [hasPTX<71>] in {
2064
2056
def I32toI16H_Sink : NVPTXInst<(outs B16:$high), (ins B32:$s),
2065
- "mov.b32 \t{{_, $high}}, $s;", [] >;
2057
+ "mov.b32 \t{{_, $high}}, $s;">;
2066
2058
def I32toI16L_Sink : NVPTXInst<(outs B16:$low), (ins B32:$s),
2067
- "mov.b32 \t{{$low, _}}, $s;", [] >;
2059
+ "mov.b32 \t{{$low, _}}, $s;">;
2068
2060
def I64toI32H_Sink : NVPTXInst<(outs B32:$high), (ins B64:$s),
2069
- "mov.b64 \t{{_, $high}}, $s;", [] >;
2061
+ "mov.b64 \t{{_, $high}}, $s;">;
2070
2062
def I64toI32L_Sink : NVPTXInst<(outs B32:$low), (ins B64:$s),
2071
- "mov.b64 \t{{$low, _}}, $s;", [] >;
2063
+ "mov.b64 \t{{$low, _}}, $s;">;
2072
2064
}
2073
2065
}
2074
2066
0 commit comments