@@ -268,7 +268,7 @@ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
268268// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
269269multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
270270 foreach t = [I16RT, I32RT, I64RT] in
271- defm t.Ty # : I3Inst<op_str # t.Size, op_node, t, commutative>;
271+ defm t.Size # : I3Inst<op_str # t.Size, op_node, t, commutative>;
272272}
273273
274274class I16x2<string OpcStr, SDNode OpNode> :
@@ -787,8 +787,8 @@ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>;
787787
788788defm MULT : I3<"mul.lo.s", mul, commutative = true>;
789789
790- defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
791- defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
790+ defm MUL_HI_S : I3<"mul.hi.s", mulhs, commutative = true>;
791+ defm MUL_HI_U : I3<"mul.hi.u", mulhu, commutative = true>;
792792
793793defm SDIV : I3<"div.s", sdiv, commutative = false>;
794794defm UDIV : I3<"div.u", udiv, commutative = false>;
@@ -1021,7 +1021,7 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
10211021}]>;
10221022
10231023
1024- def FRCP32_approx_r :
1024+ def RCP_APPROX_F32_r :
10251025 BasicFlagsNVPTXInst<(outs B32:$dst),
10261026 (ins B32:$b), (ins FTZFlag:$ftz),
10271027 "rcp.approx$ftz.f32",
@@ -1030,12 +1030,12 @@ def FRCP32_approx_r :
10301030//
10311031// F32 Approximate division
10321032//
1033- def FDIV32_approx_rr :
1033+ def DIV_APPROX_F32_rr :
10341034 BasicFlagsNVPTXInst<(outs B32:$dst),
10351035 (ins B32:$a, B32:$b), (ins FTZFlag:$ftz),
10361036 "div.approx$ftz.f32",
10371037 [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
1038- def FDIV32_approx_ri :
1038+ def DIV_APPROX_F32_ri :
10391039 BasicFlagsNVPTXInst<(outs B32:$dst),
10401040 (ins B32:$a, f32imm:$b), (ins FTZFlag:$ftz),
10411041 "div.approx$ftz.f32",
@@ -1053,7 +1053,7 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
10531053
10541054
10551055def : Pat<(fdiv_full f32imm_1, f32:$b),
1056- (FRCP32_approx_r $b)>;
1056+ (RCP_APPROX_F32_r $b)>;
10571057
10581058//
10591059// F32 Semi-accurate division
@@ -1519,9 +1519,9 @@ def MmaCode : Operand<i32> {
15191519// Get pointer to local stack.
15201520let hasSideEffects = false in {
15211521 def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
1522- "mov.b32 \t$d, __local_depot$num;", [] >;
1522+ "mov.b32 \t$d, __local_depot$num;">;
15231523 def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num),
1524- "mov.b64 \t$d, __local_depot$num;", [] >;
1524+ "mov.b64 \t$d, __local_depot$num;">;
15251525}
15261526
15271527
@@ -1577,9 +1577,9 @@ def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
15771577
15781578//---- Copy Frame Index ----
15791579def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
1580- "add.u32 \t$dst, ${addr:add};", [] >;
1580+ "add.u32 \t$dst, ${addr:add};">;
15811581def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
1582- "add.u64 \t$dst, ${addr:add};", [] >;
1582+ "add.u64 \t$dst, ${addr:add};">;
15831583
15841584def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
15851585def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1644,12 +1644,12 @@ foreach is_convergent = [0, 1] in {
16441644 NVPTXInst<(outs),
16451645 (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params,
16461646 i32imm:$proto),
1647- "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", [] >;
1647+ "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;">;
16481648
16491649 def CALL_UNI # convergent_suffix :
16501650 NVPTXInst<(outs),
16511651 (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
1652- "call.uni${rets:RetList} $addr, (${params:ParamList});", [] >;
1652+ "call.uni${rets:RetList} $addr, (${params:ParamList});">;
16531653 }
16541654
16551655 defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
@@ -1665,10 +1665,10 @@ foreach is_convergent = [0, 1] in {
16651665
16661666def DECLARE_PARAM_array :
16671667 NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
1668- ".param .align $align .b8 \t$a[$size];", [] >;
1668+ ".param .align $align .b8 \t$a[$size];">;
16691669def DECLARE_PARAM_scalar :
16701670 NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
1671- ".param .b$size \t$a;", [] >;
1671+ ".param .b$size \t$a;">;
16721672
16731673def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
16741674 (DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>;
@@ -1741,7 +1741,7 @@ class LD<NVPTXRegClass regclass>
17411741 (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
17421742 i32imm:$fromWidth, ADDR:$addr),
17431743 "ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
1744- "\t$dst, [$addr];", [] >;
1744+ "\t$dst, [$addr];">;
17451745
17461746let mayLoad=1, hasSideEffects=0 in {
17471747 def LD_i16 : LD<B16>;
@@ -1756,7 +1756,7 @@ class ST<DAGOperand O>
17561756 AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
17571757 ADDR:$addr),
17581758 "st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
1759- " \t[$addr], $src;", [] >;
1759+ " \t[$addr], $src;">;
17601760
17611761let mayStore=1, hasSideEffects=0 in {
17621762 def ST_i16 : ST<RI16>;
@@ -1773,13 +1773,13 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
17731773 (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
17741774 AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
17751775 "ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
1776- "\t{{$dst1, $dst2}}, [$addr];", [] >;
1776+ "\t{{$dst1, $dst2}}, [$addr];">;
17771777 def _v4 : NVPTXInst<
17781778 (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
17791779 (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
17801780 AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
17811781 "ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
1782- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", [] >;
1782+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];">;
17831783 if support_v8 then
17841784 def _v8 : NVPTXInst<
17851785 (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
@@ -1788,7 +1788,7 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
17881788 i32imm:$fromWidth, ADDR:$addr),
17891789 "ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
17901790 "\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
1791- "[$addr];", [] >;
1791+ "[$addr];">;
17921792}
17931793let mayLoad=1, hasSideEffects=0 in {
17941794 defm LDV_i16 : LD_VEC<B16>;
@@ -1803,14 +1803,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
18031803 AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
18041804 ADDR:$addr),
18051805 "st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
1806- "\t[$addr], {{$src1, $src2}};", [] >;
1806+ "\t[$addr], {{$src1, $src2}};">;
18071807 def _v4 : NVPTXInst<
18081808 (outs),
18091809 (ins O:$src1, O:$src2, O:$src3, O:$src4,
18101810 AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
18111811 ADDR:$addr),
18121812 "st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
1813- "\t[$addr], {{$src1, $src2, $src3, $src4}};", [] >;
1813+ "\t[$addr], {{$src1, $src2, $src3, $src4}};">;
18141814 if support_v8 then
18151815 def _v8 : NVPTXInst<
18161816 (outs),
@@ -1820,7 +1820,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
18201820 ADDR:$addr),
18211821 "st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth "
18221822 "\t[$addr], "
1823- "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};", [] >;
1823+ "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};">;
18241824}
18251825
18261826let mayStore=1, hasSideEffects=0 in {
@@ -2015,60 +2015,52 @@ let hasSideEffects = false in {
20152015 def V4I16toI64 : NVPTXInst<(outs B64:$d),
20162016 (ins B16:$s1, B16:$s2,
20172017 B16:$s3, B16:$s4),
2018- "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", [] >;
2018+ "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};">;
20192019 def V2I16toI32 : NVPTXInst<(outs B32:$d),
20202020 (ins B16:$s1, B16:$s2),
2021- "mov.b32 \t$d, {{$s1, $s2}};", [] >;
2021+ "mov.b32 \t$d, {{$s1, $s2}};">;
20222022 def V2I32toI64 : NVPTXInst<(outs B64:$d),
20232023 (ins B32:$s1, B32:$s2),
2024- "mov.b64 \t$d, {{$s1, $s2}};", [] >;
2024+ "mov.b64 \t$d, {{$s1, $s2}};">;
20252025 def V2I64toI128 : NVPTXInst<(outs B128:$d),
20262026 (ins B64:$s1, B64:$s2),
2027- "mov.b128 \t$d, {{$s1, $s2}};", [] >;
2027+ "mov.b128 \t$d, {{$s1, $s2}};">;
20282028
20292029 // unpack a larger int register to a set of smaller int registers
20302030 def I64toV4I16 : NVPTXInst<(outs B16:$d1, B16:$d2,
20312031 B16:$d3, B16:$d4),
20322032 (ins B64:$s),
2033- "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", [] >;
2033+ "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;">;
20342034 def I32toV2I16 : NVPTXInst<(outs B16:$d1, B16:$d2),
20352035 (ins B32:$s),
2036- "mov.b32 \t{{$d1, $d2}}, $s;", [] >;
2036+ "mov.b32 \t{{$d1, $d2}}, $s;">;
20372037 def I64toV2I32 : NVPTXInst<(outs B32:$d1, B32:$d2),
20382038 (ins B64:$s),
2039- "mov.b64 \t{{$d1, $d2}}, $s;", [] >;
2039+ "mov.b64 \t{{$d1, $d2}}, $s;">;
20402040 def I128toV2I64: NVPTXInst<(outs B64:$d1, B64:$d2),
20412041 (ins B128:$s),
2042- "mov.b128 \t{{$d1, $d2}}, $s;", [] >;
2042+ "mov.b128 \t{{$d1, $d2}}, $s;">;
20432043
2044- def I32toI16H : NVPTXInst<(outs B16:$high),
2045- (ins B32:$s),
2046- "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
2047- []>;
2048- def I32toI16L : NVPTXInst<(outs B16:$low),
2049- (ins B32:$s),
2050- "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
2051- []>;
2052- def I64toI32H : NVPTXInst<(outs B32:$high),
2053- (ins B64:$s),
2054- "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
2055- []>;
2056- def I64toI32L : NVPTXInst<(outs B32:$low),
2057- (ins B64:$s),
2058- "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
2059- []>;
2044+ def I32toI16H : NVPTXInst<(outs B16:$high), (ins B32:$s),
2045+ "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}">;
2046+ def I32toI16L : NVPTXInst<(outs B16:$low), (ins B32:$s),
2047+ "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}">;
2048+ def I64toI32H : NVPTXInst<(outs B32:$high), (ins B64:$s),
2049+ "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}">;
2050+ def I64toI32L : NVPTXInst<(outs B32:$low), (ins B64:$s),
2051+ "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}">;
20602052
20612053 // PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
20622054 // unused high/low part.
20632055 let Predicates = [hasPTX<71>] in {
20642056 def I32toI16H_Sink : NVPTXInst<(outs B16:$high), (ins B32:$s),
2065- "mov.b32 \t{{_, $high}}, $s;", [] >;
2057+ "mov.b32 \t{{_, $high}}, $s;">;
20662058 def I32toI16L_Sink : NVPTXInst<(outs B16:$low), (ins B32:$s),
2067- "mov.b32 \t{{$low, _}}, $s;", [] >;
2059+ "mov.b32 \t{{$low, _}}, $s;">;
20682060 def I64toI32H_Sink : NVPTXInst<(outs B32:$high), (ins B64:$s),
2069- "mov.b64 \t{{_, $high}}, $s;", [] >;
2061+ "mov.b64 \t{{_, $high}}, $s;">;
20702062 def I64toI32L_Sink : NVPTXInst<(outs B32:$low), (ins B64:$s),
2071- "mov.b64 \t{{$low, _}}, $s;", [] >;
2063+ "mov.b64 \t{{$low, _}}, $s;">;
20722064 }
20732065}
20742066
0 commit comments