@@ -227,6 +227,7 @@ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
227227 int Size = ty.Size;
228228}
229229
230+ def I1RT : RegTyInfo<i1, Int1Regs, i1imm, imm>;
230231def I16RT : RegTyInfo<i16, Int16Regs, i16imm, imm>;
231232def I32RT : RegTyInfo<i32, Int32Regs, i32imm, imm>;
232233def I64RT : RegTyInfo<i64, Int64Regs, i64imm, imm>;
@@ -240,26 +241,33 @@ def F16X2RT : RegTyInfo<v2f16, Int32Regs, ?, ?, supports_imm = 0>;
240241def BF16X2RT : RegTyInfo<v2bf16, Int32Regs, ?, ?, supports_imm = 0>;
241242
242243
244+ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t, // op_str: text emitted before the operand list; op_node: DAG operator to match; t: register class / value type / immediate operand bundle
245+ bit commutative, list<Predicate> requires = []> { // commutative: when set, the "ir" form below is not defined; requires: predicates attached to every form (default: none)
246+ defvar asmstr = op_str # " \t$dst, $a, $b;"; // single asm template shared by all forms
247+
248+ def rr : // form 1: register, register
249+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
250+ asmstr,
251+ [(set t.Ty:$dst, (op_node t.Ty:$a, t.Ty:$b))]>,
252+ Requires<requires>;
253+ def ri : // form 2: register, immediate (the immediate is matched with explicit type t.Ty)
254+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
255+ asmstr,
256+ [(set t.Ty:$dst, (op_node t.Ty:$a, (t.Ty imm:$b)))]>,
257+ Requires<requires>;
258+ if !not(commutative) then // NOTE(review): presumably a commutative op with an immediate first operand is matched by "ri" instead -- confirm
259+ def ir : // form 3: immediate, register (only defined for non-commutative ops)
260+ NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
261+ asmstr,
262+ [(set t.Ty:$dst, (op_node (t.Ty imm:$a), t.Ty:$b))]>,
263+ Requires<requires>;
264+ }
265+
243266// Template for instructions which take three int64, int32, or int16 args.
244267// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
245- multiclass I3<string OpcStr, SDNode OpNode, bit commutative> {
246- foreach t = [I16RT, I32RT, I64RT] in {
247- defvar asmstr = OpcStr # t.Size # " \t$dst, $a, $b;";
248-
249- def t.Ty # rr :
250- NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
251- asmstr,
252- [(set t.Ty:$dst, (OpNode t.Ty:$a, t.Ty:$b))]>;
253- def t.Ty # ri :
254- NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
255- asmstr,
256- [(set t.Ty:$dst, (OpNode t.RC:$a, imm:$b))]>;
257- if !not(commutative) then
258- def t.Ty # ir :
259- NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
260- asmstr,
261- [(set t.Ty:$dst, (OpNode imm:$a, t.RC:$b))]>;
262- }
268+ multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> { // op_str is the mnemonic without the width suffix (e.g. "add.s")
269+ foreach t = [I16RT, I32RT, I64RT] in // one I3Inst expansion per listed integer register type
270+ defm t.Ty# : I3Inst<op_str # t.Size, op_node, t, commutative>; // t.Size is appended to the mnemonic; the record-name prefix is built from t.Ty
263271}
264272
265273class I16x2<string OpcStr, SDNode OpNode> :
@@ -270,26 +278,11 @@ class I16x2<string OpcStr, SDNode OpNode> :
270278
271279// Template for instructions which take 3 int args. The instructions are
272280// named "<op_str>.s32" or "<op_str>.s64" (e.g. "addc.cc.s32").
273- multiclass ADD_SUB_INT_CARRY<string OpcStr , SDNode OpNode > {
281+ multiclass ADD_SUB_INT_CARRY<string op_str , SDNode op_node, bit commutative > {
274282 let hasSideEffects = 1 in {
275- def i32rr :
276- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
277- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
278- [(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
279- def i32ri :
280- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
281- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
282- [(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
283- def i64rr :
284- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
285- !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
286- [(set i64:$dst, (OpNode i64:$a, i64:$b))]>,
287- Requires<[hasPTX<43>]>;
288- def i64ri :
289- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
290- !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
291- [(set i64:$dst, (OpNode i64:$a, imm:$b))]>,
292- Requires<[hasPTX<43>]>;
283+ defm i32 : I3Inst<op_str # ".s32", op_node, I32RT, commutative>;
284+ defm i64 : I3Inst<op_str # ".s64", op_node, I64RT, commutative,
285+ requires = [hasPTX<43>]>;
293286 }
294287}
295288
@@ -841,31 +834,31 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
841834
842835// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
843836// also use these for unsigned arithmetic.
844- defm ADD : I3<"add.s", add, /* commutative=*/ true>;
845- defm SUB : I3<"sub.s", sub, /* commutative=*/ false>;
837+ defm ADD : I3<"add.s", add, commutative = true>;
838+ defm SUB : I3<"sub.s", sub, commutative = false>;
846839
847840def ADD16x2 : I16x2<"add.s", add>;
848841
849842// int32 and int64 addition and subtraction with carry-out.
850- defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
851- defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
843+ defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc, commutative = true >;
844+ defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc, commutative = false >;
852845
853846// int32 and int64 addition and subtraction with carry-in and carry-out.
854- defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
855- defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
847+ defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde, commutative = true >;
848+ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false >;
856849
857- defm MULT : I3<"mul.lo.s", mul, /* commutative=*/ true>;
850+ defm MULT : I3<"mul.lo.s", mul, commutative = true>;
858851
859- defm MULTHS : I3<"mul.hi.s", mulhs, /* commutative=*/ true>;
860- defm MULTHU : I3<"mul.hi.u", mulhu, /* commutative=*/ true>;
852+ defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
853+ defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
861854
862- defm SDIV : I3<"div.s", sdiv, /* commutative=*/ false>;
863- defm UDIV : I3<"div.u", udiv, /* commutative=*/ false>;
855+ defm SDIV : I3<"div.s", sdiv, commutative = false>;
856+ defm UDIV : I3<"div.u", udiv, commutative = false>;
864857
865858// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
866859// will lower it.
867- defm SREM : I3<"rem.s", srem, /* commutative=*/ false>;
868- defm UREM : I3<"rem.u", urem, /* commutative=*/ false>;
860+ defm SREM : I3<"rem.s", srem, commutative = false>;
861+ defm UREM : I3<"rem.u", urem, commutative = false>;
869862
870863// Integer absolute value. NumBits should be one minus the bit width of RC.
871864// This idiom implements the algorithm at
@@ -880,10 +873,10 @@ defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
880873defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
881874
882875// Integer min/max.
883- defm SMAX : I3<"max.s", smax, /* commutative=*/ true>;
884- defm UMAX : I3<"max.u", umax, /* commutative=*/ true>;
885- defm SMIN : I3<"min.s", smin, /* commutative=*/ true>;
886- defm UMIN : I3<"min.u", umin, /* commutative=*/ true>;
876+ defm SMAX : I3<"max.s", smax, commutative = true>;
877+ defm UMAX : I3<"max.u", umax, commutative = true>;
878+ defm SMIN : I3<"min.s", smin, commutative = true>;
879+ defm UMIN : I3<"min.u", umin, commutative = true>;
887880
888881def SMAX16x2 : I16x2<"max.s", smax>;
889882def UMAX16x2 : I16x2<"max.u", umax>;
@@ -1393,38 +1386,10 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
13931386// Template for three-arg bitwise operations. Takes three args and creates .b16,
13941387// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr.
13951388multiclass BITWISE<string OpcStr, SDNode OpNode> {
1396- def b1rr :
1397- NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
1398- !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
1399- [(set i1:$dst, (OpNode i1:$a, i1:$b))]>;
1400- def b1ri :
1401- NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
1402- !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
1403- [(set i1:$dst, (OpNode i1:$a, imm:$b))]>;
1404- def b16rr :
1405- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1406- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
1407- [(set i16:$dst, (OpNode i16:$a, i16:$b))]>;
1408- def b16ri :
1409- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1410- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
1411- [(set i16:$dst, (OpNode i16:$a, imm:$b))]>;
1412- def b32rr :
1413- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1414- !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
1415- [(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
1416- def b32ri :
1417- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1418- !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
1419- [(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
1420- def b64rr :
1421- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1422- !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1423- [(set i64:$dst, (OpNode i64:$a, i64:$b))]>;
1424- def b64ri :
1425- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1426- !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1427- [(set i64:$dst, (OpNode i64:$a, imm:$b))]>;
1389+ defm b1 : I3Inst<OpcStr # ".pred", OpNode, I1RT, commutative = true>;
1390+ defm b16 : I3Inst<OpcStr # ".b16", OpNode, I16RT, commutative = true>;
1391+ defm b32 : I3Inst<OpcStr # ".b32", OpNode, I32RT, commutative = true>;
1392+ defm b64 : I3Inst<OpcStr # ".b64", OpNode, I64RT, commutative = true>;
14281393}
14291394
14301395defm OR : BITWISE<"or", or>;
0 commit comments