77//===----------------------------------------------------------------------===//
88//
99// This file contains instruction formats, definitions and patterns needed for
10- // VIS, VIS II, VIS II instructions on SPARC.
10+ // VIS, VIS II, VIS III instructions on SPARC.
1111//===----------------------------------------------------------------------===//
1212
13+ //
14+ // NOTE Capstone:
15+ // This file matches mostly
16+ // https://github.com/llvm/llvm-project/commit/ebacd46996a7f041be73cf31b5776503e8061e8b
17+ // because it had fixes for several instructions.
18+ // It can be reset to llvm-18 before the Sparc module is updated.
19+ //
20+
1321// VIS Instruction Format.
// Base class for the VIS instruction classes and records below. It forwards
// to F3_3 with the first two arguments fixed to 0b10 and 0b110110 and with
// `opfval` as the third. The revised form drops the `pattern` parameter and
// always hands F3_3 an empty pattern list; selection patterns are written as
// separate `Pat` records at the end of this file.
14- class VISInstFormat<bits<9> opfval, dag outs, dag ins, string asmstr,
15- list<dag> pattern>
16- : F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, pattern>;
22+ class VISInstFormat<bits<9> opfval, dag outs, dag ins, string asmstr>
23+ : F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, []>;
1724
// Three-operand VIS instruction: $rd is the output and $rs1/$rs2 are the
// inputs, all drawn from the same register class RC (DFPRegs unless the
// record overrides it). The assembly string is "<OpcStr> $rs1, $rs2, $rd".
1825class VISInst<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
1926 : VISInstFormat<opfval,
2027 (outs RC:$rd), (ins RC:$rs1, RC:$rs2),
21- !strconcat(OpcStr, " $rs1, $rs2, $rd"), [] >;
28+ !strconcat(OpcStr, " $rs1, $rs2, $rd")>;
2229
2330// VIS Instruction with integer destination register.
// Same three-operand assembly form as VISInst, but the register classes are
// fixed: $rd is in I64Regs while $rs1 and $rs2 are in DFPRegs.
2431class VISInstID<bits<9> opfval, string OpcStr>
2532 : VISInstFormat<opfval,
2633 (outs I64Regs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
27- !strconcat(OpcStr, " $rs1, $rs2, $rd"), [] >;
34+ !strconcat(OpcStr, " $rs1, $rs2, $rd")>;
2835
2936// For VIS Instructions with no operand.
// The record has empty outs/ins lists, so the rd, rs1 and rs2 fields are all
// forced to 0 by the enclosing `let`; `asmstr` is used verbatim.
3037let rd = 0, rs1 = 0, rs2 = 0 in
3138class VISInst0<bits<9> opfval, string asmstr>
32- : VISInstFormat<opfval, (outs), (ins), asmstr, [] >;
39+ : VISInstFormat<opfval, (outs), (ins), asmstr>;
3340
3441// For VIS Instructions with only rs1, rd operands.
// There is no $rs2 operand, so the rs2 field is forced to 0. $rd and $rs1
// share register class RC (DFPRegs by default); the assembly string is
// "<OpcStr> $rs1, $rd".
3542let rs2 = 0 in
3643class VISInst1<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
3744 : VISInstFormat<opfval,
3845 (outs RC:$rd), (ins RC:$rs1),
39- !strconcat(OpcStr, " $rs1, $rd"), [] >;
46+ !strconcat(OpcStr, " $rs1, $rd")>;
4047
4148// For VIS Instructions with only rs2, rd operands.
// There is no $rs1 operand, so the rs1 field is forced to 0. $rd and $rs2
// share register class RC (DFPRegs by default); the assembly string is
// "<OpcStr> $rs2, $rd".
4249let rs1 = 0 in
4350class VISInst2<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
4451 : VISInstFormat<opfval,
4552 (outs RC:$rd), (ins RC:$rs2),
46- !strconcat(OpcStr, " $rs2, $rd"), [] >;
53+ !strconcat(OpcStr, " $rs2, $rd")>;
4754
4855// For VIS Instructions with only rd operand.
// The previous form modelled $rd as tied to an input operand $f through
// Constraints = "$rd = $f". The revised form has an empty ins list and no
// constraint, so $rd is a plain output. The rs1 and rs2 fields stay forced
// to 0 and the assembly string is "<OpcStr> $rd".
49- let Constraints = "$rd = $f", rs1 = 0, rs2 = 0 in
56+ let rs1 = 0, rs2 = 0 in
5057class VISInstD<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
5158 : VISInstFormat<opfval,
52- (outs RC:$rd), (ins RC:$f ),
53- !strconcat(OpcStr, " $rd"), [] >;
59+ (outs RC:$rd), (ins),
60+ !strconcat(OpcStr, " $rd")>;
5461
5562// VIS 1 Instructions
5663let Predicates = [HasVIS] in {
5764
// fpadd*/fpsub* records. The ones without an `s` suffix keep VISInst's
// default DFPRegs class. The `s`-suffixed ones now pass FPRegs explicitly,
// and the FPSUB16S/FPSUB32S mnemonics are changed from "fpsub16S"/"fpsub32S"
// to all-lowercase "fpsub16s"/"fpsub32s".
5865def FPADD16 : VISInst<0b001010000, "fpadd16">;
59- def FPADD16S : VISInst<0b001010001, "fpadd16s">;
66+ def FPADD16S : VISInst<0b001010001, "fpadd16s", FPRegs >;
6067def FPADD32 : VISInst<0b001010010, "fpadd32">;
61- def FPADD32S : VISInst<0b001010011, "fpadd32s">;
68+ def FPADD32S : VISInst<0b001010011, "fpadd32s", FPRegs >;
6269def FPSUB16 : VISInst<0b001010100, "fpsub16">;
63- def FPSUB16S : VISInst<0b001010101, "fpsub16S" >;
70+ def FPSUB16S : VISInst<0b001010101, "fpsub16s", FPRegs >;
6471def FPSUB32 : VISInst<0b001010110, "fpsub32">;
65- def FPSUB32S : VISInst<0b001010111, "fpsub32S" >;
72+ def FPSUB32S : VISInst<0b001010111, "fpsub32s", FPRegs >;
6673
// fpack*/fexpand/fpmerge records. FPACK16 and FPACK32 keep the shared
// helper classes. FPACKFIX, FEXPAND and FPMERGE are rewritten directly on
// VISInstFormat because their source and destination register classes
// differ (FPRegs vs. DFPRegs), which the single-RC helper classes cannot
// express. The two-operand records set rs1 = 0 themselves, as VISInst2 did.
6774def FPACK16 : VISInst2<0b000111011, "fpack16">;
6875def FPACK32 : VISInst <0b000111010, "fpack32">;
69- def FPACKFIX : VISInst2<0b000111101, "fpackfix">;
70- def FEXPAND : VISInst2<0b001001101, "fexpand">;
71- def FPMERGE : VISInst <0b001001011, "fpmerge">;
72-
73- def FMUL8X16 : VISInst<0b000110001, "fmul8x16">;
74- def FMUL8X16AU : VISInst<0b000110011, "fmul8x16au">;
75- def FMUL8X16AL : VISInst<0b000110101, "fmul8x16al">;
76+ let rs1 = 0 in
77+ def FPACKFIX : VISInstFormat<0b000111101,
78+ (outs FPRegs:$rd), (ins DFPRegs:$rs2), "fpackfix $rs2, $rd">;
79+ let rs1 = 0 in
80+ def FEXPAND : VISInstFormat<0b001001101,
81+ (outs DFPRegs:$rd), (ins FPRegs:$rs2), "fexpand $rs2, $rd">;
82+ def FPMERGE : VISInstFormat<0b001001011,
83+ (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
84+ "fpmerge $rs1, $rs2, $rd">;
85+
// fmul*/alignaddr* records. FMUL8X16, FMUL8X16AU, FMUL8X16AL, FMULD8SUX16
// and FMULD8ULX16 are spelled out on VISInstFormat so that each operand can
// name its own register class (FPRegs inputs with a DFPRegs result; FMUL8X16
// takes FPRegs:$rs1 and DFPRegs:$rs2). FMUL8SUX16 and FMUL8ULX16 are
// unchanged and keep the all-DFPRegs VISInst form. ALIGNADDR and ALIGNADDRL
// use VISInst with I64Regs.
86+ def FMUL8X16 : VISInstFormat<0b000110001,
87+ (outs DFPRegs:$rd), (ins FPRegs:$rs1, DFPRegs:$rs2),
88+ "fmul8x16 $rs1, $rs2, $rd">;
89+ def FMUL8X16AU : VISInstFormat<0b000110011,
90+ (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
91+ "fmul8x16au $rs1, $rs2, $rd">;
92+ def FMUL8X16AL : VISInstFormat<0b000110101,
93+ (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
94+ "fmul8x16al $rs1, $rs2, $rd">;
7695def FMUL8SUX16 : VISInst<0b000110110, "fmul8sux16">;
7796def FMUL8ULX16 : VISInst<0b000110111, "fmul8ulx16">;
78- def FMULD8SUX16 : VISInst<0b000111000, "fmuld8sux16">;
79- def FMULD8ULX16 : VISInst<0b000111001, "fmuld8ulx16">;
97+ def FMULD8SUX16 : VISInstFormat<0b000111000,
98+ (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
99+ "fmuld8sux16 $rs1, $rs2, $rd">;
100+ def FMULD8ULX16 : VISInstFormat<0b000111001,
101+ (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
102+ "fmuld8ulx16 $rs1, $rs2, $rd">;
80103
81104def ALIGNADDR : VISInst<0b000011000, "alignaddr", I64Regs>;
82105def ALIGNADDRL : VISInst<0b000011010, "alignaddrl", I64Regs>;
@@ -148,10 +171,6 @@ def SHUTDOWN : VISInst0<0b010000000, "shutdown">;
148171let Predicates = [HasVIS2] in {
149172
// BMASK uses VISInst with I64Regs, BSHUFFLE uses VISInst with the default
// DFPRegs, and SIAM takes no operands (VISInst0). The Capstone-specific note
// on BSHUFFLE is dropped here; the BSHUFFLE record itself is unchanged.
150173def BMASK : VISInst<0b000011001, "bmask", I64Regs>;
151- // Capstone NOTE:
152- // BSHUFFLE was silently fixed with
153- // https://github.com/llvm/llvm-project/commit/ebacd46996a7f041be73cf31b5776503e8061e8b
154- // On rebase this can be reverted.
155174def BSHUFFLE : VISInst<0b001001100, "bshuffle">;
156175
157176def SIAM : VISInst0<0b010000001, "siam">;
@@ -176,59 +195,59 @@ def ADDXCCC : VISInst<0b000010011, "addxccc", I64Regs>;
176195
// cmask8/16/32: each record has a single I64Regs input $rs2 and no output,
// so the rd and rs1 fields are forced to 0 for the whole group. The only
// change is dropping the trailing empty pattern list argument.
177196let rd = 0, rs1 = 0 in {
178197def CMASK8 : VISInstFormat<0b000011011, (outs), (ins I64Regs:$rs2),
179- "cmask8 $rs2", [] >;
198+ "cmask8 $rs2">;
180199def CMASK16 : VISInstFormat<0b000011101, (outs), (ins I64Regs:$rs2),
181- "cmask16 $rs2", [] >;
200+ "cmask16 $rs2">;
182201def CMASK32 : VISInstFormat<0b000011111, (outs), (ins I64Regs:$rs2),
183- "cmask32 $rs2", [] >;
202+ "cmask32 $rs2">;
184203
185204}
186205
// fchksm16, fhadd*/fhsub* and flcmp* records. The FH* records are built
// directly on F3_3 with second argument 0b110100 rather than on
// VISInstFormat, which passes 0b110110. The `s`-suffixed records (FHADDS,
// FHSUBS, FLCMPS) switch their floating-point operands from DFPRegs to
// FPRegs; the `d`-suffixed ones stay on DFPRegs. FLCMPS/FLCMPD write an
// FCCRegs destination, which is printed first in their assembly strings.
187206def FCHKSM16 : VISInst<0b001000100, "fchksm16">;
188207
189208def FHADDS : F3_3<0b10, 0b110100, 0b001100001,
190- (outs DFPRegs :$rd), (ins DFPRegs :$rs1, DFPRegs :$rs2),
209+ (outs FPRegs :$rd), (ins FPRegs :$rs1, FPRegs :$rs2),
191210 "fhadds $rs1, $rs2, $rd", []>;
192211def FHADDD : F3_3<0b10, 0b110100, 0b001100010,
193212 (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
194213 "fhaddd $rs1, $rs2, $rd", []>;
195214def FHSUBS : F3_3<0b10, 0b110100, 0b001100101,
196- (outs DFPRegs :$rd), (ins DFPRegs :$rs1, DFPRegs :$rs2),
215+ (outs FPRegs :$rd), (ins FPRegs :$rs1, FPRegs :$rs2),
197216 "fhsubs $rs1, $rs2, $rd", []>;
198217def FHSUBD : F3_3<0b10, 0b110100, 0b001100110,
199218 (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
200219 "fhsubd $rs1, $rs2, $rd", []>;
201220def FLCMPS : VISInstFormat<0b101010001, (outs FCCRegs:$rd),
202- (ins DFPRegs :$rs1, DFPRegs :$rs2),
203- "flcmps $rd, $rs1, $rs2", [] >;
221+ (ins FPRegs :$rs1, FPRegs :$rs2),
222+ "flcmps $rd, $rs1, $rs2">;
204223def FLCMPD : VISInstFormat<0b101010010, (outs FCCRegs:$rd),
205224 (ins DFPRegs:$rs1, DFPRegs:$rs2),
206- "flcmpd $rd, $rs1, $rs2", [] >;
225+ "flcmpd $rd, $rs1, $rs2">;
207226
// fmean16, fnadd*, fnhadd*, fnmul*, fnsmuld and fpadd64 records. Two kinds
// of change are made here:
//  - the `s`-suffixed records (FNADDS, FNHADDS, FNMULS) move from DFPRegs to
//    FPRegs, and FNSMULD now takes FPRegs inputs with a DFPRegs result;
//  - the assembly strings of FNMULS, FNMULD and FNSMULD previously read
//    "fnhadds"/"fnhaddd" and are corrected to "fnmuls", "fnmuld" and
//    "fnsmuld" so that each record prints its own mnemonic.
208227def FMEAN16 : VISInst<0b001000000, "fmean16">;
209228
210229def FNADDS : F3_3<0b10, 0b110100, 0b001010001,
211- (outs DFPRegs :$rd), (ins DFPRegs :$rs1, DFPRegs :$rs2),
230+ (outs FPRegs :$rd), (ins FPRegs :$rs1, FPRegs :$rs2),
212231 "fnadds $rs1, $rs2, $rd", []>;
213232def FNADDD : F3_3<0b10, 0b110100, 0b001010010,
214233 (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
215234 "fnaddd $rs1, $rs2, $rd", []>;
216235def FNHADDS : F3_3<0b10, 0b110100, 0b001110001,
217- (outs DFPRegs :$rd), (ins DFPRegs :$rs1, DFPRegs :$rs2),
236+ (outs FPRegs :$rd), (ins FPRegs :$rs1, FPRegs :$rs2),
218237 "fnhadds $rs1, $rs2, $rd", []>;
219238def FNHADDD : F3_3<0b10, 0b110100, 0b001110010,
220239 (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
221240 "fnhaddd $rs1, $rs2, $rd", []>;
222241
223242def FNMULS : F3_3<0b10, 0b110100, 0b001011001,
224- (outs DFPRegs :$rd), (ins DFPRegs :$rs1, DFPRegs :$rs2),
225- "fnhadds $rs1, $rs2, $rd", []>;
243+ (outs FPRegs :$rd), (ins FPRegs :$rs1, FPRegs :$rs2),
244+ "fnmuls $rs1, $rs2, $rd", []>;
226245def FNMULD : F3_3<0b10, 0b110100, 0b001011010,
227246 (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
228- "fnhaddd $rs1, $rs2, $rd", []>;
247+ "fnmuld $rs1, $rs2, $rd", []>;
229248def FNSMULD : F3_3<0b10, 0b110100, 0b001111001,
230- (outs DFPRegs:$rd), (ins DFPRegs :$rs1, DFPRegs :$rs2),
231- "fnhadds $rs1, $rs2, $rd", []>;
249+ (outs DFPRegs:$rd), (ins FPRegs :$rs1, FPRegs :$rs2),
250+ "fnsmuld $rs1, $rs2, $rd", []>;
232251
233252def FPADD64 : VISInst<0b001000010, "fpadd64">;
234253
@@ -243,24 +262,97 @@ def FSRA32 : VISInst<0b000101111, "fsra32">;
243262
// lzcnt: one I64Regs input ($rs2) and one I64Regs output ($rd); the rs1
// field is unused and forced to 0. The ctlz patterns near the end of this
// file select this record.
244263let rs1 = 0 in
245264def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
246- (ins I64Regs:$rs2), "lzcnt $rs2, $rd", [] >;
265+ (ins I64Regs:$rs2), "lzcnt $rs2, $rd">;
247266
// Moves between floating-point and integer register classes; each record has
// a single source $rs2, so the rs1 field is forced to 0 for the group.
// Changes made here:
//  - MOVSTOSW, MOVSTOUW and MOVWTOS now use IntRegs/FPRegs instead of
//    I64Regs/DFPRegs; the bitconvert patterns at the end of this file use
//    them with i32/f32 values;
//  - MOVWTOS and MOVXTOD previously printed as "movdtox" and now print their
//    own mnemonics, "movwtos" and "movxtod".
248267let rs1 = 0 in {
249- def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs :$rd),
250- (ins DFPRegs :$rs2), "movstosw $rs2, $rd", [] >;
251- def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs :$rd),
252- (ins DFPRegs :$rs2), "movstouw $rs2, $rd", [] >;
268+ def MOVSTOSW : VISInstFormat<0b100010011, (outs IntRegs :$rd),
269+ (ins FPRegs :$rs2), "movstosw $rs2, $rd">;
270+ def MOVSTOUW : VISInstFormat<0b100010001, (outs IntRegs :$rd),
271+ (ins FPRegs :$rs2), "movstouw $rs2, $rd">;
253272def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd),
254- (ins DFPRegs:$rs2), "movdtox $rs2, $rd", [] >;
255- def MOVWTOS : VISInstFormat<0b100011001, (outs DFPRegs :$rd),
256- (ins I64Regs :$rs2), "movdtox $rs2, $rd", [] >;
273+ (ins DFPRegs:$rs2), "movdtox $rs2, $rd">;
274+ def MOVWTOS : VISInstFormat<0b100011001, (outs FPRegs :$rd),
275+ (ins IntRegs :$rs2), "movwtos $rs2, $rd">;
257276def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd),
258- (ins I64Regs:$rs2), "movdtox $rs2, $rd", [] >;
277+ (ins I64Regs:$rs2), "movxtod $rs2, $rd">;
259278}
260279
261- def PDISTN : VISInst <0b000111111, "pdistn">;
// pdistn and the 64-bit multiply records.
//  - PDISTN moves from VISInst to VISInstID, i.e. its destination becomes an
//    I64Regs register while both sources stay in DFPRegs.
//  - XMULXHI's opf value changes from 0b100010111 to 0b100010110.
//    NOTE(review): confirm the new encoding against the architecture manual.
261- def PDISTN : VISInst <0b000111111, "pdistn">;
280+ def PDISTN : VISInstID <0b000111111, "pdistn">;
262281
263282def UMULXHI : VISInst<0b000010110, "umulxhi", I64Regs>;
264283def XMULX : VISInst<0b100010101, "xmulx", I64Regs>;
265- def XMULXHI : VISInst<0b100010111 , "xmulxhi", I64Regs>;
284+ def XMULXHI : VISInst<0b100010110 , "xmulxhi", I64Regs>;
266285} // Predicates = [HasVIS3]
286+
287+ // FP immediate patterns.
// Each leaf matches a floating-point immediate (fAny) whose value is exactly
// the given constant, as tested by Imm.isExactlyValue. +0.0 and -0.0 get
// separate leaves, as do +0.5 and -0.5; the Pat records below refer to them.
288+ def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
289+ def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
290+ def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
291+ def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;
292+
293+ // VIS instruction patterns.
// Only active when the HasVIS predicate holds. FZERO, FZEROS, FNEGD and
// FNEGS are referenced here but defined outside the visible part of this
// file.
294+ let Predicates = [HasVIS] in {
295+ // Zero immediate.
// +0.0 selects the zeroing instruction directly; -0.0 is produced by
// applying the matching negate instruction to that zero.
296+ def : Pat<(f64 fpimm0), (FZERO)>;
297+ def : Pat<(f32 fpimm0), (FZEROS)>;
298+ def : Pat<(f64 fpnegimm0), (FNEGD (FZERO))>;
299+ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
300+ } // Predicates = [HasVIS]
301+
302+ // VIS3 instruction patterns.
// Only active when the HasVIS3 predicate holds. SETHIi, SLLXri, SRAXri,
// SRLri, ADDri, ADDrr, ANDrr, SUBrr and zanyext are referenced here but
// defined outside the visible part of this file.
303+ let Predicates = [HasVIS3] in {
304+ // +/-0.5 immediate.
305+ // This is needed to enable halving instructions.
306+ // FIXME generalize this to arbitrary immediates.
307+ // SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
308+ // faster than constant pool loading.
// The constant is built in an integer register and then moved to the FP
// side. NOTE(review): presumably SETHIi places its immediate in bits 31:10,
// so 0x0FC000/0x2FC000 give 0x3F000000/0xBF000000 (+/-0.5 as f32) and
// 0x0FF800/0x2FF800 give 0x3FE00000/0xBFE00000, which the 32-bit left shift
// turns into the high word of +/-0.5 as f64 -- confirm against SETHIi.
309+ def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
310+ def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
311+ def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
312+ def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
313+
314+ def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
315+
316+ def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
317+ // Signed "MULXHI".
318+ // Based on the formula presented in OSA2011 §7.140, but with bitops to select
319+ // the values to be added.
320+ // TODO: This expansion should probably be moved to DAG legalization phase.
// (SRAXri x, 63) is an arithmetic shift by 63, i.e. all-ones when x is
// negative and zero otherwise, so each ANDrr yields either the other operand
// or 0. The sum of the two is subtracted from the unsigned high product.
321+ def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
322+ (SUBrr (UMULXHI $lhs, $rhs),
323+ (ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
324+ (ANDrr (SRAXri $rhs, 63), $lhs)))>;
325+
326+ def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
327+ def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
328+ // 32-bit LZCNT.
329+ // The zero extension will leave us with 32 extra leading zeros,
330+ // so we need to compensate for it.
331+ // FIXME remove this when the codegen supports using 64-bit values directly
332+ // in V8+ mode.
// (SRLri $src, 0) is the zero extension referred to above; adding -32
// removes the 32 extra leading zeros from the 64-bit count.
333+ def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
334+ def : Pat<(i32 (ctlz_zero_undef i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
335+
// Bit-pattern moves between integer and FP values. A plain or
// zero/any-extended f32->integer bitconvert uses MOVSTOUW; the
// sign-extended form uses MOVSTOSW.
336+ def : Pat<(i32 (bitconvert f32:$src)), (MOVSTOUW $src)>;
337+ def : Pat<(i64 (zanyext (i32 (bitconvert f32:$src)))), (MOVSTOUW $src)>;
338+ def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))), (MOVSTOSW $src)>;
339+ def : Pat<(f32 (bitconvert i32:$src)), (MOVWTOS $src)>;
340+ def : Pat<(i64 (bitconvert f64:$src)), (MOVDTOX $src)>;
341+ def : Pat<(f64 (bitconvert i64:$src)), (MOVXTOD $src)>;
342+
343+ // OP-then-neg FP operations.
344+ // TODO handle equivalent patterns like `rs1*-rs2`.
// The last pattern matches a negated f64 product of two extended f32
// operands and selects FNSMULD on the original f32 values.
345+ def : Pat<(f32 (fneg (fadd f32:$rs1, f32:$rs2))), (FNADDS $rs1, $rs2)>;
346+ def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
347+ def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
348+ def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
349+ def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
350+
351+ // Op-then-halve FP operations.
// A sum or difference multiplied by the +0.5 leaf selects FHADD*/FHSUB*; a
// sum multiplied by the -0.5 leaf selects FNHADD*. No pattern is given for
// a difference multiplied by -0.5.
352+ def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
353+ def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
354+ def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
355+ def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
356+ def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
357+ def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
358+ } // Predicates = [HasVIS3]
0 commit comments