Skip to content

Commit 5671c1f

Browse files
committed
Apply all VIS instruction fixes.
1 parent 4c69385 commit 5671c1f

File tree

1 file changed

+147
-55
lines changed

1 file changed

+147
-55
lines changed

llvm/lib/Target/Sparc/SparcInstrVIS.td

Lines changed: 147 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -7,76 +7,99 @@
77
//===----------------------------------------------------------------------===//
88
//
99
// This file contains instruction formats, definitions and patterns needed for
10-
// VIS, VIS II, VIS II instructions on SPARC.
10+
// VIS, VIS II, VIS III instructions on SPARC.
1111
//===----------------------------------------------------------------------===//
1212

13+
//
14+
// NOTE Capstone:
15+
// This file matches mostly
16+
// https://github.com/llvm/llvm-project/commit/ebacd46996a7f041be73cf31b5776503e8061e8b
17+
// because it had fixes for several instructions.
18+
// It can be reset to llvm-18 before the Sparc module is updated.
19+
//
20+
1321
// VIS Instruction Format.
14-
class VISInstFormat<bits<9> opfval, dag outs, dag ins, string asmstr,
15-
list<dag> pattern>
16-
: F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, pattern>;
22+
class VISInstFormat<bits<9> opfval, dag outs, dag ins, string asmstr>
23+
: F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, []>;
1724

1825
class VISInst<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
1926
: VISInstFormat<opfval,
2027
(outs RC:$rd), (ins RC:$rs1, RC:$rs2),
21-
!strconcat(OpcStr, " $rs1, $rs2, $rd"), []>;
28+
!strconcat(OpcStr, " $rs1, $rs2, $rd")>;
2229

2330
// VIS Instruction with integer destination register.
2431
class VISInstID<bits<9> opfval, string OpcStr>
2532
: VISInstFormat<opfval,
2633
(outs I64Regs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
27-
!strconcat(OpcStr, " $rs1, $rs2, $rd"), []>;
34+
!strconcat(OpcStr, " $rs1, $rs2, $rd")>;
2835

2936
// For VIS Instructions with no operand.
3037
let rd = 0, rs1 = 0, rs2 = 0 in
3138
class VISInst0<bits<9> opfval, string asmstr>
32-
: VISInstFormat<opfval, (outs), (ins), asmstr, []>;
39+
: VISInstFormat<opfval, (outs), (ins), asmstr>;
3340

3441
// For VIS Instructions with only rs1, rd operands.
3542
let rs2 = 0 in
3643
class VISInst1<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
3744
: VISInstFormat<opfval,
3845
(outs RC:$rd), (ins RC:$rs1),
39-
!strconcat(OpcStr, " $rs1, $rd"), []>;
46+
!strconcat(OpcStr, " $rs1, $rd")>;
4047

4148
// For VIS Instructions with only rs2, rd operands.
4249
let rs1 = 0 in
4350
class VISInst2<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
4451
: VISInstFormat<opfval,
4552
(outs RC:$rd), (ins RC:$rs2),
46-
!strconcat(OpcStr, " $rs2, $rd"), []>;
53+
!strconcat(OpcStr, " $rs2, $rd")>;
4754

4855
// For VIS Instructions with only rd operand.
49-
let Constraints = "$rd = $f", rs1 = 0, rs2 = 0 in
56+
let rs1 = 0, rs2 = 0 in
5057
class VISInstD<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
5158
: VISInstFormat<opfval,
52-
(outs RC:$rd), (ins RC:$f),
53-
!strconcat(OpcStr, " $rd"), []>;
59+
(outs RC:$rd), (ins),
60+
!strconcat(OpcStr, " $rd")>;
5461

5562
// VIS 1 Instructions
5663
let Predicates = [HasVIS] in {
5764

5865
def FPADD16 : VISInst<0b001010000, "fpadd16">;
59-
def FPADD16S : VISInst<0b001010001, "fpadd16s">;
66+
def FPADD16S : VISInst<0b001010001, "fpadd16s", FPRegs>;
6067
def FPADD32 : VISInst<0b001010010, "fpadd32">;
61-
def FPADD32S : VISInst<0b001010011, "fpadd32s">;
68+
def FPADD32S : VISInst<0b001010011, "fpadd32s", FPRegs>;
6269
def FPSUB16 : VISInst<0b001010100, "fpsub16">;
63-
def FPSUB16S : VISInst<0b001010101, "fpsub16S">;
70+
def FPSUB16S : VISInst<0b001010101, "fpsub16s", FPRegs>;
6471
def FPSUB32 : VISInst<0b001010110, "fpsub32">;
65-
def FPSUB32S : VISInst<0b001010111, "fpsub32S">;
72+
def FPSUB32S : VISInst<0b001010111, "fpsub32s", FPRegs>;
6673

6774
def FPACK16 : VISInst2<0b000111011, "fpack16">;
6875
def FPACK32 : VISInst <0b000111010, "fpack32">;
69-
def FPACKFIX : VISInst2<0b000111101, "fpackfix">;
70-
def FEXPAND : VISInst2<0b001001101, "fexpand">;
71-
def FPMERGE : VISInst <0b001001011, "fpmerge">;
72-
73-
def FMUL8X16 : VISInst<0b000110001, "fmul8x16">;
74-
def FMUL8X16AU : VISInst<0b000110011, "fmul8x16au">;
75-
def FMUL8X16AL : VISInst<0b000110101, "fmul8x16al">;
76+
let rs1 = 0 in
77+
def FPACKFIX : VISInstFormat<0b000111101,
78+
(outs FPRegs:$rd), (ins DFPRegs:$rs2), "fpackfix $rs2, $rd">;
79+
let rs1 = 0 in
80+
def FEXPAND : VISInstFormat<0b001001101,
81+
(outs DFPRegs:$rd), (ins FPRegs:$rs2), "fexpand $rs2, $rd">;
82+
def FPMERGE : VISInstFormat<0b001001011,
83+
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
84+
"fpmerge $rs1, $rs2, $rd">;
85+
86+
def FMUL8X16 : VISInstFormat<0b000110001,
87+
(outs DFPRegs:$rd), (ins FPRegs:$rs1, DFPRegs:$rs2),
88+
"fmul8x16 $rs1, $rs2, $rd">;
89+
def FMUL8X16AU : VISInstFormat<0b000110011,
90+
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
91+
"fmul8x16au $rs1, $rs2, $rd">;
92+
def FMUL8X16AL : VISInstFormat<0b000110101,
93+
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
94+
"fmul8x16al $rs1, $rs2, $rd">;
7695
def FMUL8SUX16 : VISInst<0b000110110, "fmul8sux16">;
7796
def FMUL8ULX16 : VISInst<0b000110111, "fmul8ulx16">;
78-
def FMULD8SUX16 : VISInst<0b000111000, "fmuld8sux16">;
79-
def FMULD8ULX16 : VISInst<0b000111001, "fmuld8ulx16">;
97+
def FMULD8SUX16 : VISInstFormat<0b000111000,
98+
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
99+
"fmuld8sux16 $rs1, $rs2, $rd">;
100+
def FMULD8ULX16 : VISInstFormat<0b000111001,
101+
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
102+
"fmuld8ulx16 $rs1, $rs2, $rd">;
80103

81104
def ALIGNADDR : VISInst<0b000011000, "alignaddr", I64Regs>;
82105
def ALIGNADDRL : VISInst<0b000011010, "alignaddrl", I64Regs>;
@@ -148,10 +171,6 @@ def SHUTDOWN : VISInst0<0b010000000, "shutdown">;
148171
let Predicates = [HasVIS2] in {
149172

150173
def BMASK : VISInst<0b000011001, "bmask", I64Regs>;
151-
// Capstone NOTE:
152-
// BSHUFFLE was silently fixed with
153-
// https://github.com/llvm/llvm-project/commit/ebacd46996a7f041be73cf31b5776503e8061e8b
154-
// On rebase this can be reverted.
155174
def BSHUFFLE : VISInst<0b001001100, "bshuffle">;
156175

157176
def SIAM : VISInst0<0b010000001, "siam">;
@@ -176,59 +195,59 @@ def ADDXCCC : VISInst<0b000010011, "addxccc", I64Regs>;
176195

177196
let rd = 0, rs1 = 0 in {
178197
def CMASK8 : VISInstFormat<0b000011011, (outs), (ins I64Regs:$rs2),
179-
"cmask8 $rs2", []>;
198+
"cmask8 $rs2">;
180199
def CMASK16 : VISInstFormat<0b000011101, (outs), (ins I64Regs:$rs2),
181-
"cmask16 $rs2", []>;
200+
"cmask16 $rs2">;
182201
def CMASK32 : VISInstFormat<0b000011111, (outs), (ins I64Regs:$rs2),
183-
"cmask32 $rs2", []>;
202+
"cmask32 $rs2">;
184203

185204
}
186205

187206
def FCHKSM16 : VISInst<0b001000100, "fchksm16">;
188207

189208
def FHADDS : F3_3<0b10, 0b110100, 0b001100001,
190-
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
209+
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
191210
"fhadds $rs1, $rs2, $rd", []>;
192211
def FHADDD : F3_3<0b10, 0b110100, 0b001100010,
193212
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
194213
"fhaddd $rs1, $rs2, $rd", []>;
195214
def FHSUBS : F3_3<0b10, 0b110100, 0b001100101,
196-
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
215+
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
197216
"fhsubs $rs1, $rs2, $rd", []>;
198217
def FHSUBD : F3_3<0b10, 0b110100, 0b001100110,
199218
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
200219
"fhsubd $rs1, $rs2, $rd", []>;
201220
def FLCMPS : VISInstFormat<0b101010001, (outs FCCRegs:$rd),
202-
(ins DFPRegs:$rs1, DFPRegs:$rs2),
203-
"flcmps $rd, $rs1, $rs2", []>;
221+
(ins FPRegs:$rs1, FPRegs:$rs2),
222+
"flcmps $rd, $rs1, $rs2">;
204223
def FLCMPD : VISInstFormat<0b101010010, (outs FCCRegs:$rd),
205224
(ins DFPRegs:$rs1, DFPRegs:$rs2),
206-
"flcmpd $rd, $rs1, $rs2", []>;
225+
"flcmpd $rd, $rs1, $rs2">;
207226

208227
def FMEAN16 : VISInst<0b001000000, "fmean16">;
209228

210229
def FNADDS : F3_3<0b10, 0b110100, 0b001010001,
211-
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
230+
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
212231
"fnadds $rs1, $rs2, $rd", []>;
213232
def FNADDD : F3_3<0b10, 0b110100, 0b001010010,
214233
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
215234
"fnaddd $rs1, $rs2, $rd", []>;
216235
def FNHADDS : F3_3<0b10, 0b110100, 0b001110001,
217-
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
236+
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
218237
"fnhadds $rs1, $rs2, $rd", []>;
219238
def FNHADDD : F3_3<0b10, 0b110100, 0b001110010,
220239
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
221240
"fnhaddd $rs1, $rs2, $rd", []>;
222241

223242
def FNMULS : F3_3<0b10, 0b110100, 0b001011001,
224-
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
225-
"fnhadds $rs1, $rs2, $rd", []>;
243+
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
244+
"fnmuls $rs1, $rs2, $rd", []>;
226245
def FNMULD : F3_3<0b10, 0b110100, 0b001011010,
227246
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
228-
"fnhaddd $rs1, $rs2, $rd", []>;
247+
"fnmuld $rs1, $rs2, $rd", []>;
229248
def FNSMULD : F3_3<0b10, 0b110100, 0b001111001,
230-
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
231-
"fnhadds $rs1, $rs2, $rd", []>;
249+
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
250+
"fnsmuld $rs1, $rs2, $rd", []>;
232251

233252
def FPADD64 : VISInst<0b001000010, "fpadd64">;
234253

@@ -243,24 +262,97 @@ def FSRA32 : VISInst<0b000101111, "fsra32">;
243262

244263
let rs1 = 0 in
245264
def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
246-
(ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>;
265+
(ins I64Regs:$rs2), "lzcnt $rs2, $rd">;
247266

248267
let rs1 = 0 in {
249-
def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
250-
(ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>;
251-
def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
252-
(ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>;
268+
def MOVSTOSW : VISInstFormat<0b100010011, (outs IntRegs:$rd),
269+
(ins FPRegs:$rs2), "movstosw $rs2, $rd">;
270+
def MOVSTOUW : VISInstFormat<0b100010001, (outs IntRegs:$rd),
271+
(ins FPRegs:$rs2), "movstouw $rs2, $rd">;
253272
def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd),
254-
(ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>;
255-
def MOVWTOS : VISInstFormat<0b100011001, (outs DFPRegs:$rd),
256-
(ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
273+
(ins DFPRegs:$rs2), "movdtox $rs2, $rd">;
274+
def MOVWTOS : VISInstFormat<0b100011001, (outs FPRegs:$rd),
275+
(ins IntRegs:$rs2), "movwtos $rs2, $rd">;
257276
def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd),
258-
(ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
277+
(ins I64Regs:$rs2), "movxtod $rs2, $rd">;
259278
}
260279

261-
def PDISTN : VISInst<0b000111111, "pdistn">;
280+
def PDISTN : VISInstID<0b000111111, "pdistn">;
262281

263282
def UMULXHI : VISInst<0b000010110, "umulxhi", I64Regs>;
264283
def XMULX : VISInst<0b100010101, "xmulx", I64Regs>;
265-
def XMULXHI : VISInst<0b100010111, "xmulxhi", I64Regs>;
284+
def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
266285
} // Predicates = [HasVIS3]
286+
287+
// FP immediate patterns.
288+
def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
289+
def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
290+
def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
291+
def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;
292+
293+
// VIS instruction patterns.
294+
let Predicates = [HasVIS] in {
295+
// Zero immediate.
296+
def : Pat<(f64 fpimm0), (FZERO)>;
297+
def : Pat<(f32 fpimm0), (FZEROS)>;
298+
def : Pat<(f64 fpnegimm0), (FNEGD (FZERO))>;
299+
def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
300+
} // Predicates = [HasVIS]
301+
302+
// VIS3 instruction patterns.
303+
let Predicates = [HasVIS3] in {
304+
// +/-0.5 immediate.
305+
// This is needed to enable halving instructions.
306+
// FIXME generalize this to arbitrary immediates.
307+
// SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
308+
// faster than constant pool loading.
309+
def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
310+
def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
311+
def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
312+
def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
313+
314+
def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
315+
316+
def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
317+
// Signed "MULXHI".
318+
// Based on the formula presented in OSA2011 §7.140, but with bitops to select
319+
// the values to be added.
320+
// TODO: This expansion should probably be moved to DAG legalization phase.
321+
def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
322+
(SUBrr (UMULXHI $lhs, $rhs),
323+
(ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
324+
(ANDrr (SRAXri $rhs, 63), $lhs)))>;
325+
326+
def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
327+
def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
328+
// 32-bit LZCNT.
329+
// The zero extension will leave us with 32 extra leading zeros,
330+
// so we need to compensate for it.
331+
// FIXME remove this when the codegen supports using 64-bit values directly
332+
// in V8+ mode.
333+
def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
334+
def : Pat<(i32 (ctlz_zero_undef i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
335+
336+
def : Pat<(i32 (bitconvert f32:$src)), (MOVSTOUW $src)>;
337+
def : Pat<(i64 (zanyext (i32 (bitconvert f32:$src)))), (MOVSTOUW $src)>;
338+
def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))), (MOVSTOSW $src)>;
339+
def : Pat<(f32 (bitconvert i32:$src)), (MOVWTOS $src)>;
340+
def : Pat<(i64 (bitconvert f64:$src)), (MOVDTOX $src)>;
341+
def : Pat<(f64 (bitconvert i64:$src)), (MOVXTOD $src)>;
342+
343+
// OP-then-neg FP operations.
344+
// TODO handle equivalent patterns like `rs1*-rs2`.
345+
def : Pat<(f32 (fneg (fadd f32:$rs1, f32:$rs2))), (FNADDS $rs1, $rs2)>;
346+
def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
347+
def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
348+
def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
349+
def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
350+
351+
// Op-then-halve FP operations.
352+
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
353+
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
354+
def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
355+
def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
356+
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
357+
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
358+
} // Predicates = [HasVIS3]

0 commit comments

Comments
 (0)