@@ -45,10 +45,10 @@ class VISInst2<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
4545 !strconcat(OpcStr, " $rs2, $rd")>;
4646
4747// For VIS Instructions with only rd operand.
// Review note on this hunk: the removed ('-') lines declared an input operand
// $f and tied it to $rd through Constraints; the added ('+') lines declare an
// empty (ins) list instead. Both versions fix rs1 and rs2 to 0 and print only
// "$rd" after the mnemonic. RC defaults to DFPRegs.
48- let Constraints = "$rd = $f", rs1 = 0, rs2 = 0 in
48+ let rs1 = 0, rs2 = 0 in
4949class VISInstD<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
5050 : VISInstFormat<opfval,
51- (outs RC:$rd), (ins RC:$f ),
51+ (outs RC:$rd), (ins),
5252 !strconcat(OpcStr, " $rd")>;
5353
5454// VIS 1 Instructions
@@ -259,14 +259,14 @@ def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
259259 (ins I64Regs:$rs2), "lzcnt $rs2, $rd">;
260260
// Single-source move instructions; rs1 is fixed to 0 for all five defs.
// Review note on this hunk: MOVSTOSW, MOVSTOUW and MOVWTOS switch their
// operands from I64Regs/DFPRegs to IntRegs/FPRegs, while MOVDTOX and MOVXTOD
// keep I64Regs/DFPRegs.
// NOTE(review): presumably FPRegs/IntRegs are the 32-bit-capable classes that
// the f32/i32 bitconvert patterns need — confirm against the register class
// definitions.
261261let rs1 = 0 in {
262- def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs :$rd),
263- (ins DFPRegs :$rs2), "movstosw $rs2, $rd">;
264- def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs :$rd),
265- (ins DFPRegs :$rs2), "movstouw $rs2, $rd">;
262+ def MOVSTOSW : VISInstFormat<0b100010011, (outs IntRegs :$rd),
263+ (ins FPRegs :$rs2), "movstosw $rs2, $rd">;
264+ def MOVSTOUW : VISInstFormat<0b100010001, (outs IntRegs :$rd),
265+ (ins FPRegs :$rs2), "movstouw $rs2, $rd">;
266266def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd),
267267 (ins DFPRegs:$rs2), "movdtox $rs2, $rd">;
268- def MOVWTOS : VISInstFormat<0b100011001, (outs DFPRegs :$rd),
269- (ins I64Regs :$rs2), "movwtos $rs2, $rd">;
268+ def MOVWTOS : VISInstFormat<0b100011001, (outs FPRegs :$rd),
269+ (ins IntRegs :$rs2), "movwtos $rs2, $rd">;
270270def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd),
271271 (ins I64Regs:$rs2), "movxtod $rs2, $rd">;
272272}
@@ -277,3 +277,74 @@ def UMULXHI : VISInst<0b000010110, "umulxhi", I64Regs>;
277277def XMULX : VISInst<0b100010101, "xmulx", I64Regs>;
278278def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
279279} // Predicates = [IsVIS3]
280+
281+ // FP immediate patterns.
// Each leaf is declared over fAny and accepts an FP immediate only when
// Imm.isExactlyValue() returns true for the listed constant
// (+0.0, -0.0, +0.5, -0.5).
// NOTE(review): fpimm0 and fpnegimm0 rely on isExactlyValue treating +0.0 and
// -0.0 as different values — confirm against the APFloat documentation.
282+ def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
283+ def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
284+ def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
285+ def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;
286+
287+ // VIS instruction patterns.
// These patterns are guarded by the HasVIS predicate.
288+ let Predicates = [HasVIS] in {
289+ // Zero immediate.
// +0.0 selects FZERO (f64) or FZEROS (f32), neither of which takes an operand
// here. -0.0 is built by applying FNEGD / FNEGS to that zero.
290+ def : Pat<(f64 fpimm0), (FZERO)>;
291+ def : Pat<(f32 fpimm0), (FZEROS)>;
292+ def : Pat<(f64 fpnegimm0), (FNEGD (FZERO))>;
293+ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
294+ } // Predicates = [HasVIS]
295+
296+ // VIS3 instruction patterns.
// Every pattern in this block is guarded by the HasVIS3 predicate.
297+ let Predicates = [HasVIS3] in {
298+ // +/-0.5 immediate.
299+ // This is needed to enable halving instructions.
300+ // FIXME generalize this to arbitrary immediates.
301+ // SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
302+ // faster than constant pool loading.
// NOTE(review): assuming SETHIi places its immediate in bits 31:10 of the
// result, the constants below expand to 0x3F000000 / 0xBF000000 for f32 and,
// after the 32-bit SLLXri, to 0x3FE0000000000000 / 0xBFE0000000000000 for f64,
// i.e. the IEEE-754 bit patterns of +0.5 / -0.5 — confirm against the ISA.
303+ def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
304+ def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
305+ def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
306+ def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
307+
// 64-bit adde is selected as a single ADDXCCC.
308+ def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
309+
// Unsigned high half of a 64x64 multiply maps directly to UMULXHI.
310+ def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
311+ // Signed "MULXHI".
312+ // Based on the formula presented in OSA2011 §7.140, but with bitops to select
313+ // the values to be added.
314+ // TODO: This expansion should probably be moved to DAG legalization phase.
// NOTE(review): assuming SRAXri by 63 yields an all-ones mask for a negative
// operand and zero otherwise, the two ANDrr nodes pick $rhs when $lhs is
// negative and $lhs when $rhs is negative; their sum is then subtracted from
// the unsigned high product.
315+ def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
316+ (SUBrr (UMULXHI $lhs, $rhs),
317+ (ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
318+ (ANDrr (SRAXri $rhs, 63), $lhs)))>;
319+
// Both ctlz and ctlz_zero_undef on i64 are selected as LZCNT.
320+ def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
321+ def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
322+
// Bitconverts between integer and FP values use the direct move instructions.
// For an f32 source widened to i64, the zanyext form selects MOVSTOUW and the
// sext form selects MOVSTOSW.
323+ def : Pat<(i32 (bitconvert f32:$src)), (MOVSTOUW $src)>;
324+ def : Pat<(i64 (zanyext (i32 (bitconvert f32:$src)))), (MOVSTOUW $src)>;
325+ def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))), (MOVSTOSW $src)>;
326+ def : Pat<(f32 (bitconvert i32:$src)), (MOVWTOS $src)>;
327+ def : Pat<(i64 (bitconvert f64:$src)), (MOVDTOX $src)>;
328+ def : Pat<(f64 (bitconvert i64:$src)), (MOVXTOD $src)>;
329+
330+ // OP-then-neg FP operations.
// For fadd only a negation of the result is matched. For fmul the negation
// may be on the result or on either one of the operands. The FNSMULD patterns
// match the same three shapes with both f32 operands extended to f64.
331+ def : Pat<(f32 (fneg (fadd f32:$rs1, f32:$rs2))), (FNADDS $rs1, $rs2)>;
332+ def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
333+ def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
334+ def : Pat<(f32 (fmul (fneg f32:$rs1), f32:$rs2)), (FNMULS $rs1, $rs2)>;
335+ def : Pat<(f32 (fmul f32:$rs1, (fneg f32:$rs2))), (FNMULS $rs1, $rs2)>;
336+ def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
337+ def : Pat<(f64 (fmul (fneg f64:$rs1), f64:$rs2)), (FNMULD $rs1, $rs2)>;
338+ def : Pat<(f64 (fmul f64:$rs1, (fneg f64:$rs2))), (FNMULD $rs1, $rs2)>;
339+ def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
340+ def : Pat<(f64 (fmul (fneg (fpextend f32:$rs1)), (fpextend f32:$rs2))), (FNSMULD $rs1, $rs2)>;
341+ def : Pat<(f64 (fmul (fpextend f32:$rs1), (fneg (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
342+
343+ // Op-then-halve FP operations.
// A sum or difference multiplied by +0.5 selects FHADD* / FHSUB*; a sum
// multiplied by -0.5 selects FNHADD*. No pattern here covers a difference
// multiplied by -0.5.
344+ def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
345+ def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
346+ def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
347+ def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
348+ def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
349+ def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
350+ } // Predicates = [HasVIS3]
0 commit comments