Apply all VIS instruction fixes.

Rot127 · Rot127 · commit 5671c1f999ef · 2025-08-19T12:33:35.000-05:00
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -7,76 +7,99 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains instruction formats, definitions and patterns needed for
-// VIS, VIS II, VIS II instructions on SPARC.
+// VIS, VIS II, VIS III instructions on SPARC.
 //===----------------------------------------------------------------------===//
 
+//
+// NOTE Capstone:
+// This file mostly matches upstream LLVM commit
+// https://github.com/llvm/llvm-project/commit/ebacd46996a7f041be73cf31b5776503e8061e8b
+// because that commit contains fixes for several VIS instructions.
+// It can be reset to the llvm-18 version before the Sparc module is updated.
+//
+
 // VIS Instruction Format.
-class VISInstFormat<bits<9> opfval, dag outs, dag ins, string asmstr,
-      list<dag> pattern>
-      : F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, pattern>;
+class VISInstFormat<bits<9> opfval, dag outs, dag ins, string asmstr>
+      : F3_3<0b10, 0b110110, opfval, outs, ins, asmstr, []>; // the pattern list is always empty; selection patterns are separate `def : Pat<...>` records
 
 class VISInst<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
        : VISInstFormat<opfval,
         (outs RC:$rd), (ins RC:$rs1, RC:$rs2),
-        !strconcat(OpcStr, " $rs1, $rs2, $rd"), []>;
+        !strconcat(OpcStr, " $rs1, $rs2, $rd")>;
 
 // VIS Instruction with integer destination register.
 class VISInstID<bits<9> opfval, string OpcStr>
        : VISInstFormat<opfval,
         (outs I64Regs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
-        !strconcat(OpcStr, " $rs1, $rs2, $rd"), []>;
+        !strconcat(OpcStr, " $rs1, $rs2, $rd")>;
 
 // For VIS Instructions with no operand.
 let rd = 0, rs1 = 0, rs2 = 0 in
 class VISInst0<bits<9> opfval, string asmstr>
-       : VISInstFormat<opfval, (outs), (ins), asmstr, []>;
+       : VISInstFormat<opfval, (outs), (ins), asmstr>;
 
 // For VIS Instructions with only rs1, rd operands.
 let rs2 = 0 in
 class VISInst1<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
        : VISInstFormat<opfval,
         (outs RC:$rd), (ins RC:$rs1),
-        !strconcat(OpcStr, " $rs1, $rd"), []>;
+        !strconcat(OpcStr, " $rs1, $rd")>;
 
 // For VIS Instructions with only rs2, rd operands.
 let rs1 = 0 in
 class VISInst2<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
        : VISInstFormat<opfval,
         (outs RC:$rd), (ins RC:$rs2),
-        !strconcat(OpcStr, " $rs2, $rd"), []>;
+        !strconcat(OpcStr, " $rs2, $rd")>;
 
 // For VIS Instructions with only rd operand.
-let Constraints = "$rd = $f", rs1 = 0, rs2 = 0 in
+let rs1 = 0, rs2 = 0 in // both source fields are fixed to 0 for every VISInstD
 class VISInstD<bits<9> opfval, string OpcStr, RegisterClass RC = DFPRegs>
        : VISInstFormat<opfval,
-        (outs RC:$rd), (ins RC:$f),
-        !strconcat(OpcStr, " $rd"), []>;
+        (outs RC:$rd), (ins), // no input operands: $rd is a pure output, no longer tied to an input
+        !strconcat(OpcStr, " $rd")>;
 
 // VIS 1 Instructions
 let Predicates = [HasVIS] in {
 
 def FPADD16     : VISInst<0b001010000, "fpadd16">;
-def FPADD16S    : VISInst<0b001010001, "fpadd16s">;
+def FPADD16S    : VISInst<0b001010001, "fpadd16s", FPRegs>;
 def FPADD32     : VISInst<0b001010010, "fpadd32">;
-def FPADD32S    : VISInst<0b001010011, "fpadd32s">;
+def FPADD32S    : VISInst<0b001010011, "fpadd32s", FPRegs>;
 def FPSUB16     : VISInst<0b001010100, "fpsub16">;
-def FPSUB16S    : VISInst<0b001010101, "fpsub16S">;
+def FPSUB16S    : VISInst<0b001010101, "fpsub16s", FPRegs>;
 def FPSUB32     : VISInst<0b001010110, "fpsub32">;
-def FPSUB32S    : VISInst<0b001010111, "fpsub32S">;
+def FPSUB32S    : VISInst<0b001010111, "fpsub32s", FPRegs>;
 
 def FPACK16     : VISInst2<0b000111011, "fpack16">;
 def FPACK32     : VISInst <0b000111010, "fpack32">;
-def FPACKFIX    : VISInst2<0b000111101, "fpackfix">;
-def FEXPAND     : VISInst2<0b001001101, "fexpand">;
-def FPMERGE     : VISInst <0b001001011, "fpmerge">;
-
-def FMUL8X16    : VISInst<0b000110001, "fmul8x16">;
-def FMUL8X16AU  : VISInst<0b000110011, "fmul8x16au">;
-def FMUL8X16AL  : VISInst<0b000110101, "fmul8x16al">;
+let rs1 = 0 in
+def FPACKFIX    : VISInstFormat<0b000111101,
+                  (outs FPRegs:$rd), (ins DFPRegs:$rs2), "fpackfix $rs2, $rd">;
+let rs1 = 0 in
+def FEXPAND     : VISInstFormat<0b001001101,
+                  (outs DFPRegs:$rd), (ins FPRegs:$rs2), "fexpand $rs2, $rd">;
+def FPMERGE     : VISInstFormat<0b001001011,
+                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
+                  "fpmerge $rs1, $rs2, $rd">;
+
+def FMUL8X16    : VISInstFormat<0b000110001,
+                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, DFPRegs:$rs2),
+                  "fmul8x16 $rs1, $rs2, $rd">;
+def FMUL8X16AU  : VISInstFormat<0b000110011,
+                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
+                  "fmul8x16au $rs1, $rs2, $rd">;
+def FMUL8X16AL  : VISInstFormat<0b000110101,
+                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
+                  "fmul8x16al $rs1, $rs2, $rd">;
 def FMUL8SUX16  : VISInst<0b000110110, "fmul8sux16">;
 def FMUL8ULX16  : VISInst<0b000110111, "fmul8ulx16">;
-def FMULD8SUX16 : VISInst<0b000111000, "fmuld8sux16">;
-def FMULD8ULX16 : VISInst<0b000111001, "fmuld8ulx16">;
+def FMULD8SUX16 : VISInstFormat<0b000111000,
+                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
+                  "fmuld8sux16 $rs1, $rs2, $rd">;
+def FMULD8ULX16 : VISInstFormat<0b000111001,
+                  (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
+                  "fmuld8ulx16 $rs1, $rs2, $rd">;
 
 def ALIGNADDR   : VISInst<0b000011000, "alignaddr", I64Regs>;
 def ALIGNADDRL  : VISInst<0b000011010, "alignaddrl", I64Regs>;
@@ -148,10 +171,6 @@ def SHUTDOWN    : VISInst0<0b010000000, "shutdown">;
 let Predicates = [HasVIS2] in {
 
 def BMASK     : VISInst<0b000011001, "bmask", I64Regs>;
-// Capstone NOTE:
-// BSHUFFLE was silently fixed with
-// https://github.com/llvm/llvm-project/commit/ebacd46996a7f041be73cf31b5776503e8061e8b
-// On rebase this can be reverted.
 def BSHUFFLE  : VISInst<0b001001100, "bshuffle">;
 
 def SIAM      : VISInst0<0b010000001, "siam">;
@@ -176,59 +195,59 @@ def ADDXCCC : VISInst<0b000010011, "addxccc", I64Regs>;
 
 let rd = 0, rs1 = 0 in {
 def CMASK8  : VISInstFormat<0b000011011, (outs), (ins I64Regs:$rs2),
-              "cmask8 $rs2", []>;
+              "cmask8 $rs2">;
 def CMASK16  : VISInstFormat<0b000011101, (outs), (ins I64Regs:$rs2),
-              "cmask16 $rs2", []>;
+              "cmask16 $rs2">;
 def CMASK32  : VISInstFormat<0b000011111, (outs), (ins I64Regs:$rs2),
-              "cmask32 $rs2", []>;
+              "cmask32 $rs2">;
 
 }
 
 def FCHKSM16 : VISInst<0b001000100, "fchksm16">;
 
 def FHADDS   : F3_3<0b10, 0b110100, 0b001100001,
-                    (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
+                    (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                     "fhadds $rs1, $rs2, $rd", []>;
 def FHADDD   : F3_3<0b10, 0b110100, 0b001100010,
                     (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
                     "fhaddd $rs1, $rs2, $rd", []>;
 def FHSUBS   : F3_3<0b10, 0b110100, 0b001100101,
-                    (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
+                    (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                     "fhsubs $rs1, $rs2, $rd", []>;
 def FHSUBD   : F3_3<0b10, 0b110100, 0b001100110,
                     (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
                     "fhsubd $rs1, $rs2, $rd", []>;
 def FLCMPS   : VISInstFormat<0b101010001, (outs FCCRegs:$rd),
-                     (ins DFPRegs:$rs1, DFPRegs:$rs2),
-                     "flcmps $rd, $rs1, $rs2", []>;
+                     (ins FPRegs:$rs1, FPRegs:$rs2),
+                     "flcmps $rd, $rs1, $rs2">;
 def FLCMPD   : VISInstFormat<0b101010010, (outs FCCRegs:$rd),
                      (ins DFPRegs:$rs1, DFPRegs:$rs2),
-                     "flcmpd $rd, $rs1, $rs2", []>;
+                     "flcmpd $rd, $rs1, $rs2">;
 
 def FMEAN16  : VISInst<0b001000000, "fmean16">;
 
 def FNADDS   : F3_3<0b10, 0b110100, 0b001010001,
-                    (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
+                    (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                     "fnadds $rs1, $rs2, $rd", []>;
 def FNADDD   : F3_3<0b10, 0b110100, 0b001010010,
                     (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
                     "fnaddd $rs1, $rs2, $rd", []>;
 def FNHADDS  : F3_3<0b10, 0b110100, 0b001110001,
-                    (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
+                    (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
                     "fnhadds $rs1, $rs2, $rd", []>;
 def FNHADDD  : F3_3<0b10, 0b110100, 0b001110010,
                     (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
                     "fnhaddd $rs1, $rs2, $rd", []>;
 
 def FNMULS   : F3_3<0b10, 0b110100, 0b001011001,
-                    (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
-                    "fnhadds $rs1, $rs2, $rd", []>;
+                    (outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
+                    "fnmuls $rs1, $rs2, $rd", []>;
 def FNMULD   : F3_3<0b10, 0b110100, 0b001011010,
                     (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
-                    "fnhaddd $rs1, $rs2, $rd", []>;
+                    "fnmuld $rs1, $rs2, $rd", []>;
 def FNSMULD  : F3_3<0b10, 0b110100, 0b001111001,
-                    (outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
-                    "fnhadds $rs1, $rs2, $rd", []>;
+                    (outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
+                    "fnsmuld $rs1, $rs2, $rd", []>;
 
 def FPADD64   : VISInst<0b001000010, "fpadd64">;
 
@@ -243,24 +262,97 @@ def FSRA32    : VISInst<0b000101111, "fsra32">;
 
 let rs1 = 0 in
 def LZCNT     : VISInstFormat<0b000010111, (outs I64Regs:$rd),
-                   (ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>;
+                   (ins I64Regs:$rs2), "lzcnt $rs2, $rd">;
 
 let rs1 = 0 in {
-def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
-                   (ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>;
-def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
-                   (ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>;
+def MOVSTOSW : VISInstFormat<0b100010011, (outs IntRegs:$rd),
+                   (ins FPRegs:$rs2), "movstosw $rs2, $rd">;
+def MOVSTOUW : VISInstFormat<0b100010001, (outs IntRegs:$rd),
+                   (ins FPRegs:$rs2), "movstouw $rs2, $rd">;
 def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd),
-                   (ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>;
-def MOVWTOS  :  VISInstFormat<0b100011001, (outs DFPRegs:$rd),
-                   (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
+                   (ins DFPRegs:$rs2), "movdtox $rs2, $rd">;
+def MOVWTOS  :  VISInstFormat<0b100011001, (outs FPRegs:$rd),
+                   (ins IntRegs:$rs2), "movwtos $rs2, $rd">;
 def MOVXTOD  :  VISInstFormat<0b100011000, (outs DFPRegs:$rd),
-                   (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
+                   (ins I64Regs:$rs2), "movxtod $rs2, $rd">;
 }
 
-def PDISTN   : VISInst<0b000111111, "pdistn">;
+def PDISTN   : VISInstID<0b000111111, "pdistn">;
 
 def UMULXHI  : VISInst<0b000010110, "umulxhi", I64Regs>;
 def XMULX    : VISInst<0b100010101, "xmulx",   I64Regs>;
-def XMULXHI  : VISInst<0b100010111, "xmulxhi", I64Regs>;
+def XMULXHI  : VISInst<0b100010110, "xmulxhi", I64Regs>;
-} // Predicates = [IsVIS3]
+} // Predicates = [HasVIS3]
+
+// FP immediate leaves matching exactly +0.0, -0.0, +0.5 and -0.5 (via Imm.isExactlyValue).
+def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
+def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
+def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
+def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;
+
+// VIS instruction patterns.
+let Predicates = [HasVIS] in {
+// Zero immediate: +0.0 selects FZERO/FZEROS, and -0.0 selects FNEGD/FNEGS applied to that result.
+def : Pat<(f64 fpimm0), (FZERO)>;
+def : Pat<(f32 fpimm0), (FZEROS)>;
+def : Pat<(f64 fpnegimm0), (FNEGD (FZERO))>;
+def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
+} // Predicates = [HasVIS]
+
+// VIS3 instruction patterns.
+let Predicates = [HasVIS3] in {
+// +/-0.5 immediate, needed so that the op-then-halve patterns below can match.
+// NOTE(review): the SETHIi operands look like the upper bits of the IEEE-754
+// encodings (0x0FC000 << 10 == 0x3F000000, i.e. 0.5f) - confirm.
+// FIXME generalize this to arbitrary immediates: a SET/MOVWTOS or SETX/MOVXTOD
+// pair should let us materialize FP constants faster than constant pool loading.
+def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
+def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
+def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
+def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
+
+def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
+
+def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
+// Signed "MULXHI": UMULXHI(lhs, rhs) - ((SRAX(lhs, 63) & rhs) + (SRAX(rhs, 63) & lhs)).
+// Based on the formula presented in OSA2011 §7.140, but with bitops to select
+// the values to be added.
+// TODO: This expansion should probably be moved to DAG legalization phase.
+def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
+      (SUBrr (UMULXHI $lhs, $rhs),
+             (ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
+                    (ANDrr (SRAXri $rhs, 63), $lhs)))>;
+
+def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
+def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
+// 32-bit LZCNT.
+// The zero extension will leave us with 32 extra leading zeros,
+// so we need to compensate for it by adding -32 to the LZCNT result.
+// FIXME remove this when the codegen supports using 64-bit values directly
+// in V8+ mode.
+def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
+def : Pat<(i32 (ctlz_zero_undef i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
+
+def : Pat<(i32 (bitconvert f32:$src)), (MOVSTOUW $src)>;
+def : Pat<(i64 (zanyext (i32 (bitconvert f32:$src)))), (MOVSTOUW $src)>;
+def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))), (MOVSTOSW $src)>;
+def : Pat<(f32 (bitconvert i32:$src)), (MOVWTOS $src)>;
+def : Pat<(i64 (bitconvert f64:$src)), (MOVDTOX $src)>;
+def : Pat<(f64 (bitconvert i64:$src)), (MOVXTOD $src)>;
+
+// Op-then-negate FP operations.
+// TODO handle equivalent patterns like `rs1*-rs2`.
+def : Pat<(f32 (fneg (fadd f32:$rs1, f32:$rs2))), (FNADDS $rs1, $rs2)>;
+def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
+def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
+def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
+def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
+
+// Op-then-halve FP operations; multiplying the sum by -0.5 selects the negated FNHADD* forms.
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
+} // Predicates = [HasVIS3]