llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Lines changed: 68 additions & 109 deletions
@@ -160,7 +160,6 @@ def hasDotInstructions : Predicate<"Subtarget->hasDotInstructions()">;
 def hasTcgen05Instructions : Predicate<"Subtarget->hasTcgen05Instructions()">;
 
 def True : Predicate<"true">;
-def False : Predicate<"false">;
 
 class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
 class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
@@ -965,31 +964,17 @@ def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
 def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
 
 // Matchers for signed, unsigned mul.wide ISD nodes.
-def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)),
-          (MULWIDES32 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_signed i16:$a, imm:$b)),
-          (MULWIDES32Imm $a, imm:$b)>,
-      Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)),
-          (MULWIDEU32 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_unsigned i16:$a, imm:$b)),
-          (MULWIDEU32Imm $a, imm:$b)>,
-      Requires<[doMulWide]>;
+let Predicates = [doMulWide] in {
+  def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)), (MULWIDES32 $a, $b)>;
+  def : Pat<(i32 (mul_wide_signed i16:$a, imm:$b)), (MULWIDES32Imm $a, imm:$b)>;
+  def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)), (MULWIDEU32 $a, $b)>;
+  def : Pat<(i32 (mul_wide_unsigned i16:$a, imm:$b)), (MULWIDEU32Imm $a, imm:$b)>;
 
-def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
-          (MULWIDES64 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_signed i32:$a, imm:$b)),
-          (MULWIDES64Imm $a, imm:$b)>,
-      Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
-          (MULWIDEU64 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned i32:$a, imm:$b)),
-          (MULWIDEU64Imm $a, imm:$b)>,
-      Requires<[doMulWide]>;
+  def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)), (MULWIDES64 $a, $b)>;
+  def : Pat<(i64 (mul_wide_signed i32:$a, imm:$b)), (MULWIDES64Imm $a, imm:$b)>;
+  def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)), (MULWIDEU64 $a, $b)>;
+  def : Pat<(i64 (mul_wide_unsigned i32:$a, imm:$b)), (MULWIDEU64Imm $a, imm:$b)>;
+}
 
 // Predicates used for converting some patterns to mul.wide.
 def SInt32Const : PatLeaf<(imm), [{
@@ -1115,18 +1100,12 @@ defm MAD32 : MAD<"mad.lo.s32", i32, Int32Regs, i32imm>;
 defm MAD64 : MAD<"mad.lo.s64", i64, Int64Regs, i64imm>;
 }
 
-def INEG16 :
-  BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
-            "neg.s16",
-            [(set i16:$dst, (ineg i16:$src))]>;
-def INEG32 :
-  BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
-            "neg.s32",
-            [(set i32:$dst, (ineg i32:$src))]>;
-def INEG64 :
-  BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
-            "neg.s64",
-            [(set i64:$dst, (ineg i64:$src))]>;
+foreach t = [I16RT, I32RT, I64RT] in {
+  def NEG_S # t.Size :
+    BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src),
+              "neg.s" # t.Size,
+              [(set t.Ty:$dst, (ineg t.Ty:$src))]>;
+}
 
 //-----------------------------------
 // Floating Point Arithmetic
@@ -2506,24 +2485,20 @@ def : Pat<(f16 (uint_to_fp i32:$a)), (CVT_f16_u32 $a, CvtRN)>;
 def : Pat<(f16 (uint_to_fp i64:$a)), (CVT_f16_u64 $a, CvtRN)>;
 
 // sint -> bf16
-def : Pat<(bf16 (sint_to_fp i1:$a)), (CVT_bf16_s32 (SELP_b32ii 1, 0, $a), CvtRN)>, 
-      Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i16:$a)), (CVT_bf16_s16 $a, CvtRN)>,
-      Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i32:$a)), (CVT_bf16_s32 $a, CvtRN)>,
-      Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i64:$a)), (CVT_bf16_s64 $a, CvtRN)>,
-      Requires<[hasPTX<78>, hasSM<90>]>;
+let Predicates = [hasPTX<78>, hasSM<90>] in {
+  def : Pat<(bf16 (sint_to_fp i1:$a)), (CVT_bf16_s32 (SELP_b32ii 1, 0, $a), CvtRN)>;
+  def : Pat<(bf16 (sint_to_fp i16:$a)), (CVT_bf16_s16 $a, CvtRN)>;
+  def : Pat<(bf16 (sint_to_fp i32:$a)), (CVT_bf16_s32 $a, CvtRN)>;
+  def : Pat<(bf16 (sint_to_fp i64:$a)), (CVT_bf16_s64 $a, CvtRN)>;
+}
 
 // uint -> bf16
-def : Pat<(bf16 (uint_to_fp i1:$a)), (CVT_bf16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>,
-      Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (uint_to_fp i16:$a)), (CVT_bf16_u16 $a, CvtRN)>,
-      Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (uint_to_fp i32:$a)), (CVT_bf16_u32 $a, CvtRN)>,
-      Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (uint_to_fp i64:$a)), (CVT_bf16_u64 $a, CvtRN)>,
-      Requires<[hasPTX<78>, hasSM<90>]>;
+let Predicates = [hasPTX<78>, hasSM<90>] in {
+  def : Pat<(bf16 (uint_to_fp i1:$a)), (CVT_bf16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
+  def : Pat<(bf16 (uint_to_fp i16:$a)), (CVT_bf16_u16 $a, CvtRN)>;
+  def : Pat<(bf16 (uint_to_fp i32:$a)), (CVT_bf16_u32 $a, CvtRN)>;
+  def : Pat<(bf16 (uint_to_fp i64:$a)), (CVT_bf16_u64 $a, CvtRN)>;
+}
 
 // sint -> f32
 def : Pat<(f32 (sint_to_fp  i1:$a)), (CVT_f32_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
@@ -2574,27 +2549,25 @@ def : Pat<(i16 (fp_to_uint bf16:$a)), (CVT_u16_bf16 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_uint bf16:$a)), (CVT_u32_bf16 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_uint bf16:$a)), (CVT_u64_bf16 $a, CvtRZI)>;
 // f32 -> sint
-def : Pat<(i1 (fp_to_sint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>;
-def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 $a, CvtRZI_FTZ)>,
-      Requires<[doF32FTZ]>;
+let Predicates = [doF32FTZ] in {
+  def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 $a, CvtRZI_FTZ)>;
+  def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI_FTZ)>;
+  def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI_FTZ)>;
+}
+def : Pat<(i1  (fp_to_sint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>;
 def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 $a, CvtRZI)>;
-def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI_FTZ)>,
-      Requires<[doF32FTZ]>;
 def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI)>;
-def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI_FTZ)>,
-      Requires<[doF32FTZ]>;
 def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI)>;
 
 // f32 -> uint
+let Predicates = [doF32FTZ] in {
+  def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 $a, CvtRZI_FTZ)>;
+  def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI_FTZ)>;
+  def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI_FTZ)>;
+}
 def : Pat<(i1  (fp_to_uint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>;
-def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 $a, CvtRZI_FTZ)>, 
-      Requires<[doF32FTZ]>;
 def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 $a, CvtRZI)>;
-def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI_FTZ)>,
-      Requires<[doF32FTZ]>;
 def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI)>;
-def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI_FTZ)>,
-      Requires<[doF32FTZ]>;
 def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI)>;
 
 // f64 -> sint
@@ -2716,28 +2689,24 @@ let hasSideEffects = false in {
 
   // PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
   // unused high/low part.
-  def I32toI16H_Sink  : NVPTXInst<(outs Int16Regs:$high),
-                             (ins Int32Regs:$s),
-                             "mov.b32 \t{{_, $high}}, $s;",
-                             []>, Requires<[hasPTX<71>]>;
-  def I32toI16L_Sink  : NVPTXInst<(outs Int16Regs:$low),
-                             (ins Int32Regs:$s),
-                             "mov.b32 \t{{$low, _}}, $s;",
-                             []>, Requires<[hasPTX<71>]>;
-  def I64toI32H_Sink  : NVPTXInst<(outs Int32Regs:$high),
-                             (ins Int64Regs:$s),
-                             "mov.b64 \t{{_, $high}}, $s;",
-                             []>, Requires<[hasPTX<71>]>;
-  def I64toI32L_Sink  : NVPTXInst<(outs Int32Regs:$low),
-                             (ins Int64Regs:$s),
-                             "mov.b64 \t{{$low, _}}, $s;",
-                             []>, Requires<[hasPTX<71>]>;
+  let Predicates = [hasPTX<71>] in {
+    def I32toI16H_Sink  : NVPTXInst<(outs Int16Regs:$high), (ins Int32Regs:$s),
+                              "mov.b32 \t{{_, $high}}, $s;", []>;
+    def I32toI16L_Sink  : NVPTXInst<(outs Int16Regs:$low), (ins Int32Regs:$s),
+                              "mov.b32 \t{{$low, _}}, $s;", []>;
+    def I64toI32H_Sink  : NVPTXInst<(outs Int32Regs:$high), (ins Int64Regs:$s),
+                              "mov.b64 \t{{_, $high}}, $s;", []>;
+    def I64toI32L_Sink  : NVPTXInst<(outs Int32Regs:$low), (ins Int64Regs:$s),
+                              "mov.b64 \t{{$low, _}}, $s;", []>;
+  }
 }
 
-def : Pat<(i16 (trunc (srl i32:$s, (i32 16)))), (I32toI16H_Sink i32:$s)>, Requires<[hasPTX<71>]>;
-def : Pat<(i16 (trunc (sra i32:$s, (i32 16)))), (I32toI16H_Sink i32:$s)>, Requires<[hasPTX<71>]>;
-def : Pat<(i32 (trunc (srl i64:$s, (i32 32)))), (I64toI32H_Sink i64:$s)>, Requires<[hasPTX<71>]>;
-def : Pat<(i32 (trunc (sra i64:$s, (i32 32)))), (I64toI32H_Sink i64:$s)>, Requires<[hasPTX<71>]>;
+let Predicates = [hasPTX<71>] in {
+  def : Pat<(i16 (trunc (srl i32:$s, (i32 16)))), (I32toI16H_Sink i32:$s)>;
+  def : Pat<(i16 (trunc (sra i32:$s, (i32 16)))), (I32toI16H_Sink i32:$s)>;
+  def : Pat<(i32 (trunc (srl i64:$s, (i32 32)))), (I64toI32H_Sink i64:$s)>;
+  def : Pat<(i32 (trunc (sra i64:$s, (i32 32)))), (I64toI32H_Sink i64:$s)>;
+}
 
 // Fall back to the old way if we don't have PTX 7.1.
 def : Pat<(i16 (trunc (srl i32:$s, (i32 16)))), (I32toI16H $s)>;
@@ -3070,29 +3039,19 @@ def stacksave :
   SDNode<"NVPTXISD::STACKSAVE", SDTIntLeaf,
          [SDNPHasChain, SDNPSideEffect]>;
 
-def STACKRESTORE_32 :
-  BasicNVPTXInst<(outs), (ins Int32Regs:$ptr),
-            "stackrestore.u32",
-            [(stackrestore i32:$ptr)]>,
-            Requires<[hasPTX<73>, hasSM<52>]>;
-
-def STACKSAVE_32 :
-  BasicNVPTXInst<(outs Int32Regs:$dst), (ins),
-            "stacksave.u32",
-            [(set i32:$dst, (i32 stacksave))]>,
-            Requires<[hasPTX<73>, hasSM<52>]>;
-
-def STACKRESTORE_64 :
-  BasicNVPTXInst<(outs), (ins Int64Regs:$ptr),
-            "stackrestore.u64",
-            [(stackrestore i64:$ptr)]>,
-            Requires<[hasPTX<73>, hasSM<52>]>;
-
-def STACKSAVE_64 :
-  BasicNVPTXInst<(outs Int64Regs:$dst), (ins),
-            "stacksave.u64",
-            [(set i64:$dst, (i64 stacksave))]>,
-            Requires<[hasPTX<73>, hasSM<52>]>;
+let Predicates = [hasPTX<73>, hasSM<52>] in {
+  foreach t = [I32RT, I64RT] in {
+    def STACKRESTORE_ # t.Size :
+      BasicNVPTXInst<(outs), (ins t.RC:$ptr),
+                "stackrestore.u" # t.Size,
+              [(stackrestore t.Ty:$ptr)]>;
+
+    def STACKSAVE_ # t.Size :
+      BasicNVPTXInst<(outs t.RC:$dst), (ins),
+                "stacksave.u" # t.Size,
+              [(set t.Ty:$dst, (t.Ty stacksave))]>;
+  }
+}
 
 include "NVPTXIntrinsics.td"