ROCm
diff --git a/‎clang/docs/LanguageExtensions.rst‎
Lines changed: 44 additions & 46 deletions b/‎clang/docs/LanguageExtensions.rst‎
Lines changed: 44 additions & 46 deletions
diff --git a/‎clang/include/clang/Basic/AMDGPUTypes.def‎
Lines changed: 8 additions & 0 deletions b/‎clang/include/clang/Basic/AMDGPUTypes.def‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎clang/include/clang/Basic/Builtins.def‎
Lines changed: 1 addition & 0 deletions b/‎clang/include/clang/Basic/Builtins.def‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎clang/include/clang/Basic/BuiltinsAMDGPU.def‎
Lines changed: 2 additions & 2 deletions b/‎clang/include/clang/Basic/BuiltinsAMDGPU.def‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎clang/include/clang/Basic/DiagnosticSemaKinds.td‎
Lines changed: 13 additions & 2 deletions b/‎clang/include/clang/Basic/DiagnosticSemaKinds.td‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎clang/include/clang/Sema/SemaAMDGPU.h‎
Lines changed: 9 additions & 0 deletions b/‎clang/include/clang/Sema/SemaAMDGPU.h‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎clang/lib/AST/ASTContext.cpp‎
Lines changed: 10 additions & 1 deletion b/‎clang/lib/AST/ASTContext.cpp‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎clang/lib/Basic/Targets/SPIR.cpp‎
Lines changed: 5 additions & 0 deletions b/‎clang/lib/Basic/Targets/SPIR.cpp‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎clang/lib/Basic/Targets/SPIR.h‎
Lines changed: 1 addition & 0 deletions b/‎clang/lib/Basic/Targets/SPIR.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎clang/lib/CodeGen/CGBuiltin.cpp‎
Lines changed: 1 addition & 1 deletion b/‎clang/lib/CodeGen/CGBuiltin.cpp‎
Lines changed: 1 addition & 1 deletion
@@ -4801,12 +4801,8 @@ a functional mechanism for programatically querying:
 
 .. code-block:: c
 
-  // When used as the predicate for a control structure
-  bool __builtin_amdgcn_processor_is(const char*);
-  bool __builtin_amdgcn_is_invocable(builtin_name);
-  // Otherwise
-  void __builtin_amdgcn_processor_is(const char*);
-  void __builtin_amdgcn_is_invocable(void);
+  __amdgpu_feature_predicate_t __builtin_amdgcn_processor_is(const char*);
+  __amdgpu_feature_predicate_t __builtin_amdgcn_is_invocable(builtin_name);
 
 **Example of use**:
 
@@ -4825,7 +4821,7 @@ a functional mechanism for programatically querying:
   while (__builtin_amdgcn_processor_is("gfx1101")) *p += x;
 
   do {
-    *p -= x;
+    break;
   } while (__builtin_amdgcn_processor_is("gfx1010"));
 
   for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break;
@@ -4836,7 +4832,7 @@ a functional mechanism for programatically querying:
     __builtin_amdgcn_s_ttracedata_imm(1);
 
   do {
-    *p -= x;
+    break;
   } while (
       __builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
 
@@ -4845,55 +4841,57 @@ a functional mechanism for programatically querying:
 
 **Description**:
 
-When used as the predicate value of the following control structures:
+The builtins return a value of type ``__amdgpu_feature_predicate_t``, which is a
+target-specific type that behaves as if its C++ definition were the following:
 
 .. code-block:: c++
 
-  if (...)
-  while (...)
-  do { } while (...)
-  for (...)
+  struct __amdgpu_feature_predicate_t {
+    __amdgpu_feature_predicate_t() = delete;
+    __amdgpu_feature_predicate_t(const __amdgpu_feature_predicate_t&) = delete;
+    __amdgpu_feature_predicate_t(__amdgpu_feature_predicate_t&&) = delete;
+
+    explicit
+    operator bool() const noexcept;
+  };
+
+The builtins can be used in C as well, wherein the
+``__amdgpu_feature_predicate_t`` type behaves as an opaque, forward-declared
+type that is automatically converted to ``_Bool`` when used as the
+predicate argument to a control structure:
+
+.. code-block:: c
+
+  struct __amdgpu_feature_predicate_t ret();     // Error
+  void arg(struct __amdgpu_feature_predicate_t); // Error
+  void local() {
+    struct __amdgpu_feature_predicate_t x;       // Error
+    struct __amdgpu_feature_predicate_t y =
+        __builtin_amdgcn_processor_is("gfx900"); // Error
+  }
+  void valid_use() {
+    _Bool x = (_Bool)__builtin_amdgcn_processor_is("gfx900"); // OK
+    if (__builtin_amdgcn_processor_is("gfx900"))       // Implicit cast to _Bool
+      return;
+    for (; __builtin_amdgcn_processor_is("gfx900");)   // Implicit cast to _Bool
+      break;
+    while (__builtin_amdgcn_processor_is("gfx900"))    // Implicit cast to _Bool
+      break;
+    do {
+      break;
+    } while (__builtin_amdgcn_processor_is("gfx900")); // Implicit cast to _Bool
+
+    __builtin_amdgcn_processor_is("gfx900") ? x : !x;
+  }
 
-be it directly, or as arguments to logical operators such as ``!, ||, &&``, the
-builtins return a boolean value that:
+The boolean interpretation of the predicate values returned by the builtins:
 
 * indicates whether the current target matches the argument; the argument MUST
   be a string literal and a valid AMDGPU target
 * indicates whether the builtin function passed as the argument can be invoked
   by the current target; the argument MUST be either a generic or AMDGPU
   specific builtin name
 
-Outside of these contexts, the builtins have a ``void`` returning signature
-which prevents their misuse.
-
-**Example of invalid use**:
-
-.. code-block:: c++
-
-  void kernel(int* p, int x, bool (*pfn)(bool), const char* str) {
-    if (__builtin_amdgcn_processor_is("not_an_amdgcn_gfx_id")) return;
-    else if (__builtin_amdgcn_processor_is(str)) __builtin_trap();
-
-    bool a = __builtin_amdgcn_processor_is("gfx906");
-    const bool b = !__builtin_amdgcn_processor_is("gfx906");
-    const bool c = !__builtin_amdgcn_processor_is("gfx906");
-    bool d = __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
-    bool e = !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
-    const auto f =
-        !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
-        || __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
-    const auto g =
-        !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
-        || !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
-    __builtin_amdgcn_processor_is("gfx1201")
-      ? __builtin_amdgcn_s_sleep_var(x) : __builtin_amdgcn_s_sleep(42);
-    if (pfn(__builtin_amdgcn_processor_is("gfx1200")))
-      __builtin_amdgcn_s_sleep_var(x);
-
-    if (__builtin_amdgcn_is_invocable("__builtin_amdgcn_s_sleep_var")) return;
-    else if (__builtin_amdgcn_is_invocable(x)) __builtin_trap();
-  }
-
 When invoked while compiling for a concrete target, the builtins are evaluated
 early by Clang, and never produce any CodeGen effects / have no observable
 side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V,
 
@@ -20,10 +20,18 @@
   AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
 #endif
 
+#ifndef AMDGPU_FEATURE_PREDICATE_TYPE
+#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
+  AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
+#endif
+
 AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8)
 
 AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0)
 
+AMDGPU_FEATURE_PREDICATE_TYPE("__amdgpu_feature_predicate_t", AMDGPUFeaturePredicate, AMDGPUFeaturePredicateTy, 1, 1)
+
 #undef AMDGPU_TYPE
 #undef AMDGPU_OPAQUE_PTR_TYPE
 #undef AMDGPU_NAMED_BARRIER_TYPE
+#undef AMDGPU_FEATURE_PREDICATE_TYPE
@@ -34,6 +34,7 @@
 //  Q -> target builtin type, followed by a character to distinguish the builtin type
 //    Qa -> AArch64 svcount_t builtin type.
 //    Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type.
+//    Qc -> AMDGPU __amdgpu_feature_predicate_t builtin type.
 //  E -> ext_vector, followed by the number of elements and the base type.
 //  X -> _Complex, followed by the base type.
 //  Y -> ptrdiff_t
 
@@ -352,8 +352,8 @@ BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
 
 // These are special FE only builtins intended for forwarding the requirements
 // to the ME.
-BUILTIN(__builtin_amdgcn_processor_is, "vcC*", "nctu")
-BUILTIN(__builtin_amdgcn_is_invocable, "v", "nctu")
+BUILTIN(__builtin_amdgcn_processor_is, "QccC*", "nctu")
+BUILTIN(__builtin_amdgcn_is_invocable, "Qc", "nctu")
 
 //===----------------------------------------------------------------------===//
 // R600-NI only builtins.
 
@@ -11695,9 +11695,9 @@ def err_omp_inscan_reduction_expected : Error<
 def note_omp_previous_inscan_reduction : Note<
   "'reduction' clause with 'inscan' modifier is used here">;
 def err_omp_multivar_xteam_scan_unsupported : Error<
-  "multiple list items are not yet supported with the 'inclusive' or the 'exclusive' clauses that appear with the 'scan' directive">;  
+  "multiple list items are not yet supported with the 'inclusive' or the 'exclusive' clauses that appear with the 'scan' directive">;
 def err_omp_xteam_scan_prohibited : Error<
-  "'scan' directive is not supported inside target regions. Use flag '-fopenmp-target-xteam-scan' to enable it">;  
+  "'scan' directive is not supported inside target regions. Use flag '-fopenmp-target-xteam-scan' to enable it">;
 def err_omp_expected_predefined_allocator : Error<
   "expected one of the predefined allocators for the variables with the static "
   "storage: 'omp_default_mem_alloc', 'omp_large_cap_mem_alloc', "
@@ -12915,8 +12915,19 @@ def err_amdgcn_processor_is_arg_not_literal
 def err_amdgcn_processor_is_arg_invalid_value
     : Error<"the argument to __builtin_amdgcn_processor_is must be a valid "
             "AMDGCN processor identifier; '%0' is not valid">;
+def note_amdgcn_processor_is_valid_options
+    : Note<"valid AMDGCN processor identifiers are: %0">;
 def err_amdgcn_is_invocable_arg_invalid_value
     : Error<"the argument to __builtin_amdgcn_is_invocable must be either a "
             "target agnostic builtin or an AMDGCN target specific builtin; `%0`"
             " is not valid">;
+def err_amdgcn_predicate_type_is_not_constructible
+    : Error<"%0 has type __amdgpu_feature_predicate_t, which is not"
+            " constructible">;
+def err_amdgcn_predicate_type_needs_explicit_bool_cast
+    : Error<"%0 must be explicitly cast to %1; however, please note that this "
+            "is almost always an error and that it prevents the effective "
+            "guarding of target dependent code, and thus should be avoided">;
+def note_amdgcn_protected_by_predicate : Note<"jump enters statement controlled"
+                                              " by AMDGPU feature predicate">;
 } // end of sema component.
@@ -15,12 +15,16 @@
 
 #include "clang/AST/ASTFwd.h"
 #include "clang/Sema/SemaBase.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 namespace clang {
 class AttributeCommonInfo;
+class Expr;
 class ParsedAttr;
 
 class SemaAMDGPU : public SemaBase {
+  llvm::SmallPtrSet<Expr *, 32> ExpandedPredicates;
+
 public:
   SemaAMDGPU(Sema &S);
 
@@ -64,6 +68,11 @@ class SemaAMDGPU : public SemaBase {
   void handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL);
   void handleAMDGPUMaxNumWorkGroupsAttr(Decl *D, const ParsedAttr &AL);
   void handleAMDGPUFlatWorkGroupSizeAttr(Decl *D, const ParsedAttr &AL);
+
+  /// Expand a valid use of the feature identification builtins into its
+  /// corresponding sequence of instructions.
+  Expr *ExpandAMDGPUPredicateBI(CallExpr *CE);
+  bool IsPredicate(Expr *E) const;
 };
 } // namespace clang
 
 
@@ -1467,7 +1467,12 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
   }
 
   if (Target.getTriple().isAMDGPU() ||
-      (AuxTarget && AuxTarget->getTriple().isAMDGPU())) {
+      (Target.getTriple().isSPIRV() &&
+       Target.getTriple().getVendor() == llvm::Triple::AMD) ||
+      (AuxTarget &&
+       (AuxTarget->getTriple().isAMDGPU() ||
+        ((AuxTarget->getTriple().isSPIRV() &&
+          AuxTarget->getTriple().getVendor() == llvm::Triple::AMD))))) {
 #define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)                       \
   InitBuiltinType(SingletonId, BuiltinType::Id);
 #include "clang/Basic/AMDGPUTypes.def"
@@ -12313,6 +12318,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
       Type = Context.AMDGPUBufferRsrcTy;
       break;
     }
+    case 'c': {
+      Type = Context.AMDGPUFeaturePredicateTy;
+      break;
+    }
     default:
       llvm_unreachable("Unexpected target builtin type");
     }
 
@@ -146,3 +146,8 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {
 bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {
   return AMDGPUTI.isValidCPUName(CPU);
 }
+
+void SPIRV64AMDGCNTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  return AMDGPUTI.fillValidCPUList(Values);
+}
@@ -426,6 +426,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
   // This is only needed for validating arguments passed to
   // __builtin_amdgcn_processor_is
   bool isValidCPUName(StringRef Name) const override;
+  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
 };
 
 } // namespace targets
 
@@ -19823,7 +19823,7 @@ static Value *GetOrInsertAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
   P->setExternallyInitialized(true);
 
   return CGF.Builder.CreateLoad(
-      RawAddress(P, PTy, CharUnits::One(), KnownNonNull));
+      RawAddress(P, PTy, CharUnits::One(), KnownNonNull), true);
 }
 
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Original file line number	Diff line number	Diff line change
`@@ -146,3 +146,8 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {`
`146`	`146`	`bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {`
`147`	`147`	`return AMDGPUTI.isValidCPUName(CPU);`
`148`	`148`	`}`
	`149`	`+`
	`150`	`+void SPIRV64AMDGCNTargetInfo::fillValidCPUList(`
	`151`	`+ SmallVectorImpl<StringRef> &Values) const {`
	`152`	`+ return AMDGPUTI.fillValidCPUList(Values);`
	`153`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -19823,7 +19823,7 @@ static Value *GetOrInsertAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {`
`19823`	`19823`	`P->setExternallyInitialized(true);`
`19824`	`19824`
`19825`	`19825`	`return CGF.Builder.CreateLoad(`
`19826`		`- RawAddress(P, PTy, CharUnits::One(), KnownNonNull));`
	`19826`	`+ RawAddress(P, PTy, CharUnits::One(), KnownNonNull), true);`
`19827`	`19827`	`}`
`19828`	`19828`
`19829`	`19829`	`Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,`