@@ -4788,6 +4788,120 @@ If no address spaces names are provided, all address spaces are fenced.
47884788 __builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local")
47894789 __builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local", "global")
47904790
4791+ __builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
4792+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4793+
4794+ ``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
4795+ a functional mechanism for programmatically querying:
4796+
4797+ * the identity of the current target processor;
4798+ * the capability of the current target processor to invoke a particular builtin.
4799+
4800+ **Syntax**:
4801+
4802+ .. code-block:: c
4803+
4804+ // When used as the predicate for a control structure
4805+ bool __builtin_amdgcn_processor_is(const char*);
4806+ bool __builtin_amdgcn_is_invocable(builtin_name);
4807+ // Otherwise
4808+ void __builtin_amdgcn_processor_is(const char*);
4809+ void __builtin_amdgcn_is_invocable(void);
4810+
4811+ **Example of use**:
4812+
4813+ .. code-block:: c++
4814+
4815+ if (__builtin_amdgcn_processor_is ("gfx1201") ||
4816+ __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var))
4817+ __builtin_amdgcn_s_sleep_var (x);
4818+
4819+ if (!__builtin_amdgcn_processor_is ("gfx906"))
4820+ __builtin_amdgcn_s_wait_event_export_ready ();
4821+ else if (__builtin_amdgcn_processor_is ("gfx1010") ||
4822+ __builtin_amdgcn_processor_is ("gfx1101"))
4823+ __builtin_amdgcn_s_ttracedata_imm (1);
4824+
4825+ while (__builtin_amdgcn_processor_is ("gfx1101")) *p += x;
4826+
4827+ do {
4828+ *p -= x;
4829+ } while (__builtin_amdgcn_processor_is("gfx1010"));
4830+
4831+ for (; __builtin_amdgcn_processor_is ("gfx1201"); ++*p) break;
4832+
4833+ if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready))
4834+ __builtin_amdgcn_s_wait_event_export_ready ();
4835+ else if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_ttracedata_imm))
4836+ __builtin_amdgcn_s_ttracedata_imm (1);
4837+
4838+ do {
4839+ *p -= x;
4840+ } while (
4841+ __builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
4842+
4843+ for (; __builtin_amdgcn_is_invocable (__builtin_amdgcn_permlane64); ++*p)
4844+ break;
4845+
4846+ **Description**:
4847+
4848+ When used as the predicate value of the following control structures:
4849+
4850+ .. code-block:: c++
4851+
4852+ if (...)
4853+ while (...)
4854+ do { } while (...)
4855+ for (...)
4856+
4857+ be it directly, or as arguments to logical operators such as ``!``, ``||``
4858+ and ``&&``, the builtins return a boolean value that:
4859+
4860+ * for ``__builtin_amdgcn_processor_is``, indicates whether the current target
4861+ matches the argument; the argument MUST be a string literal and a valid AMDGPU target;
4862+ * for ``__builtin_amdgcn_is_invocable``, indicates whether the builtin function
4863+ passed as the argument can be invoked by the current target; the argument MUST
4864+ be either a generic or AMDGPU-specific builtin name.
4865+
4866+ Outside of these contexts, the builtins have a ``void`` returning signature
4867+ which prevents their misuse.
4868+
4869+ **Example of invalid use**:
4870+
4871+ .. code-block:: c++
4872+
4873+ void kernel (int* p, int x, bool (*pfn)(bool), const char * str) {
4874+ if (__builtin_amdgcn_processor_is ("not_an_amdgcn_gfx_id")) return;
4875+ else if (__builtin_amdgcn_processor_is (str)) __builtin_trap ();
4876+
4877+ bool a = __builtin_amdgcn_processor_is ("gfx906");
4878+ const bool b = !__builtin_amdgcn_processor_is ("gfx906");
4879+ const bool c = !__builtin_amdgcn_processor_is ("gfx906");
4880+ bool d = __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4881+ bool e = !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4882+ const auto f =
4883+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
4884+ || __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4885+ const auto g =
4886+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
4887+ || !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4888+ __builtin_amdgcn_processor_is ("gfx1201")
4889+ ? __builtin_amdgcn_s_sleep_var (x) : __builtin_amdgcn_s_sleep (42);
4890+ if (pfn (__builtin_amdgcn_processor_is ("gfx1200")))
4891+ __builtin_amdgcn_s_sleep_var (x);
4892+
4893+ if (__builtin_amdgcn_is_invocable ("__builtin_amdgcn_s_sleep_var")) return;
4894+ else if (__builtin_amdgcn_is_invocable (x)) __builtin_trap ();
4895+ }
4896+
4897+ When invoked while compiling for a concrete target, the builtins are evaluated
4898+ early by Clang, and never produce any CodeGen effects / have no observable
4899+ side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V,
4900+ which is an abstract target, a series of predicate values are implicitly
4901+ created. These predicates get resolved when finalizing the compilation process
4902+ for a concrete target, and shall reflect the latter's identity and features.
4903+ Thus, it is possible to author high-level code, in e.g. HIP, that is target
4904+ adaptive in a dynamic fashion, contrary to macro-based mechanisms.
47914905
47924906ARM/AArch64 Language Extensions
47934907-------------------------------
0 commit comments