@@ -4920,6 +4920,120 @@ If no address spaces names are provided, all address spaces are fenced.
49204920 __builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local")
49214921 __builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local", "global")
49224922
4923+ __builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
4924+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4925+
4926+ ``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
4927+ a functional mechanism for programmatically querying:
4928+
4929+ * the identity of the current target processor;
4930+ * the capability of the current target processor to invoke a particular builtin.
4931+
4932+ **Syntax**:
4933+
4934+ .. code-block:: c
4935+
4936+ // When used as the predicate for a control structure
4937+ bool __builtin_amdgcn_processor_is(const char*);
4938+ bool __builtin_amdgcn_is_invocable(builtin_name);
4939+ // Otherwise
4940+ void __builtin_amdgcn_processor_is(const char*);
4941+ void __builtin_amdgcn_is_invocable(void);
4942+
4943+ **Example of use**:
4944+
4945+ .. code-block:: c++
4946+
4947+ if (__builtin_amdgcn_processor_is ("gfx1201") ||
4948+ __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var))
4949+ __builtin_amdgcn_s_sleep_var (x);
4950+
4951+ if (!__builtin_amdgcn_processor_is ("gfx906"))
4952+ __builtin_amdgcn_s_wait_event_export_ready ();
4953+ else if (__builtin_amdgcn_processor_is ("gfx1010") ||
4954+ __builtin_amdgcn_processor_is ("gfx1101"))
4955+ __builtin_amdgcn_s_ttracedata_imm (1);
4956+
4957+ while (__builtin_amdgcn_processor_is ("gfx1101")) *p += x;
4958+
4959+ do {
4960+ *p -= x;
4961+ } while (__builtin_amdgcn_processor_is("gfx1010"));
4962+
4963+ for (; __builtin_amdgcn_processor_is ("gfx1201"); ++*p) break;
4964+
4965+ if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready))
4966+ __builtin_amdgcn_s_wait_event_export_ready ();
4967+ else if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_ttracedata_imm))
4968+ __builtin_amdgcn_s_ttracedata_imm (1);
4969+
4970+ do {
4971+ *p -= x;
4972+ } while (
4973+ __builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
4974+
4975+ for (; __builtin_amdgcn_is_invocable (__builtin_amdgcn_permlane64); ++*p)
4976+ break;
4977+
4978+ **Description**:
4979+
4980+ When used as the predicate value of the following control structures:
4981+
4982+ .. code-block:: c++
4983+
4984+ if (...)
4985+ while (...)
4986+ do { } while (...)
4987+ for (...)
4988+
4989+ be it directly, or as arguments to logical operators such as ``!``, ``||`` and
4990+ ``&&``, the builtins return a boolean value that indicates:
4991+
4992+ * for ``__builtin_amdgcn_processor_is``, whether the current target matches the
4993+ argument; the argument MUST be a string literal and a valid AMDGPU target;
4994+ * for ``__builtin_amdgcn_is_invocable``, whether the builtin function passed as
4995+ the argument can be invoked by the current target; the argument MUST be
4996+ either a generic or an AMDGPU specific builtin name.
4997+
4998+ Outside of these contexts, the builtins have a ``void`` returning signature
4999+ which prevents their misuse.
5000+
5001+ **Example of invalid use**:
5002+
5003+ .. code-block:: c++
5004+
5005+ void kernel (int* p, int x, bool (*pfn)(bool), const char * str) {
5006+ if (__builtin_amdgcn_processor_is ("not_an_amdgcn_gfx_id")) return;
5007+ else if (__builtin_amdgcn_processor_is (str)) __builtin_trap ();
5008+
5009+ bool a = __builtin_amdgcn_processor_is ("gfx906");
5010+ const bool b = !__builtin_amdgcn_processor_is ("gfx906");
5011+ const bool c = !__builtin_amdgcn_processor_is ("gfx906");
5012+ bool d = __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5013+ bool e = !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5014+ const auto f =
5015+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
5016+ || __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5017+ const auto g =
5018+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
5019+ || !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5020+ __builtin_amdgcn_processor_is ("gfx1201")
5021+ ? __builtin_amdgcn_s_sleep_var (x) : __builtin_amdgcn_s_sleep (42);
5022+ if (pfn (__builtin_amdgcn_processor_is ("gfx1200")))
5023+ __builtin_amdgcn_s_sleep_var (x);
5024+
5025+ if (__builtin_amdgcn_is_invocable ("__builtin_amdgcn_s_sleep_var")) return;
5026+ else if (__builtin_amdgcn_is_invocable (x)) __builtin_trap ();
5027+ }
5028+
5029+ When invoked while compiling for a concrete target, the builtins are evaluated
5030+ early by Clang, and never produce any CodeGen effects, i.e. they have no
5031+ observable side-effects in IR. Conversely, when compiling for AMDGCN flavoured
5032+ SPIR-V, which is an abstract target, a series of predicate values is implicitly
5033+ created. These predicates get resolved when finalizing the compilation process
5034+ for a concrete target, and shall reflect the latter's identity and features.
5035+ Thus, it is possible to author high-level code, in e.g. HIP, that is target
5036+ adaptive in a dynamic fashion, contrary to macro based mechanisms.
49235037
49245038ARM/AArch64 Language Extensions
49255039-------------------------------
0 commit comments