@@ -4536,6 +4536,120 @@ If no address spaces names are provided, all address spaces are fenced.
45364536 __builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local")
45374537 __builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local", "global")
45384538
4539+ __builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
4540+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4541+
4542+ ``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
4543+ a functional mechanism for programmatically querying:
4544+
4545+ * the identity of the current target processor;
4546+ * the capability of the current target processor to invoke a particular builtin.
4547+
4548+ **Syntax**:
4549+
4550+ .. code-block:: c
4551+
4552+ // When used as the predicate for a control structure
4553+ bool __builtin_amdgcn_processor_is(const char*);
4554+ bool __builtin_amdgcn_is_invocable(builtin_name);
4555+ // Otherwise
4556+ void __builtin_amdgcn_processor_is(const char*);
4557+ void __builtin_amdgcn_is_invocable(void);
4558+
4559+ **Example of use**:
4560+
4561+ .. code-block:: c++
4562+
4563+ if (__builtin_amdgcn_processor_is ("gfx1201") ||
4564+ __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var))
4565+ __builtin_amdgcn_s_sleep_var (x);
4566+
4567+ if (!__builtin_amdgcn_processor_is ("gfx906"))
4568+ __builtin_amdgcn_s_wait_event_export_ready ();
4569+ else if (__builtin_amdgcn_processor_is ("gfx1010") ||
4570+ __builtin_amdgcn_processor_is ("gfx1101"))
4571+ __builtin_amdgcn_s_ttracedata_imm (1);
4572+
4573+ while (__builtin_amdgcn_processor_is ("gfx1101")) *p += x;
4574+
4575+ do {
4576+ *p -= x;
4577+ } while (__builtin_amdgcn_processor_is("gfx1010"));
4578+
4579+ for (; __builtin_amdgcn_processor_is ("gfx1201"); ++*p) break;
4580+
4581+ if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready))
4582+ __builtin_amdgcn_s_wait_event_export_ready ();
4583+ else if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_ttracedata_imm))
4584+ __builtin_amdgcn_s_ttracedata_imm (1);
4585+
4586+ do {
4587+ *p -= x;
4588+ } while (
4589+ __builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
4590+
4591+ for (; __builtin_amdgcn_is_invocable (__builtin_amdgcn_permlane64); ++*p)
4592+ break;
4593+
4594+ **Description**:
4595+
4596+ When used as the predicate value of the following control structures:
4597+
4598+ .. code-block:: c++
4599+
4600+ if (...)
4601+ while (...)
4602+ do { } while (...)
4603+ for (...)
4604+
4605+ be it directly, or as arguments to logical operators such as ``!``, ``||`` and
4606+ ``&&``, the builtins return a boolean value that:
4607+
4608+ * for ``__builtin_amdgcn_processor_is``, indicates whether the current target
4609+ matches the argument, which MUST be a string literal and a valid AMDGPU target
4610+ * for ``__builtin_amdgcn_is_invocable``, indicates whether the builtin function
4611+ passed as the argument can be invoked by the current target; the argument MUST
4612+ be either a generic or AMDGPU-specific builtin name
4613+
4614+ Outside of these contexts, the builtins have a ``void``-returning signature,
4615+ which prevents their misuse.
4616+
4617+ **Example of invalid use**:
4618+
4619+ .. code-block:: c++
4620+
4621+ void kernel (int* p, int x, bool (*pfn)(bool), const char * str) {
4622+ if (__builtin_amdgcn_processor_is ("not_an_amdgcn_gfx_id")) return;
4623+ else if (__builtin_amdgcn_processor_is (str)) __builtin_trap ();
4624+
4625+ bool a = __builtin_amdgcn_processor_is ("gfx906");
4626+ const bool b = !__builtin_amdgcn_processor_is ("gfx906");
4627+ const bool c = !__builtin_amdgcn_processor_is ("gfx906");
4628+ bool d = __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4629+ bool e = !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4630+ const auto f =
4631+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
4632+ || __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4633+ const auto g =
4634+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
4635+ || !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
4636+ __builtin_amdgcn_processor_is ("gfx1201")
4637+ ? __builtin_amdgcn_s_sleep_var (x) : __builtin_amdgcn_s_sleep (42);
4638+ if (pfn (__builtin_amdgcn_processor_is ("gfx1200")))
4639+ __builtin_amdgcn_s_sleep_var (x);
4640+
4641+ if (__builtin_amdgcn_is_invocable ("__builtin_amdgcn_s_sleep_var")) return;
4642+ else if (__builtin_amdgcn_is_invocable (x)) __builtin_trap ();
4643+ }
4644+
4645+ When invoked while compiling for a concrete target, the builtins are evaluated
4646+ early by Clang, and never produce any CodeGen effects, i.e. they have no
4647+ observable side-effects in IR. Conversely, when compiling for AMDGCN-flavoured
4648+ SPIR-V, which is an abstract target, a series of predicate values is implicitly
4649+ created. These predicates get resolved when finalizing the compilation process
4650+ for a concrete target, and shall reflect the latter's identity and features.
4651+ Thus, it is possible to author high-level code, in e.g. HIP, that is target
4652+ adaptive in a dynamic fashion, contrary to macro-based mechanisms.
45394653
45404654ARM/AArch64 Language Extensions
45414655-------------------------------
0 commit comments