@@ -4920,6 +4920,120 @@ If no address spaces names are provided, all address spaces are fenced.
4920
4920
__builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local")
4921
4921
__builtin_amdgcn_fence (__ATOMIC_SEQ_CST, "workgroup", "local", "global")
4922
4922
4923
+ __builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
4924
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4925
+
4926
+ ``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
4927
+ a functional mechanism for programmatically querying:
4928
+
4929
+ * the identity of the current target processor;
4930
+ * the capability of the current target processor to invoke a particular builtin.
4931
+
4932
+ **Syntax**:
4933
+
4934
+ .. code-block:: c
4935
+
4936
+ // When used as the predicate for a control structure
4937
+ bool __builtin_amdgcn_processor_is(const char*);
4938
+ bool __builtin_amdgcn_is_invocable(builtin_name);
4939
+ // Otherwise
4940
+ void __builtin_amdgcn_processor_is(const char*);
4941
+ void __builtin_amdgcn_is_invocable(void);
4942
+
4943
+ **Example of use**:
4944
+
4945
+ .. code-block:: c++
4946
+
4947
+ if (__builtin_amdgcn_processor_is ("gfx1201") ||
4948
+ __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var))
4949
+ __builtin_amdgcn_s_sleep_var (x);
4950
+
4951
+ if (!__builtin_amdgcn_processor_is ("gfx906"))
4952
+ __builtin_amdgcn_s_wait_event_export_ready ();
4953
+ else if (__builtin_amdgcn_processor_is ("gfx1010") ||
4954
+ __builtin_amdgcn_processor_is ("gfx1101"))
4955
+ __builtin_amdgcn_s_ttracedata_imm (1);
4956
+
4957
+ while (__builtin_amdgcn_processor_is ("gfx1101")) *p += x;
4958
+
4959
+ do {
4960
+ *p -= x;
4961
+ } while (__builtin_amdgcn_processor_is("gfx1010"));
4962
+
4963
+ for (; __builtin_amdgcn_processor_is ("gfx1201"); ++*p) break;
4964
+
4965
+ if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready))
4966
+ __builtin_amdgcn_s_wait_event_export_ready ();
4967
+ else if (__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_ttracedata_imm))
4968
+ __builtin_amdgcn_s_ttracedata_imm (1);
4969
+
4970
+ do {
4971
+ *p -= x;
4972
+ } while (
4973
+ __builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
4974
+
4975
+ for (; __builtin_amdgcn_is_invocable (__builtin_amdgcn_permlane64); ++*p)
4976
+ break;
4977
+
4978
+ **Description**:
4979
+
4980
+ When used as the predicate value of the following control structures:
4981
+
4982
+ .. code-block:: c++
4983
+
4984
+ if (...)
4985
+ while (...)
4986
+ do { } while (...)
4987
+ for (...)
4988
+
4989
+ be it directly, or as arguments to logical operators such as ``!, ||, &&``, the
4990
+ builtins return a boolean value that:
4991
+
4992
+ * for ``__builtin_amdgcn_processor_is``, indicates whether the current target matches the argument; the argument MUST
4993
+ be a string literal and a valid AMDGPU target
4994
+ * for ``__builtin_amdgcn_is_invocable``, indicates whether the builtin function passed as the argument can be invoked
4995
+ by the current target; the argument MUST be either a generic or AMDGPU
4996
+ specific builtin name
4997
+
4998
+ Outside of these contexts, the builtins have a ``void`` returning signature
4999
+ which prevents their misuse.
5000
+
5001
+ **Example of invalid use**:
5002
+
5003
+ .. code-block:: c++
5004
+
5005
+ void kernel (int* p, int x, bool (*pfn)(bool), const char * str) {
5006
+ if (__builtin_amdgcn_processor_is ("not_an_amdgcn_gfx_id")) return;
5007
+ else if (__builtin_amdgcn_processor_is (str)) __builtin_trap ();
5008
+
5009
+ bool a = __builtin_amdgcn_processor_is ("gfx906");
5010
+ const bool b = !__builtin_amdgcn_processor_is ("gfx906");
5011
+ const bool c = !__builtin_amdgcn_processor_is ("gfx906");
5012
+ bool d = __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5013
+ bool e = !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5014
+ const auto f =
5015
+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
5016
+ || __builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5017
+ const auto g =
5018
+ !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_wait_event_export_ready)
5019
+ || !__builtin_amdgcn_is_invocable (__builtin_amdgcn_s_sleep_var);
5020
+ __builtin_amdgcn_processor_is ("gfx1201")
5021
+ ? __builtin_amdgcn_s_sleep_var (x) : __builtin_amdgcn_s_sleep (42);
5022
+ if (pfn (__builtin_amdgcn_processor_is ("gfx1200")))
5023
+ __builtin_amdgcn_s_sleep_var (x);
5024
+
5025
+ if (__builtin_amdgcn_is_invocable ("__builtin_amdgcn_s_sleep_var")) return;
5026
+ else if (__builtin_amdgcn_is_invocable (x)) __builtin_trap ();
5027
+ }
5028
+
5029
+ When invoked while compiling for a concrete target, the builtins are evaluated
5030
+ early by Clang, and never produce any CodeGen effects / have no observable
5031
+ side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V,
5032
+ which is an abstract target, a series of predicate values are implicitly
5033
+ created. These predicates get resolved when finalizing the compilation process
5034
+ for a concrete target, and shall reflect the latter's identity and features.
5035
+ Thus, it is possible to author high-level code, in e.g. HIP, that is target
5036
+ adaptive in a dynamic fashion, contrary to macro based mechanisms.
4923
5037
4924
5038
ARM/AArch64 Language Extensions
4925
5039
-------------------------------
0 commit comments