Skip to content

Commit 8ea525f

Browse files
authored
[Clang][LLVM] Port ZCFS from upstream (llvm#1567)
2 parents 7e7bcb9 + e04edcc commit 8ea525f

20 files changed

+1051
-3
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4920,6 +4920,120 @@ If no address spaces names are provided, all address spaces are fenced.
49204920
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
49214921
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
49224922
4923+
__builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
4924+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4925+
4926+
``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
4927+
a functional mechanism for programmatically querying:
4928+
4929+
* the identity of the current target processor;
4930+
* the capability of the current target processor to invoke a particular builtin.
4931+
4932+
**Syntax**:
4933+
4934+
.. code-block:: c
4935+
4936+
// When used as the predicate for a control structure
4937+
bool __builtin_amdgcn_processor_is(const char*);
4938+
bool __builtin_amdgcn_is_invocable(builtin_name);
4939+
// Otherwise
4940+
void __builtin_amdgcn_processor_is(const char*);
4941+
void __builtin_amdgcn_is_invocable(void);
4942+
4943+
**Example of use**:
4944+
4945+
.. code-block:: c++
4946+
4947+
if (__builtin_amdgcn_processor_is("gfx1201") ||
4948+
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var))
4949+
__builtin_amdgcn_s_sleep_var(x);
4950+
4951+
if (!__builtin_amdgcn_processor_is("gfx906"))
4952+
__builtin_amdgcn_s_wait_event_export_ready();
4953+
else if (__builtin_amdgcn_processor_is("gfx1010") ||
4954+
__builtin_amdgcn_processor_is("gfx1101"))
4955+
__builtin_amdgcn_s_ttracedata_imm(1);
4956+
4957+
while (__builtin_amdgcn_processor_is("gfx1101")) *p += x;
4958+
4959+
do {
4960+
*p -= x;
4961+
} while (__builtin_amdgcn_processor_is("gfx1010"));
4962+
4963+
for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break;
4964+
4965+
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready))
4966+
__builtin_amdgcn_s_wait_event_export_ready();
4967+
else if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_ttracedata_imm))
4968+
__builtin_amdgcn_s_ttracedata_imm(1);
4969+
4970+
do {
4971+
*p -= x;
4972+
} while (
4973+
__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
4974+
4975+
for (; __builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64); ++*p)
4976+
break;
4977+
4978+
**Description**:
4979+
4980+
When used as the predicate value of the following control structures:
4981+
4982+
.. code-block:: c++
4983+
4984+
if (...)
4985+
while (...)
4986+
do { } while (...)
4987+
for (...)
4988+
4989+
be it directly, or as arguments to logical operators such as ``!, ||, &&``, the
4990+
builtins return a boolean value that:
4991+
4992+
* for ``__builtin_amdgcn_processor_is``, indicates whether the current target matches the argument; the argument MUST
4993+
be a string literal and a valid AMDGPU target
4994+
* for ``__builtin_amdgcn_is_invocable``, indicates whether the builtin function passed as the argument can be invoked
4995+
by the current target; the argument MUST be either a generic or AMDGPU
4996+
specific builtin name
4997+
4998+
Outside of these contexts, the builtins have a ``void`` returning signature
4999+
which prevents their misuse.
5000+
5001+
**Example of invalid use**:
5002+
5003+
.. code-block:: c++
5004+
5005+
void kernel(int* p, int x, bool (*pfn)(bool), const char* str) {
5006+
if (__builtin_amdgcn_processor_is("not_an_amdgcn_gfx_id")) return;
5007+
else if (__builtin_amdgcn_processor_is(str)) __builtin_trap();
5008+
5009+
bool a = __builtin_amdgcn_processor_is("gfx906");
5010+
const bool b = !__builtin_amdgcn_processor_is("gfx906");
5011+
const bool c = !__builtin_amdgcn_processor_is("gfx906");
5012+
bool d = __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
5013+
bool e = !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
5014+
const auto f =
5015+
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
5016+
|| __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
5017+
const auto g =
5018+
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
5019+
|| !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
5020+
__builtin_amdgcn_processor_is("gfx1201")
5021+
? __builtin_amdgcn_s_sleep_var(x) : __builtin_amdgcn_s_sleep(42);
5022+
if (pfn(__builtin_amdgcn_processor_is("gfx1200")))
5023+
__builtin_amdgcn_s_sleep_var(x);
5024+
5025+
if (__builtin_amdgcn_is_invocable("__builtin_amdgcn_s_sleep_var")) return;
5026+
else if (__builtin_amdgcn_is_invocable(x)) __builtin_trap();
5027+
}
5028+
5029+
When invoked while compiling for a concrete target, the builtins are evaluated
5030+
early by Clang, and never produce any CodeGen effects / have no observable
5031+
side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V,
5032+
which is an abstract target, a series of predicate values are implicitly
5033+
created. These predicates get resolved when finalizing the compilation process
5034+
for a concrete target, and shall reflect the latter's identity and features.
5035+
Thus, it is possible to author high-level code, in e.g. HIP, that is target
5036+
adaptive in a dynamic fashion, contrary to macro based mechanisms.
49235037
49245038
ARM/AArch64 Language Extensions
49255039
-------------------------------

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,10 @@ AMDGPU Support
473473
^^^^^^^^^^^^^^
474474

475475
- Bump the default code object version to 6. ROCm 6.3 is required to run any program compiled with COV6.
476+
- Introduced a new target specific builtin ``__builtin_amdgcn_processor_is``,
477+
a late / deferred query for the current target processor
478+
- Introduced a new target specific builtin ``__builtin_amdgcn_is_invocable``,
479+
which enables fine-grained, per-builtin, feature availability queries
476480

477481
NVPTX Support
478482
^^^^^^^^^^^^^^

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,11 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
349349
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
350350
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
351351

352+
// These are special front-end (FE) only builtins intended for forwarding the requirements
353+
// to the middle-end (ME).
354+
BUILTIN(__builtin_amdgcn_processor_is, "vcC*", "nctu")
355+
BUILTIN(__builtin_amdgcn_is_invocable, "v", "nctu")
356+
352357
//===----------------------------------------------------------------------===//
353358
// R600-NI only builtins.
354359
//===----------------------------------------------------------------------===//

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11749,9 +11749,9 @@ def err_omp_inscan_reduction_expected : Error<
1174911749
def note_omp_previous_inscan_reduction : Note<
1175011750
"'reduction' clause with 'inscan' modifier is used here">;
1175111751
def err_omp_multivar_xteam_scan_unsupported : Error<
11752-
"multiple list items are not yet supported with the 'inclusive' or the 'exclusive' clauses that appear with the 'scan' directive">;
11752+
"multiple list items are not yet supported with the 'inclusive' or the 'exclusive' clauses that appear with the 'scan' directive">;
1175311753
def err_omp_xteam_scan_prohibited : Error<
11754-
"'scan' directive is not supported inside target regions. Use flag '-fopenmp-target-xteam-scan' to enable it">;
11754+
"'scan' directive is not supported inside target regions. Use flag '-fopenmp-target-xteam-scan' to enable it">;
1175511755
def err_omp_expected_predefined_allocator : Error<
1175611756
"expected one of the predefined allocators for the variables with the static "
1175711757
"storage: 'omp_default_mem_alloc', 'omp_large_cap_mem_alloc', "
@@ -13069,4 +13069,14 @@ def err_acc_invalid_modifier
1306913069
// AMDGCN builtins diagnostics
1307013070
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
1307113071
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
13072+
def err_amdgcn_processor_is_arg_not_literal
13073+
: Error<"the argument to __builtin_amdgcn_processor_is must be a string "
13074+
"literal">;
13075+
def err_amdgcn_processor_is_arg_invalid_value
13076+
: Error<"the argument to __builtin_amdgcn_processor_is must be a valid "
13077+
"AMDGCN processor identifier; '%0' is not valid">;
13078+
def err_amdgcn_is_invocable_arg_invalid_value
13079+
: Error<"the argument to __builtin_amdgcn_is_invocable must be either a "
13080+
"target agnostic builtin or an AMDGCN target specific builtin; `%0`"
13081+
" is not valid">;
1307213082
} // end of sema component.

clang/lib/Basic/Targets/SPIR.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,7 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {
152152
Float128Format = DoubleFormat;
153153
}
154154
}
155+
156+
// Reports whether CPU is a valid processor name by forwarding the query to
// the AMDGPUTI member's own isValidCPUName and returning its answer unchanged.
bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {
157+
return AMDGPUTI.isValidCPUName(CPU);
158+
}

clang/lib/Basic/Targets/SPIR.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
432432
}
433433

434434
bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
435+
436+
// This is only needed for validating arguments passed to
437+
// __builtin_amdgcn_processor_is
438+
bool isValidCPUName(StringRef Name) const override;
435439
};
436440

437441
} // namespace targets

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,18 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
284284
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
285285
}
286286

287+
// Returns a load of the i1 global variable called Name in the module that CGF
// is emitting into, creating that global first if it does not exist yet.
// The global is flagged constant and externally initialized before the load
// is built through CGF's builder.
// NOTE(review): presumably these globals are the "predicate values" that get
// resolved when finalizing for a concrete target -- confirm against the pass
// that consumes them.
static Value *GetOrInsertAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
288+
// Predicates are single-bit (i1) values.
auto PTy = IntegerType::getInt1Ty(CGF.getLLVMContext());
289+
290+
// Look the global up by name, or insert it with type PTy.
auto *P = cast<GlobalVariable>(
291+
CGF.CGM.getModule().getOrInsertGlobal(Name.str(), PTy));
292+
P->setConstant(true);
293+
P->setExternallyInitialized(true);
294+
295+
// Load from the global's address, described as non-null with an alignment
// of CharUnits::One().
return CGF.Builder.CreateLoad(
296+
RawAddress(P, PTy, CharUnits::One(), KnownNonNull));
297+
}
298+
287299
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
288300
const CallExpr *E) {
289301
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -585,6 +597,23 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
585597
llvm::Value *Env = EmitScalarExpr(E->getArg(0));
586598
return Builder.CreateCall(F, {Env});
587599
}
600+
case AMDGPU::BI__builtin_amdgcn_processor_is: {
601+
assert(CGM.getTriple().isSPIRV() &&
602+
"__builtin_amdgcn_processor_is should never reach CodeGen for "
603+
"concrete targets!");
604+
StringRef Proc = cast<clang::StringLiteral>(E->getArg(0))->getString();
605+
return GetOrInsertAMDGPUPredicate(*this, "llvm.amdgcn.is." + Proc);
606+
}
607+
case AMDGPU::BI__builtin_amdgcn_is_invocable: {
608+
assert(CGM.getTriple().isSPIRV() &&
609+
"__builtin_amdgcn_is_invocable should never reach CodeGen for "
610+
"concrete targets!");
611+
auto *FD = cast<FunctionDecl>(
612+
cast<DeclRefExpr>(E->getArg(0))->getReferencedDeclOfCallee());
613+
StringRef RF =
614+
getContext().BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
615+
return GetOrInsertAMDGPUPredicate(*this, "llvm.amdgcn.has." + RF);
616+
}
588617
case AMDGPU::BI__builtin_amdgcn_read_exec:
589618
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
590619
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:

0 commit comments

Comments
 (0)