Skip to content

Commit 8c1e1a0

Browse files
author
Salinas, David
authored
[Clang][LLVM] Port ZCFS from staging (llvm#1617) (llvm#1901)
2 parents a097554 + 0cb9ca8 commit 8c1e1a0

20 files changed

+1050
-2
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4788,6 +4788,120 @@ If no address spaces names are provided, all address spaces are fenced.
47884788
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
47894789
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
47904790
4791+
__builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
4792+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4793+
4794+
``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
4795+
a functional mechanism for programmatically querying:
4796+
4797+
* the identity of the current target processor;
4798+
* the capability of the current target processor to invoke a particular builtin.
4799+
4800+
**Syntax**:
4801+
4802+
.. code-block:: c
4803+
4804+
// When used as the predicate for a control structure
4805+
bool __builtin_amdgcn_processor_is(const char*);
4806+
bool __builtin_amdgcn_is_invocable(builtin_name);
4807+
// Otherwise
4808+
void __builtin_amdgcn_processor_is(const char*);
4809+
void __builtin_amdgcn_is_invocable(void);
4810+
4811+
**Example of use**:
4812+
4813+
.. code-block:: c++
4814+
4815+
if (__builtin_amdgcn_processor_is("gfx1201") ||
4816+
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var))
4817+
__builtin_amdgcn_s_sleep_var(x);
4818+
4819+
if (!__builtin_amdgcn_processor_is("gfx906"))
4820+
__builtin_amdgcn_s_wait_event_export_ready();
4821+
else if (__builtin_amdgcn_processor_is("gfx1010") ||
4822+
__builtin_amdgcn_processor_is("gfx1101"))
4823+
__builtin_amdgcn_s_ttracedata_imm(1);
4824+
4825+
while (__builtin_amdgcn_processor_is("gfx1101")) *p += x;
4826+
4827+
do {
4828+
*p -= x;
4829+
} while (__builtin_amdgcn_processor_is("gfx1010"));
4830+
4831+
for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break;
4832+
4833+
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready))
4834+
__builtin_amdgcn_s_wait_event_export_ready();
4835+
else if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_ttracedata_imm))
4836+
__builtin_amdgcn_s_ttracedata_imm(1);
4837+
4838+
do {
4839+
*p -= x;
4840+
} while (
4841+
__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
4842+
4843+
for (; __builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64); ++*p)
4844+
break;
4845+
4846+
**Description**:
4847+
4848+
When used as the predicate value of the following control structures:
4849+
4850+
.. code-block:: c++
4851+
4852+
if (...)
4853+
while (...)
4854+
do { } while (...)
4855+
for (...)
4856+
4857+
be it directly, or as arguments to logical operators such as ``!``, ``||`` and ``&&``, the
4858+
builtins return a boolean value that:
4859+
4860+
* indicates whether the current target matches the argument; the argument MUST
4861+
be a string literal and a valid AMDGPU target
4862+
* indicates whether the builtin function passed as the argument can be invoked
4863+
by the current target; the argument MUST be either a generic or AMDGPU
4864+
specific builtin name
4865+
4866+
Outside of these contexts, the builtins have a ``void`` returning signature
4867+
which prevents their misuse.
4868+
4869+
**Example of invalid use**:
4870+
4871+
.. code-block:: c++
4872+
4873+
void kernel(int* p, int x, bool (*pfn)(bool), const char* str) {
4874+
if (__builtin_amdgcn_processor_is("not_an_amdgcn_gfx_id")) return;
4875+
else if (__builtin_amdgcn_processor_is(str)) __builtin_trap();
4876+
4877+
bool a = __builtin_amdgcn_processor_is("gfx906");
4878+
const bool b = !__builtin_amdgcn_processor_is("gfx906");
4879+
const bool c = !__builtin_amdgcn_processor_is("gfx906");
4880+
bool d = __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4881+
bool e = !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4882+
const auto f =
4883+
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
4884+
|| __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4885+
const auto g =
4886+
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
4887+
|| !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4888+
__builtin_amdgcn_processor_is("gfx1201")
4889+
? __builtin_amdgcn_s_sleep_var(x) : __builtin_amdgcn_s_sleep(42);
4890+
if (pfn(__builtin_amdgcn_processor_is("gfx1200")))
4891+
__builtin_amdgcn_s_sleep_var(x);
4892+
4893+
if (__builtin_amdgcn_is_invocable("__builtin_amdgcn_s_sleep_var")) return;
4894+
else if (__builtin_amdgcn_is_invocable(x)) __builtin_trap();
4895+
}
4896+
4897+
When invoked while compiling for a concrete target, the builtins are evaluated
4898+
early by Clang, and never produce any CodeGen effects / have no observable
4899+
side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V,
4900+
which is an abstract target, a series of predicate values are implicitly
4901+
created. These predicates get resolved when finalizing the compilation process
4902+
for a concrete target, and shall reflect the latter's identity and features.
4903+
Thus, it is possible to author high-level code, in e.g. HIP, that is target
4904+
adaptive in a dynamic fashion, contrary to macro based mechanisms.
47914905
47924906
ARM/AArch64 Language Extensions
47934907
-------------------------------

clang/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,6 +1138,12 @@ AMDGPU Support
11381138
definitions for GPU builtin functions. This header can be included for OpenMP,
11391139
CUDA, HIP, OpenCL, and C/C++.
11401140

1141+
- Bump the default code object version to 6.
1142+
- Introduced a new target specific builtin ``__builtin_amdgcn_processor_is``,
1143+
a late / deferred query for the current target processor.
1144+
- Introduced a new target specific builtin ``__builtin_amdgcn_is_invocable``,
1145+
which enables fine-grained, per-builtin, feature availability queries.
1146+
11411147
NVPTX Support
11421148
^^^^^^^^^^^^^^
11431149

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,11 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
349349
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
350350
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
351351

352+
// These are special FE only builtins intended for forwarding the requirements
353+
// to the ME.
354+
BUILTIN(__builtin_amdgcn_processor_is, "vcC*", "nctu")
355+
BUILTIN(__builtin_amdgcn_is_invocable, "v", "nctu")
356+
352357
//===----------------------------------------------------------------------===//
353358
// R600-NI only builtins.
354359
//===----------------------------------------------------------------------===//

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12909,4 +12909,14 @@ def err_acc_update_as_body
1290912909
// AMDGCN builtins diagnostics
1291012910
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
1291112911
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
12912+
def err_amdgcn_processor_is_arg_not_literal
12913+
: Error<"the argument to __builtin_amdgcn_processor_is must be a string "
12914+
"literal">;
12915+
def err_amdgcn_processor_is_arg_invalid_value
12916+
: Error<"the argument to __builtin_amdgcn_processor_is must be a valid "
12917+
"AMDGCN processor identifier; '%0' is not valid">;
12918+
def err_amdgcn_is_invocable_arg_invalid_value
12919+
: Error<"the argument to __builtin_amdgcn_is_invocable must be either a "
12920+
"target agnostic builtin or an AMDGCN target specific builtin; `%0`"
12921+
" is not valid">;
1291212922
} // end of sema component.

clang/lib/Basic/Targets/SPIR.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,7 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {
142142
Float128Format = DoubleFormat;
143143
}
144144
}
145+
146+
// Reports whether \p CPU is a valid CPU name by forwarding the query,
// unchanged, to AMDGPUTI's isValidCPUName.
bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {
147+
return AMDGPUTI.isValidCPUName(CPU);
148+
}

clang/lib/Basic/Targets/SPIR.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
422422
}
423423

424424
bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
425+
426+
// This is only needed for validating arguments passed to
427+
// __builtin_amdgcn_processor_is
428+
bool isValidCPUName(StringRef Name) const override;
425429
};
426430

427431
} // namespace targets

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17343,7 +17343,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1734317343
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
1734417344
}
1734517345
case X86::BI__builtin_ia32_cvtsbf162ss_32:
17346-
return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17346+
return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
1734717347

1734817348
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
1734917349
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
@@ -19784,6 +19784,18 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
1978419784
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
1978519785
}
1978619786

19787+
// Returns a load of the i1 module-level global called \p Name, creating the
// global in CGF's module if it does not exist yet. The global is marked
// constant and externally initialized before the load is emitted.
//
// \param CGF  the CodeGenFunction whose module and IR builder are used.
// \param Name the name of the predicate global; it is rendered to a
//             std::string via Twine::str() for the lookup / insertion.
// \returns the value produced by loading the predicate global.
//
// NOTE(review): Name is taken as a Twine by value; LLVM's guidance is to pass
// Twine as `const Twine &` -- consider changing the signature.
static Value *GetOrInsertAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
19788+
auto PTy = IntegerType::getInt1Ty(CGF.getLLVMContext());
19789+
19790+
// getOrInsertGlobal either finds the existing global or creates a new one,
// so repeated queries for the same Name share a single variable.
auto *P = cast<GlobalVariable>(
19791+
CGF.CGM.getModule().getOrInsertGlobal(Name.str(), PTy));
19792+
P->setConstant(true);
19793+
P->setExternallyInitialized(true);
19794+
19795+
// The load uses an alignment of one char unit and treats the address as
// known non-null.
return CGF.Builder.CreateLoad(
19796+
RawAddress(P, PTy, CharUnits::One(), KnownNonNull));
19797+
}
19798+
1978719799
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1978819800
const CallExpr *E) {
1978919801
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -20085,6 +20097,23 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
2008520097
llvm::Value *Env = EmitScalarExpr(E->getArg(0));
2008620098
return Builder.CreateCall(F, {Env});
2008720099
}
20100+
case AMDGPU::BI__builtin_amdgcn_processor_is: {
20101+
assert(CGM.getTriple().isSPIRV() &&
20102+
"__builtin_amdgcn_processor_is should never reach CodeGen for "
20103+
"concrete targets!");
20104+
StringRef Proc = cast<clang::StringLiteral>(E->getArg(0))->getString();
20105+
return GetOrInsertAMDGPUPredicate(*this, "llvm.amdgcn.is." + Proc);
20106+
}
20107+
case AMDGPU::BI__builtin_amdgcn_is_invocable: {
20108+
assert(CGM.getTriple().isSPIRV() &&
20109+
"__builtin_amdgcn_is_invocable should never reach CodeGen for "
20110+
"concrete targets!");
20111+
auto *FD = cast<FunctionDecl>(
20112+
cast<DeclRefExpr>(E->getArg(0))->getReferencedDeclOfCallee());
20113+
StringRef RF =
20114+
getContext().BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
20115+
return GetOrInsertAMDGPUPredicate(*this, "llvm.amdgcn.has." + RF);
20116+
}
2008820117
case AMDGPU::BI__builtin_amdgcn_read_exec:
2008920118
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
2009020119
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:

0 commit comments

Comments
 (0)