Skip to content

Commit 21033ba

Browse files
authored
Port ZCFS improvements (llvm#2817)
Authored by Alex Voicu
1 parent 4c8ef2e commit 21033ba

32 files changed

+560
-245
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 44 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4801,12 +4801,8 @@ a functional mechanism for programatically querying:
48014801
48024802
.. code-block:: c
48034803
4804-
// When used as the predicate for a control structure
4805-
bool __builtin_amdgcn_processor_is(const char*);
4806-
bool __builtin_amdgcn_is_invocable(builtin_name);
4807-
// Otherwise
4808-
void __builtin_amdgcn_processor_is(const char*);
4809-
void __builtin_amdgcn_is_invocable(void);
4804+
__amdgpu_feature_predicate_t __builtin_amdgcn_processor_is(const char*);
4805+
__amdgpu_feature_predicate_t __builtin_amdgcn_is_invocable(builtin_name);
48104806
48114807
**Example of use**:
48124808
@@ -4825,7 +4821,7 @@ a functional mechanism for programatically querying:
48254821
while (__builtin_amdgcn_processor_is("gfx1101")) *p += x;
48264822
48274823
do {
4828-
*p -= x;
4824+
break;
48294825
} while (__builtin_amdgcn_processor_is("gfx1010"));
48304826
48314827
for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break;
@@ -4836,7 +4832,7 @@ a functional mechanism for programatically querying:
48364832
__builtin_amdgcn_s_ttracedata_imm(1);
48374833
48384834
do {
4839-
*p -= x;
4835+
break;
48404836
} while (
48414837
__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
48424838
@@ -4845,55 +4841,57 @@ a functional mechanism for programatically querying:
48454841
48464842
**Description**:
48474843
4848-
When used as the predicate value of the following control structures:
4844+
The builtins return a value of type ``__amdgpu_feature_predicate_t``, which is a
4845+
target-specific type that behaves as if its C++ definition were the following:
48494846
48504847
.. code-block:: c++
48514848
4852-
if (...)
4853-
while (...)
4854-
do { } while (...)
4855-
for (...)
4849+
struct __amdgpu_feature_predicate_t {
4850+
__amdgpu_feature_predicate_t() = delete;
4851+
__amdgpu_feature_predicate_t(const __amdgpu_feature_predicate_t&) = delete;
4852+
__amdgpu_feature_predicate_t(__amdgpu_feature_predicate_t&&) = delete;
4853+
4854+
explicit
4855+
operator bool() const noexcept;
4856+
};
4857+
4858+
The builtins can be used in C as well, wherein the
4859+
``__amdgpu_feature_predicate_t`` type behaves as an opaque, forward-declared
4860+
type with conditional automated conversion to ``_Bool`` when used as the
4861+
predicate argument to a control structure:
4862+
4863+
.. code-block:: c
4864+
4865+
struct __amdgpu_feature_predicate_t ret(); // Error
4866+
void arg(struct __amdgpu_feature_predicate_t); // Error
4867+
void local() {
4868+
struct __amdgpu_feature_predicate_t x; // Error
4869+
struct __amdgpu_feature_predicate_t y =
4870+
__builtin_amdgcn_processor_is("gfx900"); // Error
4871+
}
4872+
void valid_use() {
4873+
_Bool x = (_Bool)__builtin_amdgcn_processor_is("gfx900"); // OK
4874+
if (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool
4875+
return;
4876+
for (; __builtin_amdgcn_processor_is("gfx900");) // Implicit cast to _Bool
4877+
break;
4878+
while (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool
4879+
break;
4880+
do {
4881+
break;
4882+
} while (__builtin_amdgcn_processor_is("gfx900")); // Implicit cast to _Bool
4883+
4884+
__builtin_amdgcn_processor_is("gfx900") ? x : !x;
4885+
}
48564886
4857-
be it directly, or as arguments to logical operators such as ``!, ||, &&``, the
4858-
builtins return a boolean value that:
4887+
The boolean interpretation of the predicate values returned by the builtins:
48594888
48604889
* indicates whether the current target matches the argument; the argument MUST
48614890
be a string literal and a valid AMDGPU target
48624891
* indicates whether the builtin function passed as the argument can be invoked
48634892
by the current target; the argument MUST be either a generic or AMDGPU
48644893
specific builtin name
48654894
4866-
Outside of these contexts, the builtins have a ``void`` returning signature
4867-
which prevents their misuse.
4868-
4869-
**Example of invalid use**:
4870-
4871-
.. code-block:: c++
4872-
4873-
void kernel(int* p, int x, bool (*pfn)(bool), const char* str) {
4874-
if (__builtin_amdgcn_processor_is("not_an_amdgcn_gfx_id")) return;
4875-
else if (__builtin_amdgcn_processor_is(str)) __builtin_trap();
4876-
4877-
bool a = __builtin_amdgcn_processor_is("gfx906");
4878-
const bool b = !__builtin_amdgcn_processor_is("gfx906");
4879-
const bool c = !__builtin_amdgcn_processor_is("gfx906");
4880-
bool d = __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4881-
bool e = !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4882-
const auto f =
4883-
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
4884-
|| __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4885-
const auto g =
4886-
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
4887-
|| !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4888-
__builtin_amdgcn_processor_is("gfx1201")
4889-
? __builtin_amdgcn_s_sleep_var(x) : __builtin_amdgcn_s_sleep(42);
4890-
if (pfn(__builtin_amdgcn_processor_is("gfx1200")))
4891-
__builtin_amdgcn_s_sleep_var(x);
4892-
4893-
if (__builtin_amdgcn_is_invocable("__builtin_amdgcn_s_sleep_var")) return;
4894-
else if (__builtin_amdgcn_is_invocable(x)) __builtin_trap();
4895-
}
4896-
48974895
When invoked while compiling for a concrete target, the builtins are evaluated
48984896
early by Clang, and never produce any CodeGen effects / have no observable
48994897
side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V,

clang/include/clang/Basic/AMDGPUTypes.def

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,18 @@
2020
AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
2121
#endif
2222

23+
#ifndef AMDGPU_FEATURE_PREDICATE_TYPE
24+
#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
25+
AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
26+
#endif
27+
2328
AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8)
2429

2530
AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0)
2631

32+
AMDGPU_FEATURE_PREDICATE_TYPE("__amdgpu_feature_predicate_t", AMDGPUFeaturePredicate, AMDGPUFeaturePredicateTy, 1, 1)
33+
2734
#undef AMDGPU_TYPE
2835
#undef AMDGPU_OPAQUE_PTR_TYPE
2936
#undef AMDGPU_NAMED_BARRIER_TYPE
37+
#undef AMDGPU_FEATURE_PREDICATE_TYPE

clang/include/clang/Basic/Builtins.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
// Q -> target builtin type, followed by a character to distinguish the builtin type
3535
// Qa -> AArch64 svcount_t builtin type.
3636
// Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type.
37+
// Qc -> AMDGPU __amdgpu_feature_predicate_t builtin type.
3738
// E -> ext_vector, followed by the number of elements and the base type.
3839
// X -> _Complex, followed by the base type.
3940
// Y -> ptrdiff_t

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@ BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
352352

353353
// These are special FE only builtins intended for forwarding the requirements
354354
// to the ME.
355-
BUILTIN(__builtin_amdgcn_processor_is, "vcC*", "nctu")
356-
BUILTIN(__builtin_amdgcn_is_invocable, "v", "nctu")
355+
BUILTIN(__builtin_amdgcn_processor_is, "QccC*", "nctu")
356+
BUILTIN(__builtin_amdgcn_is_invocable, "Qc", "nctu")
357357

358358
//===----------------------------------------------------------------------===//
359359
// R600-NI only builtins.

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11695,9 +11695,9 @@ def err_omp_inscan_reduction_expected : Error<
1169511695
def note_omp_previous_inscan_reduction : Note<
1169611696
"'reduction' clause with 'inscan' modifier is used here">;
1169711697
def err_omp_multivar_xteam_scan_unsupported : Error<
11698-
"multiple list items are not yet supported with the 'inclusive' or the 'exclusive' clauses that appear with the 'scan' directive">;
11698+
"multiple list items are not yet supported with the 'inclusive' or the 'exclusive' clauses that appear with the 'scan' directive">;
1169911699
def err_omp_xteam_scan_prohibited : Error<
11700-
"'scan' directive is not supported inside target regions. Use flag '-fopenmp-target-xteam-scan' to enable it">;
11700+
"'scan' directive is not supported inside target regions. Use flag '-fopenmp-target-xteam-scan' to enable it">;
1170111701
def err_omp_expected_predefined_allocator : Error<
1170211702
"expected one of the predefined allocators for the variables with the static "
1170311703
"storage: 'omp_default_mem_alloc', 'omp_large_cap_mem_alloc', "
@@ -12915,8 +12915,19 @@ def err_amdgcn_processor_is_arg_not_literal
1291512915
def err_amdgcn_processor_is_arg_invalid_value
1291612916
: Error<"the argument to __builtin_amdgcn_processor_is must be a valid "
1291712917
"AMDGCN processor identifier; '%0' is not valid">;
12918+
def note_amdgcn_processor_is_valid_options
12919+
: Note<"valid AMDGCN processor identifiers are: %0">;
1291812920
def err_amdgcn_is_invocable_arg_invalid_value
1291912921
: Error<"the argument to __builtin_amdgcn_is_invocable must be either a "
1292012922
"target agnostic builtin or an AMDGCN target specific builtin; `%0`"
1292112923
" is not valid">;
12924+
def err_amdgcn_predicate_type_is_not_constructible
12925+
: Error<"%0 has type __amdgpu_feature_predicate_t, which is not"
12926+
" constructible">;
12927+
def err_amdgcn_predicate_type_needs_explicit_bool_cast
12928+
: Error<"%0 must be explicitly cast to %1; however, please note that this "
12929+
"is almost always an error and that it prevents the effective "
12930+
"guarding of target dependent code, and thus should be avoided">;
12931+
def note_amdgcn_protected_by_predicate : Note<"jump enters statement controlled"
12932+
" by AMDGPU feature predicate">;
1292212933
} // end of sema component.

clang/include/clang/Sema/SemaAMDGPU.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@
1515

1616
#include "clang/AST/ASTFwd.h"
1717
#include "clang/Sema/SemaBase.h"
18+
#include "llvm/ADT/SmallPtrSet.h"
1819

1920
namespace clang {
2021
class AttributeCommonInfo;
22+
class Expr;
2123
class ParsedAttr;
2224

2325
class SemaAMDGPU : public SemaBase {
26+
llvm::SmallPtrSet<Expr *, 32> ExpandedPredicates;
27+
2428
public:
2529
SemaAMDGPU(Sema &S);
2630

@@ -64,6 +68,11 @@ class SemaAMDGPU : public SemaBase {
6468
void handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL);
6569
void handleAMDGPUMaxNumWorkGroupsAttr(Decl *D, const ParsedAttr &AL);
6670
void handleAMDGPUFlatWorkGroupSizeAttr(Decl *D, const ParsedAttr &AL);
71+
72+
/// Expand a valid use of the feature identification builtins into its
73+
/// corresponding sequence of instructions.
74+
Expr *ExpandAMDGPUPredicateBI(CallExpr *CE);
75+
bool IsPredicate(Expr *E) const;
6776
};
6877
} // namespace clang
6978

clang/lib/AST/ASTContext.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1467,7 +1467,12 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
14671467
}
14681468

14691469
if (Target.getTriple().isAMDGPU() ||
1470-
(AuxTarget && AuxTarget->getTriple().isAMDGPU())) {
1470+
(Target.getTriple().isSPIRV() &&
1471+
Target.getTriple().getVendor() == llvm::Triple::AMD) ||
1472+
(AuxTarget &&
1473+
(AuxTarget->getTriple().isAMDGPU() ||
1474+
((AuxTarget->getTriple().isSPIRV() &&
1475+
AuxTarget->getTriple().getVendor() == llvm::Triple::AMD))))) {
14711476
#define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \
14721477
InitBuiltinType(SingletonId, BuiltinType::Id);
14731478
#include "clang/Basic/AMDGPUTypes.def"
@@ -12313,6 +12318,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
1231312318
Type = Context.AMDGPUBufferRsrcTy;
1231412319
break;
1231512320
}
12321+
case 'c': {
12322+
Type = Context.AMDGPUFeaturePredicateTy;
12323+
break;
12324+
}
1231612325
default:
1231712326
llvm_unreachable("Unexpected target builtin type");
1231812327
}

clang/lib/Basic/Targets/SPIR.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,8 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {
146146
bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {
147147
return AMDGPUTI.isValidCPUName(CPU);
148148
}
149+
150+
void SPIRV64AMDGCNTargetInfo::fillValidCPUList(
151+
SmallVectorImpl<StringRef> &Values) const {
152+
return AMDGPUTI.fillValidCPUList(Values);
153+
}

clang/lib/Basic/Targets/SPIR.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
426426
// This is only needed for validating arguments passed to
427427
// __builtin_amdgcn_processor_is
428428
bool isValidCPUName(StringRef Name) const override;
429+
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
429430
};
430431

431432
} // namespace targets

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19823,7 +19823,7 @@ static Value *GetOrInsertAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
1982319823
P->setExternallyInitialized(true);
1982419824

1982519825
return CGF.Builder.CreateLoad(
19826-
RawAddress(P, PTy, CharUnits::One(), KnownNonNull));
19826+
RawAddress(P, PTy, CharUnits::One(), KnownNonNull), true);
1982719827
}
1982819828

1982919829
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,

0 commit comments

Comments
 (0)