Skip to content

Commit 2f0ae82

Browse files
macurtis-amd authored and Lukacma committed
[clang] Add support for cluster sync scope (llvm#162575)
From Sam Liu:
> CUDA supports thread block clusters: https://docs.nvidia.com/cuda/cuda-c-programming-guide/#thread-block-clusters
>
> In their atomic intrinsics, cluster scope is supported: https://docs.nvidia.com/cuda/cuda-c-programming-guide/#nv-atomic-fetch-add-and-nv-atomic-add
>
> For compatibility, clang and hip need to support cluster scope.
1 parent 9b5780c commit 2f0ae82

File tree

15 files changed

+4462
-265
lines changed

15 files changed

+4462
-265
lines changed

clang/docs/HIPSupport.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ Predefined Macros
164164
- Represents wavefront memory scope in HIP (value is 2).
165165
* - ``__HIP_MEMORY_SCOPE_WORKGROUP``
166166
- Represents workgroup memory scope in HIP (value is 3).
167+
* - ``__HIP_MEMORY_SCOPE_CLUSTER``
168+
- Represents cluster memory scope in HIP (value is 6).
167169
* - ``__HIP_MEMORY_SCOPE_AGENT``
168170
- Represents agent memory scope in HIP (value is 4).
169171
* - ``__HIP_MEMORY_SCOPE_SYSTEM``

clang/docs/LanguageExtensions.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4846,6 +4846,7 @@ currently supported:
48464846
* ``__MEMORY_SCOPE_SYSTEM``
48474847
* ``__MEMORY_SCOPE_DEVICE``
48484848
* ``__MEMORY_SCOPE_WRKGRP``
4849+
* ``__MEMORY_SCOPE_CLUSTR``
48494850
* ``__MEMORY_SCOPE_WVFRNT``
48504851
* ``__MEMORY_SCOPE_SINGLE``
48514852

clang/include/clang/Basic/SyncScope.h

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@ enum class SyncScope {
4343
SystemScope,
4444
DeviceScope,
4545
WorkgroupScope,
46+
ClusterScope,
4647
WavefrontScope,
4748
SingleScope,
4849
HIPSingleThread,
4950
HIPWavefront,
5051
HIPWorkgroup,
52+
HIPCluster,
5153
HIPAgent,
5254
HIPSystem,
5355
OpenCLWorkGroup,
@@ -65,6 +67,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
6567
return "device_scope";
6668
case SyncScope::WorkgroupScope:
6769
return "workgroup_scope";
70+
case SyncScope::ClusterScope:
71+
return "cluster_scope";
6872
case SyncScope::WavefrontScope:
6973
return "wavefront_scope";
7074
case SyncScope::SingleScope:
@@ -75,6 +79,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
7579
return "hip_wavefront";
7680
case SyncScope::HIPWorkgroup:
7781
return "hip_workgroup";
82+
case SyncScope::HIPCluster:
83+
return "hip_cluster";
7884
case SyncScope::HIPAgent:
7985
return "hip_agent";
8086
case SyncScope::HIPSystem:
@@ -174,13 +180,18 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
174180
/// The enum values match the pre-defined macros
175181
/// __HIP_MEMORY_SCOPE_*, which are used to define memory_scope_*
176182
/// enums in hip-c.h.
183+
/// These may be present in pch files or bitcode so preserve existing values
184+
/// when adding a new ID.
177185
enum ID {
178186
SingleThread = 1,
179187
Wavefront = 2,
180188
Workgroup = 3,
181189
Agent = 4,
182190
System = 5,
183-
Last = System
191+
Cluster = 6,
192+
End,
193+
Last = End - 1,
194+
Count = Last
184195
};
185196

186197
AtomicScopeHIPModel() {}
@@ -193,10 +204,14 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
193204
return SyncScope::HIPWavefront;
194205
case Workgroup:
195206
return SyncScope::HIPWorkgroup;
207+
case Cluster:
208+
return SyncScope::HIPCluster;
196209
case Agent:
197210
return SyncScope::HIPAgent;
198211
case System:
199212
return SyncScope::HIPSystem;
213+
case End:
214+
break;
200215
}
201216
llvm_unreachable("Invalid language sync scope value");
202217
}
@@ -207,11 +222,12 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
207222
}
208223

209224
ArrayRef<unsigned> getRuntimeValues() const override {
210-
static_assert(Last == System, "Does not include all sync scopes");
211225
static const unsigned Scopes[] = {
212226
static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront),
213-
static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent),
214-
static_cast<unsigned>(System)};
227+
static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
228+
static_cast<unsigned>(System), static_cast<unsigned>(Agent)};
229+
static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
230+
"Does not include all sync scopes");
215231
return llvm::ArrayRef(Scopes);
216232
}
217233

@@ -223,14 +239,18 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
223239
/// Defines the generic atomic scope model.
224240
class AtomicScopeGenericModel : public AtomicScopeModel {
225241
public:
226-
/// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
242+
/// The enum values match predefined built-in macros __MEMORY_SCOPE_*.
243+
/// These may be present in pch files or bitcode so preserve existing values
244+
/// when adding a new ID.
227245
enum ID {
228246
System = 0,
229247
Device = 1,
230248
Workgroup = 2,
231249
Wavefront = 3,
232250
Single = 4,
233-
Last = Single
251+
Cluster = 5,
252+
Count,
253+
Last = Count - 1
234254
};
235255

236256
AtomicScopeGenericModel() = default;
@@ -243,10 +263,14 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
243263
return SyncScope::SystemScope;
244264
case Workgroup:
245265
return SyncScope::WorkgroupScope;
266+
case Cluster:
267+
return SyncScope::ClusterScope;
246268
case Wavefront:
247269
return SyncScope::WavefrontScope;
248270
case Single:
249271
return SyncScope::SingleScope;
272+
case Count:
273+
break;
250274
}
251275
llvm_unreachable("Invalid language sync scope value");
252276
}
@@ -256,11 +280,12 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
256280
}
257281

258282
ArrayRef<unsigned> getRuntimeValues() const override {
259-
static_assert(Last == Single, "Does not include all sync scopes");
260283
static const unsigned Scopes[] = {
261-
static_cast<unsigned>(Device), static_cast<unsigned>(System),
262-
static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
263-
static_cast<unsigned>(Single)};
284+
static_cast<unsigned>(System), static_cast<unsigned>(Device),
285+
static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
286+
static_cast<unsigned>(Wavefront), static_cast<unsigned>(Single)};
287+
static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
288+
"Does not include all sync scopes");
264289
return llvm::ArrayRef(Scopes);
265290
}
266291

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "CGBuiltin.h"
1414
#include "CodeGenFunction.h"
15+
#include "clang/Basic/SyncScope.h"
1516
#include "clang/Basic/TargetBuiltins.h"
1617
#include "clang/Frontend/FrontendDiagnostic.h"
1718
#include "llvm/Analysis/ValueTracking.h"
@@ -313,33 +314,33 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
313314
}
314315

315316
// Older builtins had an enum argument for the memory scope.
317+
const char *SSN = nullptr;
316318
int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
317319
switch (scope) {
318-
case 0: // __MEMORY_SCOPE_SYSTEM
320+
case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
319321
SSID = llvm::SyncScope::System;
320322
break;
321-
case 1: // __MEMORY_SCOPE_DEVICE
322-
if (getTarget().getTriple().isSPIRV())
323-
SSID = getLLVMContext().getOrInsertSyncScopeID("device");
324-
else
325-
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
323+
case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
324+
SSN = getTarget().getTriple().isSPIRV() ? "device" : "agent";
326325
break;
327-
case 2: // __MEMORY_SCOPE_WRKGRP
328-
SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
326+
case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
327+
SSN = "workgroup";
329328
break;
330-
case 3: // __MEMORY_SCOPE_WVFRNT
331-
if (getTarget().getTriple().isSPIRV())
332-
SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
333-
else
334-
SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
329+
case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
330+
SSN = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
331+
break;
332+
case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
333+
SSN = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
335334
break;
336-
case 4: // __MEMORY_SCOPE_SINGLE
335+
case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
337336
SSID = llvm::SyncScope::SingleThread;
338337
break;
339338
default:
340339
SSID = llvm::SyncScope::System;
341340
break;
342341
}
342+
if (SSN)
343+
SSID = getLLVMContext().getOrInsertSyncScopeID(SSN);
343344
}
344345

345346
llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,10 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
508508
case SyncScope::WavefrontScope:
509509
Name = "wavefront";
510510
break;
511+
case SyncScope::HIPCluster:
512+
case SyncScope::ClusterScope:
513+
Name = "cluster";
514+
break;
511515
case SyncScope::HIPWorkgroup:
512516
case SyncScope::OpenCLWorkGroup:
513517
case SyncScope::WorkgroupScope:

clang/lib/CodeGen/Targets/SPIR.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
9393
case SyncScope::OpenCLSubGroup:
9494
case SyncScope::WavefrontScope:
9595
return "subgroup";
96+
case SyncScope::HIPCluster:
97+
case SyncScope::ClusterScope:
9698
case SyncScope::HIPWorkgroup:
9799
case SyncScope::OpenCLWorkGroup:
98100
case SyncScope::WorkgroupScope:

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
585585
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
586586
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
587587
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
588+
Builder.defineMacro("__HIP_MEMORY_SCOPE_CLUSTER", "6");
588589
if (LangOpts.HIPStdPar) {
589590
Builder.defineMacro("__HIPSTDPAR__");
590591
if (LangOpts.HIPStdParInterposeAlloc) {
@@ -873,6 +874,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
873874
Builder.defineMacro("__MEMORY_SCOPE_WRKGRP", "2");
874875
Builder.defineMacro("__MEMORY_SCOPE_WVFRNT", "3");
875876
Builder.defineMacro("__MEMORY_SCOPE_SINGLE", "4");
877+
Builder.defineMacro("__MEMORY_SCOPE_CLUSTR", "5");
876878

877879
// Define macros for the OpenCL memory scope.
878880
// The values should match AtomicScopeOpenCLModel::ID enum.

0 commit comments

Comments
 (0)