Skip to content

Commit d481e5f

Browse files
authored
[AMDGPU][SPIRV] Use SPIR-V syncscopes for some AMDGCN BIs (#154867)
AMDGCN-flavoured SPIR-V allows AMDGCN-specific builtins, including those for scoped fences and some specific RMWs. However, at present we don't map syncscopes to their SPIR-V equivalents, but rather use the AMDGCN ones. This ends up pessimising the resulting code, as system scope is used instead of device (agent) or subgroup (wavefront). We therefore correct the behaviour to ensure that we do the right thing during reverse translation.
1 parent af5c1a6 commit d481e5f

File tree

6 files changed

+576
-29
lines changed

6 files changed

+576
-29
lines changed

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
192192
return CGF.Builder.CreateCall(F, {Src0, Src1});
193193
}
194194

195+
// Translates an AMDGCN sync scope name into the name used when targeting
// SPIR-V: "agent" becomes "device" and "wavefront" becomes "subgroup". Any
// other scope name in \p AMDGCNScope is returned unchanged.
static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
196+
if (AMDGCNScope == "agent")
197+
return "device";
198+
if (AMDGCNScope == "wavefront")
199+
return "subgroup";
200+
return AMDGCNScope;
201+
}
202+
195203
// For processing memory ordering and memory scope arguments of various
196204
// amdgcn builtins.
197-
// \p Order takes a C++11 comptabile memory-ordering specifier and converts
205+
// \p Order takes a C++11 compatible memory-ordering specifier and converts
198206
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
199207
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
200208
// specific SyncScopeID (or its SPIR-V equivalent when targeting SPIR-V) and writes it to \p SSID.
@@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
227235
// Some of the atomic builtins take the scope as a string name.
228236
StringRef scp;
229237
if (llvm::getConstantStringInfo(Scope, scp)) {
238+
if (getTarget().getTriple().isSPIRV())
239+
scp = mapScopeToSPIRV(scp);
230240
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
231241
return;
232242
}
@@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
238248
SSID = llvm::SyncScope::System;
239249
break;
240250
case 1: // __MEMORY_SCOPE_DEVICE
241-
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
251+
if (getTarget().getTriple().isSPIRV())
252+
SSID = getLLVMContext().getOrInsertSyncScopeID("device");
253+
else
254+
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
242255
break;
243256
case 2: // __MEMORY_SCOPE_WRKGRP
244257
SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
245258
break;
246259
case 3: // __MEMORY_SCOPE_WVFRNT
247-
SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
260+
if (getTarget().getTriple().isSPIRV())
261+
SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
262+
else
263+
SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
248264
break;
249265
case 4: // __MEMORY_SCOPE_SINGLE
250266
SSID = llvm::SyncScope::SingleThread;
@@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
15101526
//
15111527
// The global/flat cases need to use agent scope to consistently produce
15121528
// the native instruction instead of a cmpxchg expansion.
1513-
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
1529+
if (getTarget().getTriple().isSPIRV())
1530+
SSID = getLLVMContext().getOrInsertSyncScopeID("device");
1531+
else
1532+
SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
15141533
AO = AtomicOrdering::Monotonic;
15151534

15161535
// The v2bf16 builtin uses i16 instead of a natural bfloat type.

0 commit comments

Comments
 (0)