Skip to content

Commit 5384dbd

Browse files
[Backport to llvm_release_180] Add cl_khr_kernel_clock support (#3650)
Backport of PR #2582 into `llvm_release_180`. All commits applied cleanly. Co-authored-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
1 parent 126be4f commit 5384dbd

File tree

6 files changed

+110
-0
lines changed

6 files changed

+110
-0
lines changed

lib/SPIRV/OCLToSPIRV.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,10 @@ void OCLToSPIRVBase::visitCallInst(CallInst &CI) {
340340
visitCallDot(&CI, MangledName, DemangledName);
341341
return;
342342
}
343+
if (DemangledName.starts_with(kOCLBuiltinName::ClockReadPrefix)) {
344+
visitCallClockRead(&CI, MangledName, DemangledName);
345+
return;
346+
}
343347
if (DemangledName == kOCLBuiltinName::FMin ||
344348
DemangledName == kOCLBuiltinName::FMax ||
345349
DemangledName == kOCLBuiltinName::Min ||
@@ -1330,6 +1334,23 @@ void OCLToSPIRVBase::visitCallDot(CallInst *CI, StringRef MangledName,
13301334
}
13311335
}
13321336

1337+
/// Rewrite a call to one of the OpenCL clock_read_* builtins into a call to
/// the SPIR-V friendly function for OpReadClockKHR.
///
/// \param CI The builtin call being rewritten.
/// \param MangledName Mangled callee name; not consulted by this handler.
/// \param DemangledName Demangled builtin name; its ending selects the scope.
void OCLToSPIRVBase::visitCallClockRead(CallInst *CI, StringRef MangledName,
                                        StringRef DemangledName) {
  // The OpenCL builtin carries its scope in the name, so pick it from the
  // suffix. A name with none of the known endings keeps the ScopeMax fallback.
  Scope ClockScope = ScopeMax;
  if (DemangledName.ends_with("device"))
    ClockScope = ScopeDevice;
  else if (DemangledName.ends_with("work_group"))
    ClockScope = ScopeWorkgroup;
  else if (DemangledName.ends_with("sub_group"))
    ClockScope = ScopeSubgroup;

  // The i64 and <2 x i32> results share a single instruction, so the return
  // type has to be part of the generated function name to keep them apart.
  std::string SPIRVName = getSPIRVFuncName(OpReadClockKHR, CI->getType());

  // The scope becomes an extra trailing i32 argument of the rewritten call.
  auto CallMutator = mutateCallInst(CI, SPIRVName);
  CallMutator.appendArg(getInt32(M, ClockScope));
}
1353+
13331354
void OCLToSPIRVBase::visitCallScalToVec(CallInst *CI, StringRef MangledName,
13341355
StringRef DemangledName) {
13351356
// Check if all arguments have the same type - it's simple case.

lib/SPIRV/OCLToSPIRV.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,10 @@ class OCLToSPIRVBase : public InstVisitor<OCLToSPIRVBase>, BuiltinCallHelper {
217217
void visitCallDot(CallInst *CI, StringRef MangledName,
218218
StringRef DemangledName);
219219

220+
/// Transform clock_read_* calls to OpReadClockKHR instructions.
/// The scope (device, work_group or sub_group) is taken from the end of the
/// demangled builtin name and appended to the call as an i32 argument; the
/// call's return type is encoded in the generated SPIR-V function name.
221+
void visitCallClockRead(CallInst *CI, StringRef MangledName,
222+
StringRef DemangledName);
223+
220224
/// Fixes for built-in functions with vector+scalar arguments that are
221225
/// translated to the SPIR-V instructions where all arguments must have the
222226
/// same type.

lib/SPIRV/OCLUtil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ const static char AtomicInit[] = "atomic_init";
245245
const static char AtomicWorkItemFence[] = "atomic_work_item_fence";
246246
const static char Barrier[] = "barrier";
247247
const static char Clamp[] = "clamp";
248+
const static char ClockReadPrefix[] = "clock_read_";
248249
const static char ConvertPrefix[] = "convert_";
249250
const static char Dot[] = "dot";
250251
const static char DotAccSat[] = "dot_acc_sat";

lib/SPIRV/SPIRVToOCL.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ void SPIRVToOCLBase::visitCallInst(CallInst &CI) {
210210
visitCallSPIRVRelational(&CI, OC);
211211
return;
212212
}
213+
if (OC == OpReadClockKHR) {
214+
visitCallSPIRVReadClockKHR(&CI);
215+
return;
216+
}
213217
if (OC == internal::OpConvertFToBF16INTEL ||
214218
OC == internal::OpConvertBF16ToFINTEL) {
215219
visitCallSPIRVBFloat16Conversions(&CI, OC);
@@ -1031,6 +1035,33 @@ void SPIRVToOCLBase::visitCallSPIRVRelational(CallInst *CI, Op OC) {
10311035
});
10321036
}
10331037

1038+
/// Rewrite a __spirv_ReadClockKHR call into the matching OpenCL clock_read_*
/// builtin call.
///
/// \param CI The call to rewrite; its first operand is cast to ConstantInt
///           and read as the SPIR-V scope.
void SPIRVToOCLBase::visitCallSPIRVReadClockKHR(CallInst *CI) {
  std::string BuiltinName = "clock_read_";

  // A vector result selects the "hilo" flavour of the builtin.
  if (CI->getType()->isVectorTy())
    BuiltinName += "hilo_";

  // The scope operand is folded into the builtin name. Scopes other than the
  // three handled below contribute no suffix.
  auto *ScopeConst = cast<ConstantInt>(CI->getArgOperand(0));
  const auto ScopeKind = static_cast<Scope>(ScopeConst->getZExtValue());
  if (ScopeKind == ScopeDevice)
    BuiltinName += "device";
  else if (ScopeKind == ScopeWorkgroup)
    BuiltinName += "work_group";
  else if (ScopeKind == ScopeSubgroup)
    BuiltinName += "sub_group";

  // The OpenCL builtin takes no arguments, so the scope operand is dropped.
  auto CallMutator = mutateCallInst(CI, BuiltinName);
  CallMutator.removeArg(0);
}
1064+
10341065
std::string SPIRVToOCLBase::getGroupBuiltinPrefix(CallInst *CI) {
10351066
std::string Prefix;
10361067
auto ES = getArgAsScope(CI, 0);

lib/SPIRV/SPIRVToOCL.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ class SPIRVToOCLBase : public InstVisitor<SPIRVToOCLBase>,
241241
/// Transform relational builtin, e.g. __spirv_IsNan, to OpenCL builtin.
242242
void visitCallSPIRVRelational(CallInst *CI, Op OC);
243243

244+
/// Transform __spirv_ReadClockKHR to OpenCL builtin.
/// The scope argument (argument 0) is removed from the call and encoded in
/// the builtin name instead (device, work_group or sub_group); a vector
/// return type selects the clock_read_hilo_* variant.
245+
void visitCallSPIRVReadClockKHR(CallInst *CI);
246+
244247
/// Conduct generic mutations for all atomic builtins
245248
virtual CallInst *mutateCommonAtomicArguments(CallInst *CI, Op OC) = 0;
246249

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// REQUIRES: spirv-dis
2+
// RUN: %clang_cc1 -triple spir-unknown-unknown -O1 -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -emit-llvm-bc %s -o %t.bc
3+
// RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_shader_clock -o %t.spv
4+
// RUN: spirv-dis %t.spv -o - | FileCheck %s --check-prefix=CHECK-SPIRV
5+
// TODO: spirv-val %t.spv
6+
// RUN: llvm-spirv -r %t.spv -o %t.rev.bc
7+
// RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
8+
// RUN: llvm-spirv -r --spirv-target-env=SPV-IR %t.spv -o %t.rev.bc
9+
// RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
10+
11+
// CHECK-SPIRV: OpCapability ShaderClockKHR
12+
// CHECK-SPIRV: OpExtension "SPV_KHR_shader_clock"
13+
// CHECK-SPIRV-DAG: [[uint:%[a-z0-9_]+]] = OpTypeInt 32
14+
// CHECK-SPIRV-DAG: [[ulong:%[a-z0-9_]+]] = OpTypeInt 64
15+
// CHECK-SPIRV-DAG: [[v2uint:%[a-z0-9_]+]] = OpTypeVector [[uint]] 2
16+
// CHECK-SPIRV-DAG: [[uint_1:%[a-z0-9_]+]] = OpConstant [[uint]] 1
17+
// CHECK-SPIRV-DAG: [[uint_2:%[a-z0-9_]+]] = OpConstant [[uint]] 2
18+
// CHECK-SPIRV-DAG: [[uint_3:%[a-z0-9_]+]] = OpConstant [[uint]] 3
19+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_1]]
20+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_2]]
21+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_3]]
22+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_1]]
23+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_2]]
24+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_3]]
25+
26+
// CHECK-LLVM-LABEL: test_clocks
27+
// CHECK-LLVM: call spir_func i64 @_Z17clock_read_devicev()
28+
// CHECK-LLVM: call spir_func i64 @_Z21clock_read_work_groupv()
29+
// CHECK-LLVM: call spir_func i64 @_Z20clock_read_sub_groupv()
30+
// CHECK-LLVM: call spir_func <2 x i32> @_Z22clock_read_hilo_devicev()
31+
// CHECK-LLVM: call spir_func <2 x i32> @_Z26clock_read_hilo_work_groupv()
32+
// CHECK-LLVM: call spir_func <2 x i32> @_Z25clock_read_hilo_sub_groupv()
33+
34+
// CHECK-SPV-IR-LABEL: test_clocks
35+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 1)
36+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 2)
37+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 3)
38+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 1)
39+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 2)
40+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 3)
41+
42+
// Calls each of the six clock_read builtins once: the ulong-returning form
// and the uint2-returning "hilo" form for the device, work_group and
// sub_group scopes. The calls appear in the same order as the CHECK lines.
kernel void test_clocks(global ulong *out64, global uint2 *outv2) {
43+
// 64-bit variants, stored to out64.
out64[0] = clock_read_device();
44+
out64[1] = clock_read_work_group();
45+
out64[2] = clock_read_sub_group();
46+
47+
// hilo variants returning uint2, stored to outv2.
outv2[0] = clock_read_hilo_device();
48+
outv2[1] = clock_read_hilo_work_group();
49+
outv2[2] = clock_read_hilo_sub_group();
50+
}

0 commit comments

Comments
 (0)