Skip to content

Commit 1bb7989

Browse files
svenvh authored and vmaksimo committed
Add cl_khr_kernel_clock support (#2582)
Add support for mapping the `cl_khr_kernel_clock` extension builtins to and from the `SPV_KHR_shader_clock` extension.
1 parent 5a9241b commit 1bb7989

File tree

6 files changed

+110
-0
lines changed

6 files changed

+110
-0
lines changed

lib/SPIRV/OCLToSPIRV.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,10 @@ void OCLToSPIRVBase::visitCallInst(CallInst &CI) {
320320
visitCallDot(&CI, MangledName, DemangledName);
321321
return;
322322
}
323+
if (DemangledName.starts_with(kOCLBuiltinName::ClockReadPrefix)) {
324+
visitCallClockRead(&CI, MangledName, DemangledName);
325+
return;
326+
}
323327
if (DemangledName == kOCLBuiltinName::FMin ||
324328
DemangledName == kOCLBuiltinName::FMax ||
325329
DemangledName == kOCLBuiltinName::Min ||
@@ -1423,6 +1427,23 @@ void OCLToSPIRVBase::visitCallDot(CallInst *CI, StringRef MangledName,
14231427
&Attrs);
14241428
}
14251429

1430+
/// Transform a clock_read_* OpenCL builtin call into a call to the SPIR-V
/// OpReadClockKHR builtin function, appending the scope as an i32 argument.
///
/// \param CI call whose demangled callee name starts with "clock_read_".
/// \param MangledName mangled callee name (not used by this transformation).
/// \param DemangledName demangled callee name; its suffix selects the scope.
///
/// Calls whose name does not end in one of the known scope suffixes are left
/// untouched instead of being translated with a bogus scope value.
void OCLToSPIRVBase::visitCallClockRead(CallInst *CI, StringRef MangledName,
                                        StringRef DemangledName) {
  // Scope is part of the OpenCL builtin name.
  const Scope ScopeArg = StringSwitch<Scope>(DemangledName)
                             .EndsWith("device", ScopeDevice)
                             .EndsWith("work_group", ScopeWorkgroup)
                             .EndsWith("sub_group", ScopeSubgroup)
                             .Default(ScopeMax);

  // ScopeMax is only the "no match" sentinel of the switch above. The caller
  // dispatches on the "clock_read_" prefix alone, so a function that merely
  // shares the prefix can reach this point; do not emit the sentinel as a
  // scope operand for it.
  if (ScopeArg == ScopeMax)
    return;

  // The builtin returns i64 or <2 x i32>, but both variants are mapped to the
  // same instruction; hence include the return type.
  const std::string OpName = getSPIRVFuncName(OpReadClockKHR, CI->getType());

  auto Mutator = mutateCallInst(CI, OpName);
  Mutator.appendArg(getInt32(M, ScopeArg));
}
1446+
14261447
void OCLToSPIRVBase::visitCallScalToVec(CallInst *CI, StringRef MangledName,
14271448
StringRef DemangledName) {
14281449
// Check if all arguments have the same type - it's simple case.

lib/SPIRV/OCLToSPIRV.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ class OCLToSPIRVBase : public InstVisitor<OCLToSPIRVBase> {
215215
void visitCallDot(CallInst *CI, StringRef MangledName,
216216
StringRef DemangledName);
217217

218+
/// Transform clock_read_* calls to OpReadClockKHR instructions.
/// The scope argument appended to the call is derived from the suffix of
/// \p DemangledName (device, work_group or sub_group).
219+
void visitCallClockRead(CallInst *CI, StringRef MangledName,
220+
StringRef DemangledName);
221+
218222
/// Fixes for built-in functions with vector+scalar arguments that are
219223
/// translated to the SPIR-V instructions where all arguments must have the
220224
/// same type.

lib/SPIRV/OCLUtil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ const static char AtomicInit[] = "atomic_init";
239239
const static char AtomicWorkItemFence[] = "atomic_work_item_fence";
240240
const static char Barrier[] = "barrier";
241241
const static char Clamp[] = "clamp";
242+
const static char ClockReadPrefix[] = "clock_read_";
242243
const static char ConvertPrefix[] = "convert_";
243244
const static char Dot[] = "dot";
244245
const static char DotAccSat[] = "dot_acc_sat";

lib/SPIRV/SPIRVToOCL.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ void SPIRVToOCLBase::visitCallInst(CallInst &CI) {
209209
visitCallSPIRVRelational(&CI, OC);
210210
return;
211211
}
212+
if (OC == OpReadClockKHR) {
213+
visitCallSPIRVReadClockKHR(&CI);
214+
return;
215+
}
212216
if (OC == internal::OpConvertFToBF16INTEL ||
213217
OC == internal::OpConvertBF16ToFINTEL) {
214218
visitCallSPIRVBFloat16Conversions(&CI, OC);
@@ -1242,6 +1246,33 @@ void SPIRVToOCLBase::visitCallSPIRVRelational(CallInst *CI, Op OC) {
12421246
&Attrs);
12431247
}
12441248

1249+
/// Transform a __spirv_ReadClockKHR call into the matching OpenCL builtin
/// clock_read_[hilo_]{device,work_group,sub_group}.
///
/// The scope is taken from the first call argument and folded into the
/// builtin name; that argument is then removed from the call. A vector return
/// type selects the "hilo_" variant.
///
/// \param CI call to the __spirv_ReadClockKHR builtin function.
///
/// The call is left unchanged when the scope is not a constant integer or is
/// not one of the scopes that has a corresponding OpenCL builtin, because no
/// valid builtin name can be formed in those cases.
void SPIRVToOCLBase::visitCallSPIRVReadClockKHR(CallInst *CI) {
  // The scope has to be known at translation time to be encoded in the name.
  auto *ScopeOp = dyn_cast<ConstantInt>(CI->getArgOperand(0));
  if (!ScopeOp)
    return;

  // Map the scope (taken from the argument) to the builtin name suffix.
  const char *ScopeSuffix = nullptr;
  switch (static_cast<Scope>(ScopeOp->getZExtValue())) {
  case ScopeDevice:
    ScopeSuffix = "device";
    break;
  case ScopeWorkgroup:
    ScopeSuffix = "work_group";
    break;
  case ScopeSubgroup:
    ScopeSuffix = "sub_group";
    break;
  default:
    // No clock_read_* builtin exists for this scope; emitting a truncated
    // "clock_read_" name would reference a non-existent function.
    return;
  }

  std::string Name = "clock_read_";
  if (CI->getType()->isVectorTy())
    Name += "hilo_";
  Name += ScopeSuffix;

  auto Mutator = mutateCallInst(CI, Name);
  Mutator.removeArg(0);
}
1275+
12451276
std::string SPIRVToOCLBase::getGroupBuiltinPrefix(CallInst *CI) {
12461277
std::string Prefix;
12471278
auto ES = getArgAsScope(CI, 0);

lib/SPIRV/SPIRVToOCL.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ class SPIRVToOCLBase : public InstVisitor<SPIRVToOCLBase> {
237237
/// Transform relational builtin, e.g. __spirv_IsNan, to OpenCL builtin.
238238
void visitCallSPIRVRelational(CallInst *CI, Op OC);
239239

240+
/// Transform __spirv_ReadClockKHR to OpenCL builtin.
/// The scope argument is encoded in the builtin name
/// (clock_read_[hilo_]device / work_group / sub_group) and removed from the
/// call.
241+
void visitCallSPIRVReadClockKHR(CallInst *CI);
242+
240243
/// Conduct generic mutations for all atomic builtins
241244
virtual CallInst *mutateCommonAtomicArguments(CallInst *CI, Op OC) = 0;
242245

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// REQUIRES: spirv-dis
2+
// RUN: %clang_cc1 -triple spir-unknown-unknown -O1 -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -emit-llvm-bc %s -o %t.bc
3+
// RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_shader_clock -o %t.spv
4+
// RUN: spirv-dis %t.spv -o - | FileCheck %s --check-prefix=CHECK-SPIRV
5+
// TODO: spirv-val %t.spv
6+
// RUN: llvm-spirv -r %t.spv -o %t.rev.bc
7+
// RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
8+
// RUN: llvm-spirv -r --spirv-target-env=SPV-IR %t.spv -o %t.rev.bc
9+
// RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
10+
11+
// CHECK-SPIRV: OpCapability ShaderClockKHR
12+
// CHECK-SPIRV: OpExtension "SPV_KHR_shader_clock"
13+
// CHECK-SPIRV-DAG: [[uint:%[a-z0-9_]+]] = OpTypeInt 32
14+
// CHECK-SPIRV-DAG: [[ulong:%[a-z0-9_]+]] = OpTypeInt 64
15+
// CHECK-SPIRV-DAG: [[v2uint:%[a-z0-9_]+]] = OpTypeVector [[uint]] 2
16+
// CHECK-SPIRV-DAG: [[uint_1:%[a-z0-9_]+]] = OpConstant [[uint]] 1
17+
// CHECK-SPIRV-DAG: [[uint_2:%[a-z0-9_]+]] = OpConstant [[uint]] 2
18+
// CHECK-SPIRV-DAG: [[uint_3:%[a-z0-9_]+]] = OpConstant [[uint]] 3
19+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_1]]
20+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_2]]
21+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_3]]
22+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_1]]
23+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_2]]
24+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_3]]
25+
26+
// CHECK-LLVM-LABEL: test_clocks
27+
// CHECK-LLVM: call spir_func i64 @_Z17clock_read_devicev()
28+
// CHECK-LLVM: call spir_func i64 @_Z21clock_read_work_groupv()
29+
// CHECK-LLVM: call spir_func i64 @_Z20clock_read_sub_groupv()
30+
// CHECK-LLVM: call spir_func <2 x i32> @_Z22clock_read_hilo_devicev()
31+
// CHECK-LLVM: call spir_func <2 x i32> @_Z26clock_read_hilo_work_groupv()
32+
// CHECK-LLVM: call spir_func <2 x i32> @_Z25clock_read_hilo_sub_groupv()
33+
34+
// CHECK-SPV-IR-LABEL: test_clocks
35+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 1)
36+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 2)
37+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 3)
38+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 1)
39+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 2)
40+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 3)
41+
42+
// Calls each of the six clock_read_* builtins once: the three ulong variants
// first, then the three uint2 "hilo" variants, each in device, work_group,
// sub_group order. The FileCheck expectations above list them in this order.
kernel void test_clocks(global ulong *out64, global uint2 *outv2) {
43+
out64[0] = clock_read_device();
44+
out64[1] = clock_read_work_group();
45+
out64[2] = clock_read_sub_group();
46+
47+
outv2[0] = clock_read_hilo_device();
48+
outv2[1] = clock_read_hilo_work_group();
49+
outv2[2] = clock_read_hilo_sub_group();
50+
}

0 commit comments

Comments
 (0)