Skip to content

Commit 7e4a623

Browse files
[Profiling] add profiling code to get kernel submit/execute time (#548)
* add tracer benchmark usage
1 parent 3609987 commit 7e4a623

File tree

4 files changed

+54
-13
lines changed

4 files changed

+54
-13
lines changed

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,18 @@ Add '-v' to the above command-line to get verbose output.
173173
## License
174174
This code is made available under the Apache License 2.0 with LLVM Exceptions.
175175
See the `LICENSE.txt` file for more details.
176+
177+
## Profiling kernel execute time
178+
### sycl event
179+
```sh
180+
export IMEX_ENABLE_PROFILING=ON
181+
run the test
182+
```
183+
### trace tools
184+
```sh
185+
python {your_path}/imex_runner.py xxx -o test.mlir
186+
mlir-translate test.mlir -mlir-to-llvmir -o test.ll
187+
llc test.ll -filetype=obj -o test.o
188+
clang++ test.o {path}/libmlir_runner_utils.so {path}/libmlir_c_runner_utils.so {path}/libsycl-runtime.so -no-pie -o test
189+
ze_tracer ./test
190+
```

include/imex/Transforms/Passes.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def InsertGPUAllocs : Pass<"insert-gpu-allocs", "::mlir::func::FuncOp"> {
4646
def SetSPIRVCapabilities : Pass<"set-spirv-capabilities"> {
4747
let summary = "Sets Spirv capabilities";
4848
let constructor = "imex::createSetSPIRVCapabilitiesPass()";
49+
let dependentDialects = ["::mlir::spirv::SPIRVDialect"];
4950
let options = [
5051
Option<"clientAPI", "client-api", "std::string", /*default=*/"\"opencl\"",
5152
"The client API to use for setting Spirv capabilities">

lib/ExecutionEngine/SYCLRUNTIME/SyclRuntimeWrappers.cpp

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -99,30 +99,31 @@ struct GPUSYCLQUEUE {
9999
sycl::context syclContext_;
100100
sycl::queue syclQueue_;
101101

102-
GPUSYCLQUEUE() {
102+
GPUSYCLQUEUE(sycl::property_list propList) {
103103

104104
syclDevice_ = getDefaultDevice();
105105
syclContext_ = sycl::context(syclDevice_);
106-
syclQueue_ = sycl::queue(syclContext_, syclDevice_);
106+
syclQueue_ = sycl::queue(syclContext_, syclDevice_, propList);
107107
}
108108

109-
GPUSYCLQUEUE(sycl::device *device, sycl::context *context) {
109+
GPUSYCLQUEUE(sycl::device *device, sycl::context *context,
110+
sycl::property_list propList) {
110111
syclDevice_ = *device;
111112
syclContext_ = *context;
112-
syclQueue_ = sycl::queue(syclContext_, syclDevice_);
113+
syclQueue_ = sycl::queue(syclContext_, syclDevice_, propList);
113114
}
114-
GPUSYCLQUEUE(sycl::device *device) {
115+
GPUSYCLQUEUE(sycl::device *device, sycl::property_list propList) {
115116

116117
syclDevice_ = *device;
117118
syclContext_ = sycl::context(syclDevice_);
118-
syclQueue_ = sycl::queue(syclContext_, syclDevice_);
119+
syclQueue_ = sycl::queue(syclContext_, syclDevice_, propList);
119120
}
120121

121-
GPUSYCLQUEUE(sycl::context *context) {
122+
GPUSYCLQUEUE(sycl::context *context, sycl::property_list propList) {
122123

123124
syclDevice_ = getDefaultDevice();
124125
syclContext_ = *context;
125-
syclQueue_ = sycl::queue(syclContext_, syclDevice_);
126+
syclQueue_ = sycl::queue(syclContext_, syclDevice_, propList);
126127
}
127128

128129
}; // end of GPUSYCLQUEUE
@@ -197,32 +198,49 @@ static void launchKernel(GPUSYCLQUEUE *queue, sycl::kernel *kernel,
197198

198199
auto paramsCount = countUntil(params, ParamDesc{nullptr, 0});
199200

200-
syclQueue.submit([&](sycl::handler &cgh) {
201+
sycl::event event = syclQueue.submit([&](sycl::handler &cgh) {
201202
for (size_t i = 0; i < paramsCount; i++) {
202203
auto param = params[i];
203204
cgh.set_arg(static_cast<uint32_t>(i),
204205
*(static_cast<void **>(param.data)));
205206
}
206207
cgh.parallel_for(syclNdRange, *kernel);
207208
});
209+
if (getenv("IMEX_ENABLE_PROFILING")) {
210+
// auto submitTime = event.get_profiling_info<
211+
// cl::sycl::info::event_profiling::command_submit>();
212+
auto startTime = event.get_profiling_info<
213+
cl::sycl::info::event_profiling::command_start>();
214+
auto endTime =
215+
event
216+
.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
217+
// auto submissionTime = float(startTime - submitTime) / 1000000.0f;
218+
// fprintf(stdout, "the kernel submission time is %f ms\n", submissionTime);
219+
auto executionTime = float(endTime - startTime) / 1000000.0f;
220+
fprintf(stdout, "the kernel execution time is %f ms\n", executionTime);
221+
}
208222
}
209223

210224
// Wrappers
211225

212226
extern "C" SYCL_RUNTIME_EXPORT GPUSYCLQUEUE *gpuCreateStream(void *device,
213227
void *context) {
228+
auto propList = sycl::property_list{};
229+
if (getenv("IMEX_ENABLE_PROFILING")) {
230+
propList = sycl::property_list{sycl::property::queue::enable_profiling()};
231+
}
214232
return catchAll([&]() {
215233
if (!device && !context) {
216-
return new GPUSYCLQUEUE();
234+
return new GPUSYCLQUEUE(propList);
217235
} else if (device && context) {
218236
// TODO: Check if the pointers/address is valid and holds the correct
219237
// device and context
220238
return new GPUSYCLQUEUE(static_cast<sycl::device *>(device),
221-
static_cast<sycl::context *>(context));
239+
static_cast<sycl::context *>(context), propList);
222240
} else if (device && !context) {
223-
return new GPUSYCLQUEUE(static_cast<sycl::device *>(device));
241+
return new GPUSYCLQUEUE(static_cast<sycl::device *>(device), propList);
224242
} else {
225-
return new GPUSYCLQUEUE(static_cast<sycl::context *>(context));
243+
return new GPUSYCLQUEUE(static_cast<sycl::context *>(context), propList);
226244
}
227245
});
228246
}

tools/imex-runner/imex-runner.py.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ parser = argparse.ArgumentParser(
7373
description="Run imex-opt, optionally pipe result into selected mlir runner (default: mlir-cpu-runner) and then optionally pipe output into FileCheck"
7474
)
7575
parser.add_argument("--input-file", "-i", default=None, help="input MLIR file")
76+
parser.add_argument("--output-file", "-o", default=None, help="output MLIR file")
7677
parser.add_argument("--pass-pipeline-file", "-f", default=None, help="file defining pass pipeline")
7778
parser.add_argument("--pass-pipeline", "-p", default=None, help="pass pipeline (string)")
7879
parser.add_argument("--imex-print-before-all", "-b", action='store_true', dest='before', help="print ir before all passes")
@@ -165,6 +166,12 @@ if args.after:
165166
cmd.append(f'--mlir-print-ir-after-all')
166167
cmds.append(cmd)
167168

169+
# output to a file
170+
if args.output_file:
171+
cmd=['tee']
172+
cmd.append(args.output_file)
173+
cmds.append(cmd)
174+
168175
# build runner command
169176
if not args.no_mlir_runner:
170177
# build runner command: all unknown args will be passed to the runner

0 commit comments

Comments
 (0)