Skip to content

Commit 5122527

Browse files
author
Fábio Mestre
committed
Merge remote-tracking branch 'origin/main' into fabio/cmd_buffer_kernel_update
2 parents ef48291 + 229869c commit 5122527

File tree

17 files changed

+340
-125
lines changed

17 files changed

+340
-125
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ List of options provided by CMake:
145145
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
146146
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
147147
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
148-
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
148+
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `$ENV{ROCM_PATH}` or `/opt/rocm` |
149149
| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` |
150150
| UR_HIP_HSA_INCLUDE_DIRS | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include` |
151151
| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib` |

include/ur_api_funcs.def

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
2+
/*
3+
*
4+
* Copyright (C) 2024 Intel Corporation
5+
*
6+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
7+
* See LICENSE.TXT
8+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9+
*
10+
* @file ur_api_funcs.def
11+
* @version v0.11-r0
12+
*
13+
*/
14+
15+
// Auto-generated file, do not edit.
16+
17+
_UR_API(urPlatformGet)
18+
_UR_API(urPlatformGetInfo)
19+
_UR_API(urPlatformGetNativeHandle)
20+
_UR_API(urPlatformCreateWithNativeHandle)
21+
_UR_API(urPlatformGetApiVersion)
22+
_UR_API(urPlatformGetBackendOption)
23+
_UR_API(urContextCreate)
24+
_UR_API(urContextRetain)
25+
_UR_API(urContextRelease)
26+
_UR_API(urContextGetInfo)
27+
_UR_API(urContextGetNativeHandle)
28+
_UR_API(urContextCreateWithNativeHandle)
29+
_UR_API(urContextSetExtendedDeleter)
30+
_UR_API(urEventGetInfo)
31+
_UR_API(urEventGetProfilingInfo)
32+
_UR_API(urEventWait)
33+
_UR_API(urEventRetain)
34+
_UR_API(urEventRelease)
35+
_UR_API(urEventGetNativeHandle)
36+
_UR_API(urEventCreateWithNativeHandle)
37+
_UR_API(urEventSetCallback)
38+
_UR_API(urProgramCreateWithIL)
39+
_UR_API(urProgramCreateWithBinary)
40+
_UR_API(urProgramBuild)
41+
_UR_API(urProgramCompile)
42+
_UR_API(urProgramLink)
43+
_UR_API(urProgramRetain)
44+
_UR_API(urProgramRelease)
45+
_UR_API(urProgramGetFunctionPointer)
46+
_UR_API(urProgramGetGlobalVariablePointer)
47+
_UR_API(urProgramGetInfo)
48+
_UR_API(urProgramGetBuildInfo)
49+
_UR_API(urProgramSetSpecializationConstants)
50+
_UR_API(urProgramGetNativeHandle)
51+
_UR_API(urProgramCreateWithNativeHandle)
52+
_UR_API(urProgramBuildExp)
53+
_UR_API(urProgramCompileExp)
54+
_UR_API(urProgramLinkExp)
55+
_UR_API(urKernelCreate)
56+
_UR_API(urKernelGetInfo)
57+
_UR_API(urKernelGetGroupInfo)
58+
_UR_API(urKernelGetSubGroupInfo)
59+
_UR_API(urKernelRetain)
60+
_UR_API(urKernelRelease)
61+
_UR_API(urKernelGetNativeHandle)
62+
_UR_API(urKernelCreateWithNativeHandle)
63+
_UR_API(urKernelGetSuggestedLocalWorkSize)
64+
_UR_API(urKernelSetArgValue)
65+
_UR_API(urKernelSetArgLocal)
66+
_UR_API(urKernelSetArgPointer)
67+
_UR_API(urKernelSetExecInfo)
68+
_UR_API(urKernelSetArgSampler)
69+
_UR_API(urKernelSetArgMemObj)
70+
_UR_API(urKernelSetSpecializationConstants)
71+
_UR_API(urKernelSuggestMaxCooperativeGroupCountExp)
72+
_UR_API(urQueueGetInfo)
73+
_UR_API(urQueueCreate)
74+
_UR_API(urQueueRetain)
75+
_UR_API(urQueueRelease)
76+
_UR_API(urQueueGetNativeHandle)
77+
_UR_API(urQueueCreateWithNativeHandle)
78+
_UR_API(urQueueFinish)
79+
_UR_API(urQueueFlush)
80+
_UR_API(urSamplerCreate)
81+
_UR_API(urSamplerRetain)
82+
_UR_API(urSamplerRelease)
83+
_UR_API(urSamplerGetInfo)
84+
_UR_API(urSamplerGetNativeHandle)
85+
_UR_API(urSamplerCreateWithNativeHandle)
86+
_UR_API(urMemImageCreate)
87+
_UR_API(urMemBufferCreate)
88+
_UR_API(urMemRetain)
89+
_UR_API(urMemRelease)
90+
_UR_API(urMemBufferPartition)
91+
_UR_API(urMemGetNativeHandle)
92+
_UR_API(urMemBufferCreateWithNativeHandle)
93+
_UR_API(urMemImageCreateWithNativeHandle)
94+
_UR_API(urMemGetInfo)
95+
_UR_API(urMemImageGetInfo)
96+
_UR_API(urPhysicalMemCreate)
97+
_UR_API(urPhysicalMemRetain)
98+
_UR_API(urPhysicalMemRelease)
99+
_UR_API(urAdapterGet)
100+
_UR_API(urAdapterRelease)
101+
_UR_API(urAdapterRetain)
102+
_UR_API(urAdapterGetLastError)
103+
_UR_API(urAdapterGetInfo)
104+
_UR_API(urEnqueueKernelLaunch)
105+
_UR_API(urEnqueueEventsWait)
106+
_UR_API(urEnqueueEventsWaitWithBarrier)
107+
_UR_API(urEnqueueMemBufferRead)
108+
_UR_API(urEnqueueMemBufferWrite)
109+
_UR_API(urEnqueueMemBufferReadRect)
110+
_UR_API(urEnqueueMemBufferWriteRect)
111+
_UR_API(urEnqueueMemBufferCopy)
112+
_UR_API(urEnqueueMemBufferCopyRect)
113+
_UR_API(urEnqueueMemBufferFill)
114+
_UR_API(urEnqueueMemImageRead)
115+
_UR_API(urEnqueueMemImageWrite)
116+
_UR_API(urEnqueueMemImageCopy)
117+
_UR_API(urEnqueueMemBufferMap)
118+
_UR_API(urEnqueueMemUnmap)
119+
_UR_API(urEnqueueUSMFill)
120+
_UR_API(urEnqueueUSMMemcpy)
121+
_UR_API(urEnqueueUSMPrefetch)
122+
_UR_API(urEnqueueUSMAdvise)
123+
_UR_API(urEnqueueUSMFill2D)
124+
_UR_API(urEnqueueUSMMemcpy2D)
125+
_UR_API(urEnqueueDeviceGlobalVariableWrite)
126+
_UR_API(urEnqueueDeviceGlobalVariableRead)
127+
_UR_API(urEnqueueReadHostPipe)
128+
_UR_API(urEnqueueWriteHostPipe)
129+
_UR_API(urEnqueueKernelLaunchCustomExp)
130+
_UR_API(urEnqueueCooperativeKernelLaunchExp)
131+
_UR_API(urEnqueueTimestampRecordingExp)
132+
_UR_API(urEnqueueNativeCommandExp)
133+
_UR_API(urBindlessImagesUnsampledImageHandleDestroyExp)
134+
_UR_API(urBindlessImagesSampledImageHandleDestroyExp)
135+
_UR_API(urBindlessImagesImageAllocateExp)
136+
_UR_API(urBindlessImagesImageFreeExp)
137+
_UR_API(urBindlessImagesUnsampledImageCreateExp)
138+
_UR_API(urBindlessImagesSampledImageCreateExp)
139+
_UR_API(urBindlessImagesImageCopyExp)
140+
_UR_API(urBindlessImagesImageGetInfoExp)
141+
_UR_API(urBindlessImagesMipmapGetLevelExp)
142+
_UR_API(urBindlessImagesMipmapFreeExp)
143+
_UR_API(urBindlessImagesImportExternalMemoryExp)
144+
_UR_API(urBindlessImagesMapExternalArrayExp)
145+
_UR_API(urBindlessImagesMapExternalLinearMemoryExp)
146+
_UR_API(urBindlessImagesReleaseExternalMemoryExp)
147+
_UR_API(urBindlessImagesImportExternalSemaphoreExp)
148+
_UR_API(urBindlessImagesReleaseExternalSemaphoreExp)
149+
_UR_API(urBindlessImagesWaitExternalSemaphoreExp)
150+
_UR_API(urBindlessImagesSignalExternalSemaphoreExp)
151+
_UR_API(urUSMHostAlloc)
152+
_UR_API(urUSMDeviceAlloc)
153+
_UR_API(urUSMSharedAlloc)
154+
_UR_API(urUSMFree)
155+
_UR_API(urUSMGetMemAllocInfo)
156+
_UR_API(urUSMPoolCreate)
157+
_UR_API(urUSMPoolRetain)
158+
_UR_API(urUSMPoolRelease)
159+
_UR_API(urUSMPoolGetInfo)
160+
_UR_API(urUSMPitchedAllocExp)
161+
_UR_API(urUSMImportExp)
162+
_UR_API(urUSMReleaseExp)
163+
_UR_API(urCommandBufferCreateExp)
164+
_UR_API(urCommandBufferRetainExp)
165+
_UR_API(urCommandBufferReleaseExp)
166+
_UR_API(urCommandBufferFinalizeExp)
167+
_UR_API(urCommandBufferAppendKernelLaunchExp)
168+
_UR_API(urCommandBufferAppendUSMMemcpyExp)
169+
_UR_API(urCommandBufferAppendUSMFillExp)
170+
_UR_API(urCommandBufferAppendMemBufferCopyExp)
171+
_UR_API(urCommandBufferAppendMemBufferWriteExp)
172+
_UR_API(urCommandBufferAppendMemBufferReadExp)
173+
_UR_API(urCommandBufferAppendMemBufferCopyRectExp)
174+
_UR_API(urCommandBufferAppendMemBufferWriteRectExp)
175+
_UR_API(urCommandBufferAppendMemBufferReadRectExp)
176+
_UR_API(urCommandBufferAppendMemBufferFillExp)
177+
_UR_API(urCommandBufferAppendUSMPrefetchExp)
178+
_UR_API(urCommandBufferAppendUSMAdviseExp)
179+
_UR_API(urCommandBufferEnqueueExp)
180+
_UR_API(urCommandBufferRetainCommandExp)
181+
_UR_API(urCommandBufferReleaseCommandExp)
182+
_UR_API(urCommandBufferUpdateKernelLaunchExp)
183+
_UR_API(urCommandBufferGetInfoExp)
184+
_UR_API(urCommandBufferCommandGetInfoExp)
185+
_UR_API(urUsmP2PEnablePeerAccessExp)
186+
_UR_API(urUsmP2PDisablePeerAccessExp)
187+
_UR_API(urUsmP2PPeerAccessGetInfoExp)
188+
_UR_API(urVirtualMemGranularityGetInfo)
189+
_UR_API(urVirtualMemReserve)
190+
_UR_API(urVirtualMemFree)
191+
_UR_API(urVirtualMemMap)
192+
_UR_API(urVirtualMemUnmap)
193+
_UR_API(urVirtualMemSetAccess)
194+
_UR_API(urVirtualMemGetInfo)
195+
_UR_API(urDeviceGet)
196+
_UR_API(urDeviceGetInfo)
197+
_UR_API(urDeviceRetain)
198+
_UR_API(urDeviceRelease)
199+
_UR_API(urDevicePartition)
200+
_UR_API(urDeviceSelectBinary)
201+
_UR_API(urDeviceGetNativeHandle)
202+
_UR_API(urDeviceCreateWithNativeHandle)
203+
_UR_API(urDeviceGetGlobalTimestamps)
204+
_UR_API(urLoaderConfigCreate)
205+
_UR_API(urLoaderConfigEnableLayer)
206+
_UR_API(urLoaderConfigGetInfo)
207+
_UR_API(urLoaderConfigRelease)
208+
_UR_API(urLoaderConfigRetain)
209+
_UR_API(urLoaderConfigSetCodeLocationCallback)
210+
_UR_API(urLoaderConfigSetMockingEnabled)
211+
_UR_API(urLoaderInit)
212+
_UR_API(urLoaderTearDown)

scripts/benchmarks/benches/compute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def run(self, env_vars) -> Result:
8282

8383
result = self.run_bench(command, env_vars)
8484
(label, mean) = self.parse_output(result)
85-
return Result(label=label, value=mean, command=command, env=env_vars, stdout=result)
85+
return Result(label=label, value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better())
8686

8787
def parse_output(self, output):
8888
csv_file = io.StringIO(output)

scripts/benchmarks/benches/velocity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def run(self, env_vars) -> Result:
6161

6262
result = self.run_bench(command, env_vars)
6363

64-
return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)
64+
return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better())
6565

6666
def teardown(self):
6767
return

scripts/benchmarks/main.py

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -52,34 +52,46 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
5252
benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())]
5353

5454
for benchmark in benchmarks:
55-
print(f"setting up {benchmark.name()}... ", end='', flush=True)
56-
benchmark.setup()
57-
print("complete.")
55+
try:
56+
print(f"setting up {benchmark.name()}... ", end='', flush=True)
57+
benchmark.setup()
58+
print("complete.")
59+
except Exception as e:
60+
if options.exit_on_failure:
61+
raise e
62+
else:
63+
print(f"failed: {e}")
5864

5965
results = []
6066
for benchmark in benchmarks:
61-
merged_env_vars = {**additional_env_vars}
62-
iteration_results = []
63-
for iter in range(options.iterations):
64-
print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True)
65-
bench_results = benchmark.run(merged_env_vars)
66-
if bench_results is not None:
67-
print(f"complete ({bench_results.value} {benchmark.unit()}).")
68-
iteration_results.append(bench_results)
67+
try:
68+
merged_env_vars = {**additional_env_vars}
69+
iteration_results = []
70+
for iter in range(options.iterations):
71+
print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True)
72+
bench_results = benchmark.run(merged_env_vars)
73+
if bench_results is not None:
74+
print(f"complete ({bench_results.value} {benchmark.unit()}).")
75+
iteration_results.append(bench_results)
76+
else:
77+
print(f"did not finish.")
78+
79+
if len(iteration_results) == 0:
80+
continue
81+
82+
iteration_results.sort(key=lambda res: res.value)
83+
median_index = len(iteration_results) // 2
84+
median_result = iteration_results[median_index]
85+
86+
median_result.unit = benchmark.unit()
87+
median_result.name = benchmark.name()
88+
89+
results.append(median_result)
90+
except Exception as e:
91+
if options.exit_on_failure:
92+
raise e
6993
else:
70-
print(f"did not finish.")
71-
72-
if len(iteration_results) == 0:
73-
continue
74-
75-
iteration_results.sort(key=lambda res: res.value)
76-
median_index = len(iteration_results) // 2
77-
median_result = iteration_results[median_index]
78-
79-
median_result.unit = benchmark.unit()
80-
median_result.name = benchmark.name()
81-
82-
results.append(median_result)
94+
print(f"failed: {e}")
8395

8496
for benchmark in benchmarks:
8597
print(f"tearing down {benchmark.name()}... ", end='', flush=True)
@@ -126,6 +138,7 @@ def validate_and_parse_env_args(env_args):
126138
parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600)
127139
parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None)
128140
parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true")
141+
parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true")
129142

130143
args = parser.parse_args()
131144
additional_env_vars = validate_and_parse_env_args(args.env)
@@ -137,6 +150,7 @@ def validate_and_parse_env_args(env_args):
137150
options.timeout = args.timeout
138151
options.ur_dir = args.ur_dir
139152
options.ur_adapter_name = args.ur_adapter_name
153+
options.exit_on_failure = args.exit_on_failure
140154

141155
benchmark_filter = re.compile(args.filter) if args.filter else None
142156

scripts/benchmarks/output.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def generate_summary_table(chart_data: dict[str, list[Result]]):
116116
if key in results:
117117
value = results[key].value
118118
if key == best_key:
119-
row += f" `**{value}**` |" # Highlight the best value
119+
row += f" <ins>{value}</ins> |" # Highlight the best value
120120
else:
121121
row += f" {value} |"
122122
else:
@@ -132,6 +132,7 @@ def generate_markdown(chart_data: dict[str, list[Result]]):
132132

133133
return f"""
134134
# Summary
135+
<ins>result</ins> is better\n
135136
{summary_table}
136137
# Charts
137138
{mermaid_script}

scripts/generate_code.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,26 @@ def _mako_print_cpp(path, namespace, tags, version, specs, meta):
108108
specs=specs,
109109
meta=meta)
110110

111+
112+
def _mako_api_funcs(path, namespace, tags, version, revision, specs, meta):
    """Render the ``api_funcs.def.mako`` template to ``<namespace>_api_funcs.def``.

    The output file is written into ``path``; the template is looked up in
    the module-level ``templates_dir``.

    Args:
        path: Directory that receives the generated ``.def`` file.
        namespace: Prefix used to build the output file name; also forwarded
            to the template.
        tags, specs, meta: Forwarded to the template unchanged.
        version: Passed to the template as ``ver``.
        revision: Passed to the template as ``rev``.

    Returns:
        Whatever ``util.makoWrite`` returns; the caller adds it to its
        running ``loc`` total.
    """
    base_name = "%s_api_funcs"%(namespace)
    template_path = os.path.join(templates_dir, "api_funcs.def.mako")
    output_path = os.path.join(path, "%s.def"%(base_name))

    # Keyword arguments made available to the mako template.
    render_args = dict(
        name=base_name,
        ver=version,
        rev=revision,
        namespace=namespace,
        tags=tags,
        specs=specs,
        meta=meta,
    )

    print("Generating %s..."%output_path)
    return util.makoWrite(template_path, output_path, **render_args)
130+
111131
"""
112132
generates c/c++ files from the specification documents
113133
"""
@@ -116,6 +136,7 @@ def _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, spec
116136
loc += _mako_api_cpp(srcpath, namespace, tags, version, revision, specs, meta)
117137
loc += _mako_ddi_h(incpath, namespace, tags, version, revision, specs, meta)
118138
loc += _mako_print_hpp(incpath, namespace, tags, version, revision, specs, meta)
139+
loc += _mako_api_funcs(incpath, namespace, tags, version, revision, specs, meta)
119140

120141
return loc
121142

0 commit comments

Comments
 (0)