Skip to content

Commit 7d54aff

Browse files
NB4444MyNameIsTrezNaraenda
authored
SPIR-V compatibility update (#76)
The changes in this PR should fix the SPIR-V failures and add some documentation on how it works. There are also two commits with some CI changes: 2f85963 e32fd1e --------- Co-authored-by: Sander Bos <[email protected]> Co-authored-by: Nara Prasetya <[email protected]>
1 parent c8cfad1 commit 7d54aff

File tree

22 files changed

+1045
-127
lines changed

22 files changed

+1045
-127
lines changed

projects/rocprim/.gitlab-ci.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,11 @@ benchmark:
610610
--benchmark_filename_regex "${BENCHMARK_FILENAME_REGEX}"
611611
--benchmark_filter_regex "${BENCHMARK_ALGORITHM_REGEX}"
612612
--seed "${BENCHMARK_SEED}"
613+
- python3
614+
.gitlab/report_noise.py
615+
--benchmark_json_dir "${BENCHMARK_RESULT_DIR}"
616+
--noise_threshold_percentage 1.0
617+
--accept_high_noise
613618
artifacts:
614619
paths:
615620
- ${BENCHMARK_RESULT_DIR}
@@ -698,6 +703,11 @@ autotune:execute-tuning:
698703
--size="${AUTOTUNE_SIZE}"
699704
--trials="${AUTOTUNE_TRIALS}"
700705
--seed=82589933
706+
- python3
707+
.gitlab/report_noise.py
708+
--benchmark_json_dir "${AUTOTUNE_RESULT_DIR}"
709+
--noise_threshold_percentage 1.0
710+
--accept_high_noise
701711

702712
autotune:generate-config:
703713
image: python:3.10.5-buster
Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
# THE SOFTWARE.
22+
23+
24+
import argparse
25+
import json
26+
import os
27+
import re
28+
import stat
29+
import statistics
30+
import sys
31+
32+
33+
class colors:
    """Terminal escape sequences used to colour the cells of the report."""

    # Prefix written before a cell whose value is acceptable.
    OK = "\033[92m"
    # Prefix written before a cell whose value is flagged as bad.
    FAIL = "\033[91m"
    # Suffix written after every coloured cell.
    END_COLOR = "\033[0m"
37+
38+
39+
def print_results(results):
    """Print the noise report as a left-aligned, coloured table on stdout.

    Args:
        results: A list of result dictionaries as built by ``get_results``.
            Each one provides the cell texts ("name", "noisy_permutations",
            "mean", "median", "max", "batch", "warmup", "bytes", "seed") and
            the flags that select the colour of a cell ("noisy", "bad_mean",
            "bad_median", "bad_max", "bad_batch", "bad_warmup", "bad_bytes").

    An empty ``results`` list prints only the header line. This matters
    because the caller prints the table even when no file could be loaded.
    """
    # (key of the cell text, header text, key of the flag colouring the cell).
    # The name column is never coloured; the seed column is handled separately
    # because it is not padded and its colour depends on its own value.
    columns = [
        ("name", "name", None),
        ("noisy_permutations", "noisy permutations", "noisy"),
        ("mean", "mean", "bad_mean"),
        ("median", "median", "bad_median"),
        ("max", "max", "bad_max"),
        ("batch", "batch", "bad_batch"),
        ("warmup", "warmup", "bad_warmup"),
        ("bytes", "bytes", "bad_bytes"),
    ]

    # A column is as wide as its longest value, but never narrower than its
    # header. `default=0` keeps this working when there are no results.
    widths = {
        key: max(
            max((len(str(result[key])) for result in results), default=0),
            len(header),
        )
        for key, header, _ in columns
    }

    # One extra character per column separates it from the next one.
    print("".join(header.ljust(widths[key] + 1) for key, header, _ in columns) + "seed")

    for result in results:
        printed = str(result["name"]).ljust(widths["name"])

        for key, _, flag in columns:
            if flag is None:
                continue
            printed += " "
            printed += colors.FAIL if result[flag] else colors.OK
            printed += str(result[key]).ljust(widths[key])
            printed += colors.END_COLOR

        # A seed of "random" is shown in the failure colour.
        printed += " "
        printed += colors.FAIL if result["seed"] == "random" else colors.OK
        printed += str(result["seed"])
        printed += colors.END_COLOR

        print(printed)
114+
115+
116+
def get_results(benchmarks, threshold):
    """Summarise the noise of every loaded benchmark file.

    Args:
        benchmarks: A list of dictionaries with a "name" and a "data" entry.
            "data" must contain "benchmarks" (a list of permutations, each
            with a "cv" value) and "context" (with "batch_iterations",
            "warmup_iterations", "size" and "seed").
        threshold: The noise threshold as a percentage. A "cv" value is a
            fraction, so it is multiplied by 100 before being compared.

    Returns:
        A ``(results, success)`` tuple. ``results`` holds one dictionary per
        benchmark with the formatted cell texts and the flags marking a cell
        as bad. ``success`` is False as soon as one permutation of any
        benchmark exceeds the threshold.

    A benchmark without permutations is reported as "0/0" with "n/a"
    statistics instead of raising from the statistics functions.
    """

    def get_humanized_bytes(size):
        # Divide by 1024 until the value fits the unit; PiB is the last unit.
        for unit in ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]:
            if size < 1024.0 or unit == "PiB":
                break
            size /= 1024.0
        return f"{size:.1f} {unit}"

    success = True
    results = []

    for benchmark in benchmarks:
        data = benchmark["data"]
        context = data["context"]
        permutations = data["benchmarks"]

        cvs = [permutation["cv"] for permutation in permutations]

        # The cv (coefficient of variation) is a standard way of quantifying noise
        noises = sum(cv * 100 > threshold for cv in cvs)
        noisy = noises > 0
        if noisy:
            success = False

        if cvs:
            mean = statistics.mean(cvs)
            median = statistics.median(cvs)
            max_ = max(cvs)
            mean_text = f"{mean:.1%}"
            median_text = f"{median:.1%}"
            max_text = f"{max_:.1%}"
        else:
            # mean(), median() and max() all raise on an empty sequence, so
            # there is nothing to measure and nothing to flag.
            mean = median = max_ = 0.0
            mean_text = median_text = max_text = "n/a"

        batch = context["batch_iterations"]
        warmup = context["warmup_iterations"]
        bytes_ = int(context["size"])

        results.append(
            {
                "name": benchmark["name"],
                "noisy": noisy,
                "noisy_permutations": f"{noises}/{len(permutations)}",
                "bad_mean": mean * 100 > threshold,
                "mean": mean_text,
                "bad_median": median * 100 > threshold,
                "median": median_text,
                "bad_max": max_ * 100 > threshold,
                "max": max_text,
                "bad_batch": int(batch) < 10,
                "batch": batch,
                "bad_warmup": int(warmup) < 5,
                "warmup": warmup,
                "bad_bytes": 0 < bytes_ < 128 * 1024 * 1024,  # 128 MiB
                "bytes": get_humanized_bytes(bytes_),
                "seed": context["seed"],
            }
        )

    return results, success
180+
181+
182+
def load_benchmarks(benchmark_json_dir):
    """Load every benchmark JSON file found directly in a directory.

    Args:
        benchmark_json_dir: Path of the directory to scan. Only entries whose
            name ends in ".json" and which are regular files are loaded;
            subdirectories are not descended into.

    Returns:
        A ``(benchmarks, success)`` tuple. ``benchmarks`` is a list of
        ``{"name": <file name>, "data": <parsed JSON>}`` dictionaries ordered
        by file name. ``success`` is False when at least one file could not
        be parsed; such a file is reported on stderr and left out.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
    """

    def is_benchmark_json(filename):
        if not filename.endswith(".json"):
            return False
        # isfile() is a real file-type check (a bit test against S_IFREG also
        # accepts sockets) and returns False for a dangling symlink instead of
        # raising.
        return os.path.isfile(os.path.join(benchmark_json_dir, filename))

    # listdir() returns names in arbitrary order; sort for a stable report.
    benchmark_names = sorted(
        name for name in os.listdir(benchmark_json_dir) if is_benchmark_json(name)
    )

    success = True
    benchmarks = []
    for benchmark_name in benchmark_names:
        path = os.path.join(benchmark_json_dir, benchmark_name)
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(
                f"{colors.FAIL}Failed to load {benchmark_name}{colors.END_COLOR}: {e}\n",
                file=sys.stderr,
            )
            success = False
        else:
            benchmarks.append({"name": benchmark_name, "data": data})

    return benchmarks, success
210+
211+
212+
def main():
    """Run the noise report for the directory given on the command line.

    Returns:
        False when a benchmark file failed to load. Otherwise True when
        --accept_high_noise was given, and else whether no benchmark
        permutation exceeded the noise threshold.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--noise_threshold_percentage",
        type=float,
        required=True,
        help="The noise threshold percentage, past which benchmark permutations are considered to be too noisy",
    )
    cli.add_argument(
        "--benchmark_json_dir",
        required=True,
        help="The directory of benchmark JSON files, which to report the noise of",
    )
    cli.add_argument(
        "--accept_high_noise",
        action=argparse.BooleanOptionalAction,
        help="Don't call exit(1) when there is a noisy benchmark permutation",
    )
    options = cli.parse_args()

    threshold = options.noise_threshold_percentage
    print(f"The noise threshold is {threshold:.1f}%\n")

    loaded, load_success = load_benchmarks(options.benchmark_json_dir)
    rows, results_success = get_results(loaded, threshold)

    # The table is printed even when some files failed to load.
    print_results(rows)

    # A load failure is never acceptable, whatever the noise policy is.
    if not load_success:
        return False
    return True if options.accept_high_noise else results_success
244+
245+
246+
if __name__ == "__main__":
    # Use sys.exit() rather than the exit() helper: the latter is injected by
    # the site module for interactive use and is missing under `python -S`.
    # The exit status is 0 when main() reports success and 1 otherwise.
    sys.exit(0 if main() else 1)

projects/rocprim/.gitlab/run_benchmarks.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22

3-
# Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
3+
# Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
44
#
55
# Permission is hereby granted, free of charge, to any person obtaining a copy
66
# of this software and associated documentation files (the "Software"), to deal
@@ -51,9 +51,6 @@ def is_benchmark_executable(filename):
5151
results_json_path = os.path.join(benchmark_context.benchmark_output_dir, results_json_name)
5252
args = [
5353
benchmark_path,
54-
'--name_format',
55-
'json',
56-
'--benchmark_out_format=json',
5754
f'--benchmark_out={results_json_path}',
5855
f'--benchmark_filter={benchmark_context.benchmark_filter_regex}'
5956
]

projects/rocprim/benchmark/benchmark_utils.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,8 +1345,8 @@ class executor
13451345
parser.set_optional<int>("trials", "trials", default_trials, "number of iterations");
13461346
parser.set_optional<std::string>("name_format",
13471347
"name_format",
1348-
"human",
1349-
"either: json,human,txt");
1348+
"json",
1349+
"either json, human, or txt");
13501350

13511351
// Optionally run an evenly split subset of benchmarks for autotuning.
13521352
parser.set_optional<int>("parallel_instance",

projects/rocprim/docs/block_ops/data_mov_funcs.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,18 @@ Store
6767

6868
.. doxygengroup:: blockmodule_warp_store_functions
6969
:content-only:
70+
71+
Direct Blocked Cast
72+
====================
73+
74+
Load
75+
---------
76+
77+
.. doxygengroup:: blockmodule_cast_load_functions
78+
:content-only:
79+
80+
Store
81+
----------
82+
83+
.. doxygengroup:: blockmodule_cast_store_functions
84+
:content-only:

0 commit comments

Comments
 (0)