Skip to content

Commit c69e874

Browse files
committed
Properly load repo/commit information in CI
1 parent 60d80a9 commit c69e874

File tree

5 files changed

+167
-62
lines changed

5 files changed

+167
-62
lines changed

devops/actions/run-tests/benchmark_v2/action.yml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,19 @@ runs:
110110
pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
111111
echo "-----"
112112
113+
# clang builds have git repo / commit hashes in their --version output,
# same goes for dpcpp. Obtain git repo / commit hash info this way:

# First line of --version is formatted 'clang version ... (<repo> <commit>)'
# thus we parse for (<repo> <commit>) and strip the surrounding parentheses.
#
# The whole pipeline (including `tr`) must live inside the command
# substitution: piping the assignment itself into `tr` makes the assignment a
# pipeline member, which bash runs in a subshell, leaving the variable unset
# in this script.
sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')"
if [ -z "$sycl_git_info" ]; then
  echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?"
  exit 1
fi
# Use a fixed printf format so that any '%' or '\' in the parsed text is
# printed literally instead of being interpreted as a format directive.
sycl_git_repo="$(printf '%s' "$sycl_git_info" | cut -d' ' -f1)"
sycl_git_commit="$(printf '%s' "$sycl_git_info" | cut -d' ' -f2)"
125+
113126
case "$ONEAPI_DEVICE_SELECTOR" in
114127
level_zero:*) SAVE_SUFFIX="L0" ;;
115128
level_zero_v2:*) SAVE_SUFFIX="L0v2" ;;
@@ -128,10 +141,13 @@ runs:
128141
--results-dir "./llvm-ci-perf-results/" \
129142
--output-dir "./llvm-ci-perf-results/" \
130143
--preset "$PRESET" \
131-
--timestamp-override "$SAVE_TIMESTAMP"
144+
--timestamp-override "$SAVE_TIMESTAMP" \
146+
--sycl-github-repo "$sycl_git_repo" \
147+
--sycl-commit "$sycl_git_commit"
132148
echo "-----"
133149
python3 ./devops/scripts/benchmarks/compare.py to_hist \
134-
--name Baseline_PVC_L0 \
150+
--name "$SAVE_NAME" \
135151
--compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \
136152
--results-dir "./llvm-ci-perf-results/results/"
137153

devops/scripts/benchmarks/history.py

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -58,45 +58,58 @@ def extract_timestamp(file_path: Path) -> str:
5858
self.runs = benchmark_runs
5959

6060
def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
61-
try:
62-
script_dir = os.path.dirname(os.path.abspath(__file__))
63-
result = run("git rev-parse --short HEAD", cwd=script_dir)
64-
git_hash = result.stdout.decode().strip()
65-
66-
# Get the GitHub repo URL from git remote
67-
remote_result = run("git remote get-url origin", cwd=script_dir)
68-
remote_url = remote_result.stdout.decode().strip()
69-
70-
# Convert SSH or HTTPS URL to owner/repo format
71-
if remote_url.startswith("[email protected]:"):
72-
# SSH format: [email protected]:owner/repo.git
73-
github_repo = remote_url.split("[email protected]:")[1].rstrip(".git")
74-
elif remote_url.startswith("https://github.com/"):
75-
# HTTPS format: https://github.com/owner/repo.git
76-
github_repo = remote_url.split("https://github.com/")[1].rstrip(".git")
77-
else:
61+
62+
def git_info_from_path(path: Path) -> "tuple[str, str | None]":
    """
    Derive the commit hash and GitHub repo of the git checkout located in path.

    Args:
        path: Directory in which the git commands are executed.

    Returns:
        (git_hash, github_repo):
            git_hash is the output of `git rev-parse --short HEAD`, or
            "unknown" if any git command failed.
            github_repo is "<owner>/<repo>" derived from the URL of the
            `origin` remote, or None if that URL is not a recognized GitHub
            SSH/HTTPS URL or if any git command failed.
    """
    # Recognized remote URL prefixes:
    #   SSH format:   git@github.com:owner/repo.git
    #   HTTPS format: https://github.com/owner/repo.git
    github_prefixes = ("git@github.com:", "https://github.com/")

    try:
        result = run("git rev-parse --short HEAD", cwd=path)
        git_hash = result.stdout.decode().strip()

        # Get the GitHub repo URL from git remote
        remote_result = run("git remote get-url origin", cwd=path)
        remote_url = remote_result.stdout.decode().strip()

        # Convert SSH or HTTPS URL to owner/repo format
        github_repo = None
        for prefix in github_prefixes:
            if remote_url.startswith(prefix):
                github_repo = remote_url[len(prefix):]
                # Drop the ".git" suffix only. str.rstrip(".git") would strip
                # any trailing run of the characters '.', 'g', 'i', 't' and
                # corrupt names such as "owner/digit".
                if github_repo.endswith(".git"):
                    github_repo = github_repo[: -len(".git")]
                break

    except Exception:
        # Best effort: metadata lookup must not abort the benchmark run.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        git_hash = "unknown"
        github_repo = None

    return git_hash, github_repo
7992

80-
except:
81-
git_hash = "unknown"
82-
github_repo = None
93+
if options.sycl_commit is None or options.sycl_github_repo is None:
94+
git_hash, github_repo = git_info_from_path(os.path.dirname(os.path.abspath(__file__)))
95+
else:
96+
git_hash, github_repo = options.sycl_commit, options.sycl_github_repo
8397

8498
# Check if RUNNER_NAME environment variable has been declared.
8599
#
86-
# RUNNER_NAME is always present in github runner environments. Because
87-
# github runners obfusicate hostnames, using socket.gethostname()
88-
# produces different hostnames when ran on the same machine multiple
89-
# times. Thus, we rely on the RUNNER_NAME variable when running on
90-
# github runners.
100+
# GitHub runners obfuscate hostnames, thus running socket.gethostname()
101+
# twice produces two different hostnames. Since github runners always
102+
# define a RUNNER_NAME variable, use RUNNER_NAME instead if it exists:
91103
hostname = os.getenv("RUNNER_NAME")
92104
if hostname is None:
93105
hostname = socket.gethostname()
94-
elif not Validate.runner_name(hostname):
95-
# However, nothing stops github runner env variables (including
96-
# RUNNER_NAME) from being modified by external actors. Ensure
97-
# RUNNER_NAME contains nothing malicious:
106+
else:
107+
# Ensure RUNNER_NAME has not been tampered with:
98108
# TODO is this overkill?
99-
raise ValueError("Illegal characters found in specified RUNNER_NAME.")
109+
Validate.runner_name(
110+
hostname,
111+
throw=ValueError("Illegal characters found in specified RUNNER_NAME.")
112+
)
100113

101114
return BenchmarkRun(
102115
name=name,

devops/scripts/benchmarks/main.py

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -476,29 +476,52 @@ def validate_and_parse_env_args(env_args):
476476
help="Benchmark preset to run",
477477
default=options.preset,
478478
)
479-
parser.add_argument(
480-
"--results-dir",
481-
type=str,
482-
help="Specify a custom directory to load/store (historical) results from",
483-
default=options.results_directory_override,
484-
)
485479
parser.add_argument(
486480
"--build-jobs",
487481
type=int,
488482
help="Number of build jobs to run simultaneously",
489483
default=options.build_jobs,
490484
)
491485
parser.add_argument(
492-
"--timestamp-override",
486+
"--hip-arch",
493487
type=str,
494-
help="Used in CI to enforce use of same timestamp across scripts",
488+
help="HIP device architecture",
495489
default=None,
496490
)
491+
492+
# Options intended for CI:
497493
parser.add_argument(
498-
"--hip-arch",
494+
"--results-dir",
499495
type=str,
500-
help="HIP device architecture",
501-
default=None,
496+
help="Specify a custom directory to load/store (historical) results from",
497+
default=options.results_directory_override,
498+
)
499+
parser.add_argument(
500+
"--timestamp-override",
501+
type=lambda ts: Validate.timestamp(
502+
ts,
503+
throw=argparse.ArgumentTypeError("Specified timestamp not in YYYYMMDD_HHMMSS format.")
504+
),
505+
help="Manually specify timestamp used in metadata",
506+
default=options.timestamp_override,
507+
)
508+
parser.add_argument(
509+
"--sycl-github-repo",
510+
type=lambda gh_repo: Validate.github_repo(
511+
gh_repo,
512+
throw=argparse.ArgumentTypeError("Specified SYCL github repo not in <owner>/<repo> format.")
513+
),
514+
help="Manually specify SYCL github repo used in metadata",
515+
default=options.sycl_github_repo,
516+
)
517+
parser.add_argument(
518+
"--sycl-commit",
519+
type=lambda commit: Validate.commit_hash(
520+
commit,
521+
throw=argparse.ArgumentTypeError("Specified SYCL commit is not a valid commit hash.")
522+
),
523+
help="Manually specify commit hash used to build SYCL in metadata",
524+
default=options.sycl_commit,
502525
)
503526

504527
args = parser.parse_args()
@@ -539,14 +562,18 @@ def validate_and_parse_env_args(env_args):
539562
if not os.path.isdir(args.output_dir):
540563
parser.error("Specified --output-dir is not a valid path")
541564
options.output_directory = os.path.abspath(args.output_dir)
542-
if args.timestamp_override is not None:
543-
if not Validate.timestamp(args.timestamp_override):
544-
parser.error("--timestamp_override is not a valid timestamp")
545-
options.timestamp_override = args.timestamp_override
565+
566+
# Options intended for CI:
567+
options.timestamp_override = args.timestamp_override
546568
if args.results_dir is not None:
547569
if not os.path.isdir(args.results_dir):
548570
parser.error("Specified --results-dir is not a valid path")
549571
options.results_directory_override = os.path.abspath(args.results_dir)
572+
if args.sycl_github_repo is not None or args.sycl_commit is not None:
573+
if args.sycl_github_repo is None or args.sycl_commit is None:
574+
parser.error("--sycl-github-repo and --sycl-commit must both be defined together")
575+
options.sycl_github_repo = args.sycl_github_repo
576+
options.sycl_commit = args.sycl_commit
550577

551578
benchmark_filter = re.compile(args.filter) if args.filter else None
552579

devops/scripts/benchmarks/options.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,20 +46,23 @@ class Options:
4646
preset: str = "Full"
4747
build_jobs: int = multiprocessing.cpu_count()
4848

49-
# Options applicable to CI only:
49+
# Options intended for CI:
5050
regression_threshold: float = 0.05
51-
# In CI, it may be necessary to e.g. compare or redo benchmark runs.
52-
# A timestamp is generated at the beginning of the CI run and used through
53-
# the entire CI process, instead of scripts generating their own timestamps
54-
# every time a script runs (default behavior).
51+
# It's necessary in CI to compare or redo benchmark runs. Instead of
52+
# generating a new timestamp each run by default, specify a single timestamp
53+
# to use across the entire CI run.
5554
timestamp_override: str = None
56-
# By default, the directory to fetch results from is the benchmark working
57-
# directory specified in the CLI args, hence a default value of "None" as
58-
# the value is decided via runtime.
55+
# The default directory to fetch results from is args.benchmark_directory,
56+
# hence a default value of "None" as the value is decided during runtime.
5957
#
6058
# However, sometimes you may want to fetch results from a different
6159
# directory, i.e. in CI when you clone the results directory elsewhere.
6260
results_directory_override: str = None
61+
# By default, we fetch SYCL commit info from the folder where main.py is
62+
# located. This doesn't work right when CI uses different commits for e.g.
63+
# CI scripts vs SYCL build source.
64+
sycl_github_repo: str = None
65+
sycl_commit: str = None
6366

6467

6568
options = Options()
Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,68 @@
11
import re
22

3+
def validate_on_re(val: str, regex: "str | re.Pattern", throw: Exception = None):
    """
    Check whether the whole of val is matched by the pattern defined by regex.

    The entire string must match: a trailing newline or any other trailing
    text causes validation to fail, even if the pattern ends in `$` (which on
    its own would also match just before a final newline).

    Args:
        val: String to validate.
        regex: Pattern to validate against, either as a pattern string or as
            an already compiled pattern.
        throw: Optional exception instance to raise if val does not match.

    Returns:
        If throw is None: True if val matches regex, otherwise False.
        If throw is not None: val as-is if val matches regex.

    Raises:
        The exception given in throw, if throw is not None and val does not
        match regex.
    """
    is_matching: bool = re.compile(regex).fullmatch(val) is not None

    if throw is None:
        return is_matching
    if not is_matching:
        raise throw
    return val
15+
16+
317
class Validate:
    """
    Static class containing methods for validating various fields.

    Every method has two modes, selected by the `throw` argument:
      - throw is None: return True if the value is valid, otherwise False.
      - throw is an exception instance: return the (possibly normalized)
        value if it is valid, otherwise raise throw. This form can be used
        directly as an argparse `type=` callable.

    Patterns are anchored with `\\Z` rather than `$` so that a value ending
    in a newline is rejected (`$` also matches just before a final newline).
    """

    @staticmethod
    def runner_name(runner_name: str, throw: Exception = None):
        """
        Returns True if runner_name is clean (no illegal characters), i.e.
        consists solely of ASCII letters, digits and underscores.

        If throw argument is specified: return runner_name as-is if it is
        clean, otherwise raise error defined by throw.
        """
        return validate_on_re(runner_name, r"^[a-zA-Z0-9_]+\Z", throw=throw)

    @staticmethod
    def timestamp(t: str, throw: Exception = None):
        """
        Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False.

        Month must be 01-12, day 01-31, hour 00-23, minute and second 00-59.
        Day-of-month is not checked against the length of the given month.

        If throw argument is specified: return t as-is if t is in aforementioned
        format, otherwise raise error defined by throw.
        """
        return validate_on_re(
            t,
            # Day is (0[1-9]|[12][0-9]|3[01]) so that day "00" is rejected.
            r"^\d{4}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]\Z",
            throw=throw,
        )

    @staticmethod
    def github_repo(repo: str, throw: Exception = None):
        """
        Returns True if repo is of form <owner>/<repo name>, optionally
        prefixed by http(s)://github.com/

        If throw argument is specified: return repo with any
        http(s)://github.com/ prefix removed if repo is in aforementioned
        format, otherwise raise error defined by throw.
        """
        return validate_on_re(
            # The dot is escaped so that only the literal host "github.com"
            # is stripped.
            re.sub(r"^https?://github\.com/", "", repo),
            r"^[a-zA-Z0-9_-]{1,39}/[a-zA-Z0-9_.-]{1,100}\Z",
            throw=throw,
        )

    @staticmethod
    def commit_hash(commit: str, throw: Exception = None, trunc: int = 40):
        """
        Returns True if commit is a valid git commit hash (7 to 40 lowercase
        hexadecimal characters).

        If throw argument is specified: return commit hash (truncated to trunc
        chars long) if commit is a valid commit hash, otherwise raise error
        defined by throw.
        """
        commit_re = r"^[a-f0-9]{7,40}\Z"
        if throw is None:
            return validate_on_re(commit, commit_re)
        return validate_on_re(commit, commit_re, throw=throw)[:trunc]

0 commit comments

Comments
 (0)