Skip to content

Commit de280a5

Browse files
committed
Modify save directory structure, amend hostname behavior for github runners
1 parent 186b36e commit de280a5

File tree

4 files changed

+88
-29
lines changed

4 files changed

+88
-29
lines changed

devops/actions/run-tests/benchmark_v2/action.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ runs:
103103
echo "-----"
104104
pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
105105
echo "-----"
106-
mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
107106
108107
case "$ONEAPI_DEVICE_SELECTOR" in
109108
level_zero:*) SAVE_SUFFIX="L0" ;;
@@ -120,8 +119,8 @@ runs:
120119
--sycl "$(realpath ./toolchain)" \
121120
--save "$SAVE_NAME" \
122121
--output-html remote \
123-
--results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
124-
--output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
122+
--results-dir "./llvm-ci-perf-results/" \
123+
--output-dir "./llvm-ci-perf-results/" \
125124
--preset "$PRESET" \
126125
--timestamp-override "$SAVE_TIMESTAMP"
127126
echo "-----"

devops/scripts/benchmarks/compare.py

Lines changed: 63 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ class Compare:
3939
"""Class containing logic for comparisons between results"""
4040
@staticmethod
4141
def get_hist_avg(
42-
result_name: str, result_dir: str, cutoff: str, aggregator=SimpleMedian,
43-
exclude: list[str] = []
42+
result_name: str, result_dir: str, hostname: str, cutoff: str,
43+
aggregator: Aggregator = SimpleMedian, exclude: list[str] = []
4444
) -> dict[str, BenchmarkHistoricAverage]:
4545
"""
4646
Create a historic average for results named result_name in result_dir
@@ -51,6 +51,7 @@ def get_hist_avg(
5151
result_dir (str): Path to folder containing benchmark results
5252
cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in
5353
average calcultaion
54+
hostname (str): Hostname of machine on which results ran on
5455
aggregator (Aggregator): The aggregator to use for calculating the
5556
historic average
5657
exclude (list[str]): List of filenames (only the stem) to exclude
@@ -60,14 +61,21 @@ def get_hist_avg(
6061
A dictionary mapping benchmark names to BenchmarkHistoricAverage
6162
objects
6263
"""
64+
if not Validate.timestamp(cutoff):
65+
raise ValueError("Provided cutoff time is not a proper timestamp.")
66+
6367
def get_timestamp(f: str) -> str:
6468
"""Extract timestamp from result filename"""
6569
return str(f)[-len("YYYYMMDD_HHMMSS.json") : -len(".json")]
6670

6771
def get_result_paths() -> list[str]:
6872
"""
6973
Get a list of all results matching result_name in result_dir that is
70-
newer than the timestamp specified by cutoff
74+
newer than the timestamp specified by cutoff based off of filename.
75+
76+
This function assumes filenames of benchmark result files are
77+
accurate; files returned by this function will be checked a second
78+
time once their contents are actually loaded.
7179
"""
7280
cache_dir = Path(f"{result_dir}")
7381

@@ -84,6 +92,23 @@ def get_result_paths() -> list[str]:
8492
cache_dir.glob(f"{result_name}_*_*.json")
8593
)
8694
)
95+
96+
def check_benchmark_result(result: BenchmarkRun) -> bool:
97+
"""
98+
Returns True if result file:
99+
- Was ran on the target machine/hostname specified
100+
- Sanity check: ensure metadata are all expected values:
101+
- Date is truly before cutoff timestamp
102+
- Name truly matches up with specified result_name
103+
"""
104+
if result.hostname != hostname:
105+
return False
106+
if result.name != result_name:
107+
print(f"Warning: Result file {result_path} does not match specified result name {result.name}.")
108+
return False
109+
if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S"):
110+
return False
111+
return True
87112

88113
# key: name of the benchmark test result
89114
# value: { command_args: set[str], aggregate: Aggregator }
@@ -95,9 +120,13 @@ def get_result_paths() -> list[str]:
95120
for result_path in get_result_paths():
96121
with result_path.open('r') as result_f:
97122
result = BenchmarkRun.from_json(json.load(result_f))
98-
99-
if result.name != result_name:
100-
print(f"Warning: Result file {result_path} has mismatching name {result.name}. Skipping file.")
123+
124+
# Perform another check on result file here, as get_result_paths()
125+
# only filters out result files via filename, which:
126+
# - does not contain enough information to filter out results, i.e.
127+
# no hostname information.
128+
# - information in filename may be mismatched from metadata.
129+
if not check_benchmark_result(result):
101130
continue
102131

103132
for test_run in result.results:
@@ -139,26 +168,25 @@ def reset_aggregate() -> dict:
139168

140169

141170
def to_hist_avg(
142-
hist_avg: dict[str, BenchmarkHistoricAverage], compare_file: str
171+
hist_avg: dict[str, BenchmarkHistoricAverage], target: BenchmarkRun
143172
) -> tuple:
144173
"""
145-
Compare results in compare_file to a pre-existing map of historic
146-
averages
174+
Compare results in target to a pre-existing map of historic average.
175+
176+
Caution: Ensure the generated hist_avg is for results running on the
177+
same host as target.hostname.
147178
148179
Args:
149180
hist_avg (dict): A historic average map generated from get_hist_avg
150-
compare_file (str): Full filepath of result to compare against
181+
target (BenchmarkRun): results to compare against hist_avg
151182
152183
Returns:
153184
A tuple returning (list of improved tests, list of regressed tests).
154185
"""
155-
with open(compare_file, 'r') as compare_f:
156-
compare_result = BenchmarkRun.from_json(json.load(compare_f))
157-
158186
improvement = []
159187
regression = []
160188

161-
for test in compare_result.results:
189+
for test in target.results:
162190
if test.name not in hist_avg:
163191
continue
164192
if hist_avg[test.name].command_args != set(test.command[1:]):
@@ -186,10 +214,9 @@ def perf_diff_entry() -> dict:
186214
return improvement, regression
187215

188216

189-
190217
def to_hist(
191-
avg_type: str, result_name: str, compare_file: str, result_dir: str, cutoff: str,
192-
218+
avg_type: str, result_name: str, compare_file: str, result_dir: str,
219+
cutoff: str,
193220
) -> tuple:
194221
"""
195222
Pregenerate a historic average from results named result_name in
@@ -213,17 +240,33 @@ def to_hist(
213240
"""
214241

215242
if avg_type != "median":
216-
print("Only median is currently supported: refusing to continue.")
243+
print("Only median is currently supported: Refusing to continue.")
244+
exit(1)
245+
246+
try:
247+
with open(compare_file, 'r') as compare_f:
248+
compare_result = BenchmarkRun.from_json(json.load(compare_f))
249+
except:
250+
print(f"Unable to open {compare_file}.")
251+
exit(1)
252+
253+
# Sanity checks:
254+
if compare_result.hostname == "Unknown":
255+
print("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue.")
256+
exit(1)
257+
if not Validate.timestamp(cutoff):
258+
print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.")
217259
exit(1)
218260

219-
# TODO call validator on cutoff timestamp
261+
# Build historic average and compare results against historic average:
220262
hist_avg = Compare.get_hist_avg(
221263
result_name,
222264
result_dir,
265+
compare_result.hostname,
223266
cutoff,
224267
exclude=[Path(compare_file).stem]
225268
)
226-
return Compare.to_hist_avg(hist_avg, compare_file)
269+
return Compare.to_hist_avg(hist_avg, compare_result)
227270

228271

229272
if __name__ == "__main__":

devops/scripts/benchmarks/history.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,31 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
8080
except:
8181
git_hash = "unknown"
8282
github_repo = None
83+
84+
# Check if RUNNER_NAME environment variable has been declared.
85+
#
86+
# RUNNER_NAME is always present in github runner environments. Because
87+
# github runners obfusicate hostnames, using socket.gethostname()
88+
# produces different hostnames when ran on the same machine multiple
89+
# times. Thus, we rely on the RUNNER_NAME variable when running on
90+
# github runners.
91+
hostname = os.getenv("RUNNER_NAME")
92+
if hostname is None:
93+
hostname = socket.gethostname()
94+
else if not Validate.runner_name(hostname):
95+
# However, nothing stops github runner env variables (including
96+
# RUNNER_NAME) from being modified by external actors. Ensure
97+
# RUNNER_NAME contains nothing malicious:
98+
# TODO is this overkill?
99+
raise ValueError("Illegal characters found in specified RUNNER_NAME.")
83100

84101
return BenchmarkRun(
85102
name=name,
86103
git_hash=git_hash,
87104
github_repo=github_repo,
88105
date=datetime.now(tz=timezone.utc),
89106
results=results,
90-
hostname=socket.gethostname(),
107+
hostname=hostname,
91108
)
92109

93110
def save(self, save_name, results: list[Result], to_file=True):

devops/scripts/benchmarks/utils/validate.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ class Validate:
44
"""Static class containing methods for validating various fields"""
55

66
@staticmethod
7-
def filepath(path: str) -> bool:
7+
def runner_name(runner_name: str) -> bool:
88
"""
9-
Returns True if path is clean (no illegal characters), otherwise False.
9+
Returns True if runner_name is clean (no illegal characters).
1010
"""
11-
filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+")
12-
return filepath_re.match(path) is not None
11+
runner_name_re = re.compile(r"[a-zA-Z0-9_]+")
12+
return runner_name_re.match(runner_name) is not None
1313

1414
@staticmethod
1515
def timestamp(t: str) -> bool:
@@ -19,4 +19,4 @@ def timestamp(t: str) -> bool:
1919
timestamp_re = re.compile(
2020
r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$"
2121
)
22-
return timestamp_re.match(t) is not None
22+
return timestamp_re.match(t) is not None

0 commit comments

Comments
 (0)