Skip to content

Commit f2e9360

Browse files
authored
Merge pull request #3294 from Pinata-Consulting/log-odb-hashes
logging: log hash alongside elapsed to help find divergent/non-deterministic results
2 parents 1364d16 + b63b64d commit f2e9360

File tree

2 files changed

+27
-7
lines changed

2 files changed

+27
-7
lines changed

flow/test/test_genElapsedTime.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def test_elapsed_time(self, mock_stdout):
3030
genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"])
3131
# check if output is correct
3232
expected_output = (
33-
self.tmp_dir.name + "\n1_test 5400 9440\nTotal 5400 9440\n"
33+
self.tmp_dir.name + "\n1_test 5400 9440 N/A\nTotal 5400 9440\n"
3434
).split()
3535
actual_output = mock_stdout.getvalue().split()
3636
self.assertEqual(actual_output, expected_output)
@@ -44,7 +44,7 @@ def test_zero_time(self, mock_stdout):
4444
# call the script with the test log file
4545
genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"])
4646
expected_output = (
47-
self.tmp_dir.name + "\n1_test 74 9440\nTotal 74 9440\n"
47+
self.tmp_dir.name + "\n1_test 74 9440 N/A\nTotal 74 9440\n"
4848
).split()
4949
actual_output = mock_stdout.getvalue().split()
5050
self.assertEqual(actual_output, expected_output)
@@ -61,7 +61,7 @@ def test_elapsed_time_longer_duration(self, mock_stdout):
6161
genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"])
6262
# check if output is correct
6363
expected_output = (
64-
self.tmp_dir.name + "\n1_test 744 9440 Total 744 9440"
64+
self.tmp_dir.name + "\n1_test 744 9440 N/A Total 744 9440"
6565
).split()
6666
actual_output = mock_stdout.getvalue().split()
6767
self.assertEqual(actual_output, expected_output)

flow/util/genElapsedTime.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
# in the flow and prints it in a table
55
# ---------------------------------------------------------------------------
66

7+
import argparse
8+
import hashlib
79
import pathlib
810
import os
9-
import argparse # argument parsing
1011
import sys
1112

1213
# Parse and validate arguments
@@ -61,29 +62,48 @@ def print_log_dir_times(logdir, args):
6162
int(line.split("Peak memory: ")[1].split("KB")[0]) / 1024
6263
)
6364

65+
# content hash for .odb file alongside .log file is useful to
66+
# debug divergent results under what should be identical
67+
# builds(such as local and CI builds)
68+
odb_file = pathlib.Path(
69+
str(f).replace("logs/", "results/").replace(".log", ".odb")
70+
)
71+
if odb_file.exists():
72+
hasher = hashlib.sha1()
73+
with open(odb_file, "rb") as odb_f:
74+
while chunk := odb_f.read(16 * 1024 * 1024):
75+
hasher.update(chunk)
76+
odb_hash = hasher.hexdigest()
77+
else:
78+
odb_hash = "N/A"
79+
6480
if not found:
6581
print("No elapsed time found in", str(f), file=sys.stderr)
6682
continue
6783

6884
# Print the name of the step and the corresponding elapsed time
69-
format_str = "%-25s %20s %14s"
85+
format_str = "%-25s %10s %14s %20s"
7086
if elapsedTime is not None and peak_memory is not None:
7187
if first and not args.noHeader:
72-
print(format_str % ("Log", "Elapsed seconds", "Peak Memory/MB"))
88+
print(
89+
format_str
90+
% ("Log", "Elapsed/s", "Peak Memory/MB", "sha1sum .odb [0:20)")
91+
)
7392
first = False
7493
print(
7594
format_str
7695
% (
7796
os.path.splitext(os.path.basename(str(f)))[0],
7897
elapsedTime,
7998
peak_memory,
99+
odb_hash[0:20],
80100
)
81101
)
82102
totalElapsed += elapsedTime
83103
total_max_memory = max(total_max_memory, int(peak_memory))
84104

85105
if totalElapsed != 0:
86-
print(format_str % ("Total", totalElapsed, total_max_memory))
106+
print(format_str % ("Total", totalElapsed, total_max_memory, ""))
87107

88108

89109
def scan_logs(args):

0 commit comments

Comments
 (0)