Merge pull request #3294 from Pinata-Consulting/log-odb-hashes

maliberty · web-flow · commit f2e936095c12 · 2025-07-07T18:49:38.000Z
logging: log hash alongside elapsed to help find divergent/non-deterministic results
diff --git a/flow/test/test_genElapsedTime.py b/flow/test/test_genElapsedTime.py
@@ -30,7 +30,7 @@ def test_elapsed_time(self, mock_stdout):
         genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"])
         # check if output is correct
         expected_output = (
-            self.tmp_dir.name + "\n1_test 5400 9440\nTotal 5400 9440\n"
+            self.tmp_dir.name + "\n1_test 5400 9440 N/A\nTotal 5400 9440\n"
         ).split()
         actual_output = mock_stdout.getvalue().split()
         self.assertEqual(actual_output, expected_output)
@@ -44,7 +44,7 @@ def test_zero_time(self, mock_stdout):
         # call the script with the test log file
         genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"])
         expected_output = (
-            self.tmp_dir.name + "\n1_test 74 9440\nTotal 74 9440\n"
+            self.tmp_dir.name + "\n1_test 74 9440 N/A\nTotal 74 9440\n"
         ).split()
         actual_output = mock_stdout.getvalue().split()
         self.assertEqual(actual_output, expected_output)
@@ -61,7 +61,7 @@ def test_elapsed_time_longer_duration(self, mock_stdout):
         genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"])
         # check if output is correct
         expected_output = (
-            self.tmp_dir.name + "\n1_test 744 9440 Total 744 9440"
+            self.tmp_dir.name + "\n1_test 744 9440 N/A Total 744 9440"
         ).split()
         actual_output = mock_stdout.getvalue().split()
         self.assertEqual(actual_output, expected_output)
diff --git a/flow/util/genElapsedTime.py b/flow/util/genElapsedTime.py
@@ -4,9 +4,10 @@
 # in the flow and prints it in a table
 # ---------------------------------------------------------------------------
 
+import argparse
+import hashlib
 import pathlib
 import os
-import argparse  # argument parsing
 import sys
 
 # Parse and validate arguments
@@ -61,29 +62,48 @@ def print_log_dir_times(logdir, args):
                         int(line.split("Peak memory: ")[1].split("KB")[0]) / 1024
                     )
 
+            # content hash for .odb file alongside .log file is useful to
+            # debug divergent results under what should be identical
+            # builds(such as local and CI builds)
+            odb_file = pathlib.Path(
+                str(f).replace("logs/", "results/").replace(".log", ".odb")
+            )
+            if odb_file.exists():
+                hasher = hashlib.sha1()
+                with open(odb_file, "rb") as odb_f:
+                    while chunk := odb_f.read(16 * 1024 * 1024):
+                        hasher.update(chunk)
+                odb_hash = hasher.hexdigest()
+            else:
+                odb_hash = "N/A"
+
             if not found:
                 print("No elapsed time found in", str(f), file=sys.stderr)
                 continue
 
         # Print the name of the step and the corresponding elapsed time
-        format_str = "%-25s %20s %14s"
+        format_str = "%-25s %10s %14s %20s"
         if elapsedTime is not None and peak_memory is not None:
             if first and not args.noHeader:
-                print(format_str % ("Log", "Elapsed seconds", "Peak Memory/MB"))
+                print(
+                    format_str
+                    % ("Log", "Elapsed/s", "Peak Memory/MB", "sha1sum .odb [0:20)")
+                )
                 first = False
             print(
                 format_str
                 % (
                     os.path.splitext(os.path.basename(str(f)))[0],
                     elapsedTime,
                     peak_memory,
+                    odb_hash[0:20],
                 )
             )
         totalElapsed += elapsedTime
         total_max_memory = max(total_max_memory, int(peak_memory))
 
     if totalElapsed != 0:
-        print(format_str % ("Total", totalElapsed, total_max_memory))
+        print(format_str % ("Total", totalElapsed, total_max_memory, ""))
 
 
 def scan_logs(args):