[libc++] Improve handling of runtime errors inside SPEC benchmarks

ldionne · mahesh-attarde · commit b2ce8533e4bc · 2025-10-02T21:13:44.000-07:00
Previously, we would report a successful run even if the benchmark exited
with an error, and we would still produce a timing for that benchmark.
After this patch, we consider an error in the benchmark to be a failed LIT
test and we don't produce any benchmark data for it.
diff --git a/libcxx/test/benchmarks/spec.gen.py b/libcxx/test/benchmarks/spec.gen.py
@@ -72,7 +72,12 @@
     print(f'RUN: %{{spec_dir}}/bin/runcpu --config %T/spec-config.cfg --size train --output-root %T --rebuild {benchmark}')
     print(f'RUN: rm -rf %T/benchspec') # remove the temporary directory, which can become quite large
 
-    # Parse the results into a LNT-compatible format. This also errors out if there are no CSV files, which
-    # means that the benchmark didn't run properly (the `runcpu` command above never reports a failure).
-    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/*.train.csv --output-format=lnt > %T/results.lnt || ! cat %T/result/*.log')
+    # The `runcpu` command above doesn't fail even if the benchmark fails to run. To determine failure, parse the CSV
+    # results and ensure there are no compilation errors or runtime errors in the status row. Also print the logs and
+    # fail if there are no CSV files at all, which implies a SPEC error.
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results --extract "Base Status" --keep-failed %T/result/*.train.csv > %T/status || ! cat %T/result/*.log')
+    print(f'RUN: ! grep -E "CE|RE" %T/status || ! cat %T/result/*.log')
+
+    # If there were no errors, parse the results into LNT-compatible format and print them.
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/*.train.csv --output-format=lnt > %T/results.lnt')
     print(f'RUN: cat %T/results.lnt')
diff --git a/libcxx/utils/parse-spec-results b/libcxx/utils/parse-spec-results
@@ -58,7 +58,10 @@ def main(argv):
              'sure to use appropriate quoting for header names that contain spaces. This option only makes sense '
              'when the output format is CSV.')
     parser.add_argument('--keep-not-run', action='store_true',
-        help='Keep entries whose \'Base Status\' is marked as \'NR\', aka \'Not Run\'. By default, such entries are discarded.')
+        help='Keep entries whose "Base Status" is marked as "NR" (aka "Not Run"). By default, such entries are discarded.')
+    parser.add_argument('--keep-failed', action='store_true',
+        help='Keep entries whose "Base Status" is marked as "CE" (aka "Compilation Error") or "RE" (aka "Runtime Error"). '
+             'By default, such entries are discarded.')
     args = parser.parse_args(argv)
 
     if args.table == 'full':
@@ -76,10 +79,12 @@ def main(argv):
         headers = parsed_headers
         rows.extend(parsed_rows)
 
-    # Remove rows that were not run unless we were asked to keep them
+    # Remove rows that were not run (or failed) unless we were asked to keep them
+    status = headers.index('Base Status')
     if not args.keep_not_run:
-        not_run = headers.index('Base Status')
-        rows = [row for row in rows if row[not_run] != 'NR']
+        rows = [row for row in rows if row[status] != 'NR']
+    if not args.keep_failed:
+        rows = [row for row in rows if row[status] not in ('CE', 'RE')]
 
     if args.extract is not None:
         if args.output_format != 'csv':