Skip to content

Commit 8b4077c

Browse files
committed
Add python crash wrapper
1 parent 3ca303e commit 8b4077c

File tree

2 files changed

+236
-25
lines changed

2 files changed

+236
-25
lines changed

.github/actions/on_host_tests/action.yaml

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -86,35 +86,33 @@ runs:
8686
export GTEST_TOTAL_SHARDS=${{ inputs.num_gtest_shards }}
8787
export GTEST_SHARD_INDEX=${{ matrix.shard }}
8888
89-
crash_filter="-crash_detector"
90-
for i in {1..100}; do
91-
set +e
89+
# Forward a filter to the crash wrapper only when $test_filter is not the match-all '*'; otherwise leave it empty.
90+
if [ "${test_filter}" == "*" ]; then
91+
crash_filter=""
92+
else
93+
crash_filter="${test_filter}"
94+
fi
95+
96+
if [ -n "${crash_filter}" ]; then
97+
gtest_filter_arg="--gtest_filter=${crash_filter}"
98+
else
99+
gtest_filter_arg=""
100+
fi
101+
102+
python3 ${GITHUB_WORKSPACE}/.github/scripts/crash_wrapper.py \
103+
--xml_output_file="${xml_path}" \
104+
--log_file="${log_path}" \
105+
--filter_file="${test_output}/${test_binary}_crash.filter" \
106+
--max_retries=100 \
107+
--test_binary="${test_binary}" \
108+
-- \
92109
/usr/bin/xvfb-run -a --server-args="${XVFB_SERVER_ARGS}" \
93110
stdbuf -i0 -o0 -e0 $ENTRYPOINT \
94111
--single-process-tests \
95112
--gtest_output="xml:${xml_path}" \
96-
--gtest_filter="${test_filter}:${crash_filter}" 2>&1 | tee ${log_path} || {
97-
failed_suites="${failed_suites} ${test_binary}"
98-
}
99-
100-
if [[ ! -f ${xml_path} ]]; then
101-
# Test binary crashed. Amend test filter and retry
102-
crashed_test=$(grep "\[ RUN \]" "${log_path}" | tail -n 1 | awk '{print $NF}')
103-
if [[ -z "${crashed_test}" ]]; then
104-
echo "Extraction failed or crash before tests started."
105-
break
106-
fi
107-
crash_filter="${crash_filter}:-${crashed_test}"
108-
echo "Retrying with filter: '${crash_filter}'"
109-
echo "${crash_filter}" > "${test_output}/${test_binary}_crash.filter"
110-
else
111-
break
112-
fi
113-
done
114-
if [[ ! -f ${xml_path} ]]; then
115-
# Test binary crashed. Generate a fake JUnit XML report with the last run test.
116-
python3 ${GITHUB_WORKSPACE}/.github/scripts/generate_crash_report.py "${log_path}" > "${xml_path}"
117-
fi
113+
${gtest_filter_arg} || {
114+
failed_suites="${failed_suites} ${test_binary}"
115+
}
118116
fi
119117
done
120118
echo "Finished running tests..."

.github/scripts/crash_wrapper.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025 The Cobalt Authors. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""Runs a test binary with retries on crash."""
16+
17+
import argparse
18+
import datetime
19+
import html
20+
import os
21+
import pathlib
22+
import re
23+
import subprocess
24+
import sys
25+
import time
26+
27+
# Status markers exactly as googletest prints them. The text between the
# brackets is padded to a fixed width, so the spacing below is significant:
# a collapsed form such as '[ RUN ]' never occurs in real gtest output.
RUN_MARKER = '[ RUN      ]'
# Markers that close a test started by RUN_MARKER; a run marker with none of
# these after it means the test never finished.
END_MARKERS = (
    '[       OK ]',
    '[  FAILED  ]',
    '[  SKIPPED ]',
)
33+
34+
def _get_test_name_from_run_line(line: str) -> str:
    """Return the test name (e.g. 'Suite.Test') that ends a run-marker line.

    The name is the single whitespace-free token that follows RUN_MARKER at
    the end of the line. When the line does not have that shape the
    placeholder 'UnknownSuite.UnknownTest' is returned instead.
    """
    pattern = rf"{re.escape(RUN_MARKER)}\s*([^\s]+)$"
    found = re.search(pattern, line)
    if found is None:
        return 'UnknownSuite.UnknownTest'
    return found.group(1)
38+
39+
def _extract_crash_info(log_path: pathlib.Path):
    """Find the test that was running when the binary died.

    Only the last run marker in the log is considered. That test counts as
    crashed when none of END_MARKERS appears anywhere after its run marker.

    Returns:
      A tuple `(test_suite, test_name, log_output_for_crashed_test)`, where the
      log output is everything after the run-marker line. The tuple
      `("UnknownSuite", "UnknownTest", "")` is returned when the log file is
      missing, contains no run marker, or the last started test finished.
    """
    no_crash = ('UnknownSuite', 'UnknownTest', '')
    if not log_path.exists():
        return no_crash

    with log_path.open('r', encoding='utf-8', errors='replace') as log_file:
        all_lines = log_file.readlines()

    # Index of the last line carrying a run marker, or None if there is none.
    last_run = next(
        (idx for idx in range(len(all_lines) - 1, -1, -1)
         if RUN_MARKER in all_lines[idx]),
        None)
    if last_run is None:
        return no_crash

    tail = ''.join(all_lines[last_run + 1:])
    for marker in END_MARKERS:
        if marker in tail:
            # The last started test reported a result, so it did not crash.
            return no_crash

    full_name = _get_test_name_from_run_line(all_lines[last_run])
    suite, dot, name = full_name.partition('.')
    if not dot:
        return 'UnknownSuite', full_name, tail
    return suite, name, tail
64+
65+
# Characters that XML 1.0 forbids everywhere, including inside CDATA. Crash
# logs routinely contain them (for example the ESC byte of ANSI colour codes).
_XML_ILLEGAL_CHARS = re.compile(
    r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]')


def print_junit_xml(xml_path: pathlib.Path, suite: str, name: str, log: str):
    """Write a JUnit XML report that records a single crashed test.

    The report contains one test suite with one test case carrying an
    `<error message="Test crashed">` element whose body is the crash log.

    The signature takes the suite, test name and log separately because that
    is how this file's `main()` calls it; the previous two-parameter form
    referenced undefined names and could not be called successfully.

    Args:
      xml_path: Destination file; it is created or overwritten.
      suite: Test suite name, used for the suite `name` and case `classname`.
      name: Test case name.
      log: Log output to embed in the error element.
    """
    timestamp = datetime.datetime.now(
        datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    suite_attr = html.escape(_XML_ILLEGAL_CHARS.sub('', suite))
    name_attr = html.escape(_XML_ILLEGAL_CHARS.sub('', name))
    # A literal "]]>" would terminate the CDATA section early, so split it
    # across two adjacent sections; parsers join them back together.
    safe_log = _XML_ILLEGAL_CHARS.sub('', log).replace(
        ']]>', ']]]]><![CDATA[>')

    with xml_path.open('w', encoding='utf-8') as f:
        f.write(f"""<?xml version="1.0" encoding="UTF-8"?>
<testsuites tests="1" failures="0" disabled="0" errors="1" time="0">
  <testsuite name="{suite_attr}" tests="1" failures="0" disabled="0" errors="1" time="0" timestamp="{timestamp}">
    <testcase name="{name_attr}" classname="{suite_attr}" time="0">
      <error message="Test crashed">
<![CDATA[ {safe_log} ]]>
      </error>
    </testcase>
  </testsuite>
</testsuites>
""")
79+
80+
def main():
    """Run a gtest command, retrying with crashed tests filtered out.

    Each attempt runs the command given after the wrapper's own options and
    tees its output to stdout and to `--log_file`. If the XML report named by
    `--xml_output_file` exists afterwards, the binary is considered to have
    finished and this process exits with the binary's return code.

    Otherwise the run is treated as a crash: the last started test is looked up
    in the log, added to the negative part of `--gtest_filter`, and the
    command is run again. The current filter is written to `--filter_file`
    after every crash when that option is given.

    Exits with status 1 when no command was given, when the crashed test
    cannot be identified, or when retries are exhausted. In the last case a
    crash report for the final crash is written to the XML path.
    """
    args = _build_arg_parser().parse_args()

    # argparse may leave the "--" separator at the front of the remainder.
    command = list(args.command)
    if command and command[0] == '--':
        command = command[1:]
    # Checked after stripping "--" so a bare separator is also rejected.
    if not command:
        print("Error: No command provided.", file=sys.stderr)
        sys.exit(1)

    xml_path = pathlib.Path(args.xml_output_file)
    log_path = pathlib.Path(args.log_file)

    # Locate a --gtest_filter=... argument already present in the command so
    # later attempts can rewrite it in place instead of adding a second one.
    filter_index = None
    current_filter = ''
    for i, arg in enumerate(command):
        if arg.startswith('--gtest_filter='):
            filter_index = i
            current_filter = arg.split('=', 1)[1]
            break

    # '*' matches everything, so drop it and start from an empty filter.
    if current_filter == '*':
        del command[filter_index]
        filter_index = None
        current_filter = ''

    for _attempt in range(args.max_retries + 1):
        cmd = list(command)
        if current_filter:
            filter_arg = f'--gtest_filter={current_filter}'
            if filter_index is not None:
                cmd[filter_index] = filter_arg
            else:
                cmd.append(filter_arg)

        # Ensure output directories exist and that a stale report from a
        # previous attempt cannot be mistaken for this attempt's result.
        xml_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.parent.mkdir(parents=True, exist_ok=True)
        if xml_path.exists():
            xml_path.unlink()

        return_code = _run_and_tee(cmd, log_path)

        if xml_path.exists():
            sys.exit(return_code)

        suite, name, _ = _extract_crash_info(log_path)

        if suite == 'UnknownSuite' and name == 'UnknownTest':
            print("Could not identify crashed test. Aborting retries.")
            sys.exit(1)  # Unknown crash, cannot filter

        crashed_test = f"{suite}.{name}"
        print(f"Identified crashed test: {crashed_test}")

        current_filter = _exclude_test(current_filter, crashed_test)

        print(f"Updated filter: {current_filter}")
        if args.filter_file:
            with open(args.filter_file, 'w', encoding='utf-8') as f:
                f.write(current_filter)

    # If we reached here, we ran out of retries.
    print("Max retries reached.")

    # Generate a stand-in XML report for the last crash from the last log.
    suite, name, log_content = _extract_crash_info(log_path)
    print_junit_xml(xml_path, suite, name, log_content)

    sys.exit(1)


def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the retry wrapper."""
    parser = argparse.ArgumentParser(
        description='Runs a test command with retries on crash.')
    parser.add_argument(
        '--xml_output_file',
        required=True,
        help='Path to the JUnit XML output file.')
    parser.add_argument(
        '--log_file',
        required=True,
        help='Path to the test log file.')
    parser.add_argument(
        '--max_retries',
        type=int,
        default=100,
        help='Maximum number of retries.')
    parser.add_argument(
        '--filter_file',
        help='Path to a file to store the crash filter.')
    # The CI step passes this option; without declaring it argparse rejects
    # the whole invocation as "unrecognized arguments".
    parser.add_argument(
        '--test_binary',
        help='Name of the test binary being run (accepted, not otherwise used).')
    parser.add_argument(
        'command',
        nargs=argparse.REMAINDER,
        help='The command to run.')
    return parser


def _run_and_tee(cmd: list[str], log_path: pathlib.Path) -> int:
    """Run cmd, copying its merged stdout/stderr to stdout and log_path.

    Returns the child's return code.
    """
    with log_path.open('w', encoding='utf-8') as log_file:
        # errors='replace' keeps a crashing binary's stray non-UTF-8 bytes
        # from raising UnicodeDecodeError inside the wrapper itself.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding='utf-8',
            errors='replace',
            bufsize=1,  # Line buffered
        ) as process:
            # Iterating blocks until EOF, so there is no polling busy-loop.
            for line in process.stdout:
                sys.stdout.write(line)
                log_file.write(line)
            sys.stdout.flush()
    # Leaving the Popen context waits for the child, so returncode is set.
    return process.returncode


def _exclude_test(current_filter: str, crashed_test: str) -> str:
    """Return current_filter with crashed_test added as a negative pattern.

    GTest filter syntax is `positive patterns[-negative patterns]`: a filter
    that already contains '-' gets the test appended with ':', any other
    non-empty filter gets a '-' section started, and an empty filter becomes
    a pure exclusion.
    """
    if not current_filter:
        return f"-{crashed_test}"
    separator = ':' if '-' in current_filter else '-'
    return f"{current_filter}{separator}{crashed_test}"
211+
212+
# Run the wrapper only when this file is executed as a script.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)