Skip to content

Commit 1dfe716

Browse files
authored
Highlight known failures in PR tests summary (#3740)
## Changes The gh_parse.py / gh_report.py scripts now download and parse known_failures.txt and highlight matching tests https://github.com/databricks/cli/blob/ciconfig/known_failures.txt See #3733 ## Why Additional context on test status. ## Tests Manually: `~/work/cli-main % ./tools/gh_report.py --run 18343557422 --markdown` | | Env | ✅pass | 💚RECOVERED | 🙈skip | 🟨KNOWN | | -- | ------------------ | ------ | ----------- | ------ | ------- | | 🟨 | aws linux | 321 | | 543 | 1 | | 🟨 | aws windows | 322 | | 542 | 1 | | 🟨 | aws-ucws linux | 437 | | 439 | 1 | | 🟨 | aws-ucws windows | 438 | | 438 | 1 | | 🟨 | azure linux | 319 | 2 | 542 | 1 | | 🟨 | azure windows | 320 | 2 | 541 | 1 | | 🟨 | azure-ucws linux | 435 | 2 | 438 | 1 | | 🟨 | azure-ucws windows | 436 | 2 | 437 | 1 | | 🟨 | gcp linux | 320 | | 544 | 1 | | 🟨 | gcp windows | 321 | | 543 | 1 | | Test Name | aws linux | aws windows | aws-ucws linux | aws-ucws windows | azure linux | azure windows | azure-ucws linux | azure-ucws windows | gcp linux | gcp windows | | --------------------- | --------- | ----------- | -------------- | ---------------- | ----------- | ------------- | ---------------- | ------------------ | --------- | ----------- | | TestTagKeyAzure | 🙈skip | 🙈skip | 🙈skip | 🙈skip | 💚RECOVERED | 💚RECOVERED | 💚RECOVERED | 💚RECOVERED | 🙈skip | 🙈skip | | TestTagKeyAzure/empty | | | | | 💚RECOVERED | 💚RECOVERED | 💚RECOVERED | 💚RECOVERED | | | | TestTelemetryEndpoint | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN | 🟨KNOWN |
1 parent fd90e55 commit 1dfe716

File tree

2 files changed

+277
-34
lines changed

2 files changed

+277
-34
lines changed

tools/gh_parse.py

Lines changed: 251 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@
3030
MISSING = "🤯\u200bMISS"
3131
PANIC = "💥\u200bPANIC"
3232

33-
INTERESTING_ACTIONS = (FAIL, BUG, FLAKY, PANIC, MISSING)
33+
# Statuses assigned when a test matches a rule in known_failures.txt:
# a matching failure is reported as KNOWN, a matching pass as RECOVERED.
34+
KNOWN_FAILURE = "🟨\u200bKNOWN"
35+
RECOVERED = "💚\u200bRECOVERED"
36+
37+
INTERESTING_ACTIONS = (FAIL, BUG, FLAKY, PANIC, MISSING, KNOWN_FAILURE, RECOVERED)
3438
ACTIONS_WITH_ICON = INTERESTING_ACTIONS + (PASS, SKIP)
3539

3640
ACTION_MESSAGES = {
@@ -40,6 +44,196 @@
4044
}
4145

4246

47+
class KnownFailuresConfig:
    """Ordered collection of known-failure rules."""

    def __init__(self, rules):
        # Rules are consulted in the order given; the first match wins.
        self.rules = rules

    def matches(self, package_name, test_name):
        """Return the ``original_line`` of the first matching rule, or "" if none match."""
        first_hit = next(
            (candidate for candidate in self.rules if candidate.matches(package_name, test_name)),
            None,
        )
        return "" if first_hit is None else first_hit.original_line
56+
57+
58+
class KnownFailuresRule:
59+
def __init__(self, package_pattern, test_pattern, package_prefix, test_prefix, original_line):
60+
self.package_pattern = package_pattern
61+
self.test_pattern = test_pattern
62+
self.package_prefix = package_prefix
63+
self.test_prefix = test_prefix
64+
self.original_line = original_line
65+
66+
def matches(self, package_name, test_name):
67+
# Check package pattern
68+
if self.package_prefix:
69+
package_match = self._matches_path_prefix(package_name, self.package_pattern)
70+
else:
71+
package_match = package_name == self.package_pattern
72+
73+
if not package_match:
74+
return False
75+
76+
# Check test pattern - this matches the Go logic
77+
if self.test_prefix:
78+
return self._matches_path_prefix(test_name, self.test_pattern) or self._matches_path_prefix(
79+
self.test_pattern, test_name
80+
)
81+
else:
82+
return test_name == self.test_pattern or self._matches_path_prefix(self.test_pattern, test_name)
83+
84+
def _matches_path_prefix(self, s, pattern):
85+
if pattern == "":
86+
return True
87+
if s == pattern:
88+
return True
89+
return s.startswith(pattern + "/")
90+
91+
92+
def parse_known_failures(content):
93+
"""
94+
Parse known failures config content.
95+
96+
>>> _test_parse_known_failures()
97+
"""
98+
rules = []
99+
for line_num, line in enumerate(content.splitlines(), 1):
100+
line = line.strip()
101+
if not line or line.startswith("#"):
102+
continue
103+
104+
# Remove comments
105+
if "#" in line:
106+
line = line[: line.index("#")].strip()
107+
if not line:
108+
continue
109+
110+
parts = line.split()
111+
if len(parts) != 2:
112+
continue
113+
114+
package_pattern, test_pattern = parts
115+
package_pattern, package_prefix = _parse_pattern(package_pattern)
116+
test_pattern, test_prefix = _parse_pattern(test_pattern)
117+
118+
rule = KnownFailuresRule(package_pattern, test_pattern, package_prefix, test_prefix, line)
119+
rules.append(rule)
120+
121+
return KnownFailuresConfig(rules)
122+
123+
124+
def _parse_pattern(pattern):
125+
if pattern == "*":
126+
return "", True
127+
if pattern.endswith("/"):
128+
return pattern[:-1], True
129+
return pattern, False
130+
131+
132+
def _test_parse_known_failures():
    """Table tests ported from Go testrunner/main_test.go.

    Raises AssertionError for the first row whose match outcome differs from
    the expected one; returns None when every row passes.
    """
    classic_test = "TestAccept/bundle/templates/default-python/combinations/classic"
    exact_rule = "acceptance " + classic_test
    prefix_rule = exact_rule + "/"

    # Rows: (config text, package name, test name, expected match)
    table = (
        # Exact matches
        ("bundle TestDeploy", "bundle", "TestDeploy", True),
        ("bundle TestDeploy", "libs", "TestDeploy", False),
        ("bundle TestDeploy", "bundle", "TestSomethingElse", False),
        # Package prefix matches
        ("libs/ TestSomething", "libs/auth", "TestSomething", True),
        ("libs/ TestSomething", "libs", "TestSomething", True),
        ("libs/ TestSomething", "libsother", "TestSomething", False),
        # Test prefix matches
        ("bundle TestAccept/", "bundle", "TestAcceptDeploy", False),
        ("bundle TestAccept/", "bundle", "TestAccept", True),
        ("bundle TestAccept/", "bundle", "TestAccept/Deploy", True),
        # Wildcard matches
        ("* *", "any/package", "AnyTest", True),
        ("* TestAccept/", "any/package", "TestAcceptDeploy", False),
        ("* TestAccept/", "any/package", "TestAccept/Deploy", True),
        ("libs/ *", "libs/auth", "AnyTest", True),
        # Path prefix edge cases
        ("TestAccept/ TestAccept/", "TestAccept", "TestAccept", True),
        ("TestAccept/ TestAccept/", "TestAccept/bundle", "TestAccept/deploy", True),
        ("TestAccept/ TestAccept/", "TestAcceptSomething", "TestAcceptSomething", False),
        # Empty values cases
        ("* TestDeploy", "", "TestDeploy", True),
        ("bundle *", "bundle", "", True),
        # Subtest failure results in parent test failure as well
        (exact_rule, "acceptance", "TestAccept", True),
        (exact_rule, "acceptance", "TestAnother", False),
        (exact_rule, "acceptance", classic_test + "/x", False),
        # Pattern version
        (prefix_rule, "acceptance", "TestAccept", True),
        (prefix_rule, "acceptance", "TestAnother", False),
        (prefix_rule, "acceptance", classic_test + "/x", True),
        (prefix_rule, "acceptance", classic_test, True),
        (prefix_rule, "acceptance", "TestAccept/bundle/templates/default-python/combinations", True),
    )

    for config_text, package, test, want in table:
        matched_line = parse_known_failures(config_text).matches(package, test)

        # matches() returns the rule text or "", so truthiness is the outcome.
        got = bool(matched_line)
        if got == want:
            continue

        raise AssertionError(
            f"Test failed for input='{config_text}', package='{package}', test='{test}': "
            f"expected {want}, got {got} (result: '{matched_line}')"
        )
224+
225+
226+
def load_known_failures():
    """Load known-failure rules from ``.gh-logs/known_failures.txt``.

    The path is relative to the current working directory. Loading is best
    effort: the report must still be produced without this file.

    Returns:
        The KnownFailuresConfig parsed from the file, or None when the file is
        absent or cannot be read. Read errors other than "file not found" are
        reported on stderr instead of being silently discarded.
    """
    known_failures_path = Path(".gh-logs/known_failures.txt")
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the locale.
        content = known_failures_path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # The file is optional; its absence is the normal "no known failures" case.
        return None
    except (OSError, UnicodeError) as err:
        print(f"Ignoring {known_failures_path}: {err}", file=sys.stderr, flush=True)
        return None
    return parse_known_failures(content)
235+
236+
43237
def cleanup_env(name):
44238
"""
45239
>>> cleanup_env("test-output-aws-prod-is-linux-ubuntu-latest")
@@ -93,36 +287,54 @@ def parse_file(path, filter):
93287
continue
94288
if filter and filter not in testname:
95289
continue
96-
action = data.get("Action")
97290

291+
package_name = data.get("Package", "").removeprefix("github.com/databricks/cli/")
292+
test_key = (package_name, testname)
293+
294+
action = data.get("Action")
98295
action = ACTION_MESSAGES.get(action, action)
99296

100297
if action in (FAIL, PASS, SKIP):
101-
prev = results.get(testname)
298+
prev = results.get(test_key)
102299
if prev == FAIL and action == PASS:
103-
results[testname] = FLAKY
300+
results[test_key] = FLAKY
104301
else:
105-
results[testname] = action
302+
results[test_key] = action
106303

107304
out = data.get("Output")
108305
if out:
109-
outputs.setdefault(testname, []).append(out.rstrip())
306+
outputs.setdefault(test_key, []).append(out.rstrip())
110307

111-
for testname, lines in outputs.items():
112-
if testname in results:
308+
for test_key, lines in outputs.items():
309+
if test_key in results:
113310
continue
114311
if "panic: " in str(lines):
115-
results.setdefault(testname, PANIC)
312+
results.setdefault(test_key, PANIC)
116313
else:
117-
results.setdefault(testname, MISSING)
314+
results.setdefault(test_key, MISSING)
118315

119316
return results, outputs
120317

121318

319+
def mark_known_failures(results, known_failures_config):
    """Return a copy of ``results`` with known-failure statuses applied.

    ``results`` maps ``(package_name, testname)`` to an action. For tests that
    match ``known_failures_config``, FAIL becomes KNOWN_FAILURE and PASS becomes
    RECOVERED; every other entry is copied unchanged. A falsy config leaves all
    entries unchanged.
    """
    relabelled = {}
    for key, outcome in results.items():
        package, test = key

        # Only plain failures and passes are candidates for relabelling.
        if outcome == FAIL:
            replacement = KNOWN_FAILURE
        elif outcome == PASS:
            replacement = RECOVERED
        else:
            replacement = None

        is_known = (
            replacement is not None
            and known_failures_config
            and known_failures_config.matches(package, test)
        )
        relabelled[key] = replacement if is_known else outcome
    return relabelled
331+
332+
122333
def print_report(filenames, filter, filter_env, show_output, markdown=False, omit_repl=False):
123-
outputs = {} # testname -> env -> [output]
124-
per_test_per_env_stats = {} # testname -> env -> action -> count
125-
all_testnames = set()
334+
known_failures_config = load_known_failures()
335+
outputs = {} # test_key -> env -> [output]
336+
per_test_per_env_stats = {} # test_key -> env -> action -> count
337+
all_test_keys = set()
126338
all_envs = set()
127339
count_files = 0
128340
count_results = 0
@@ -136,26 +348,28 @@ def print_report(filenames, filter, filter_env, show_output, markdown=False, omi
136348
continue
137349
all_envs.add(env)
138350
test_results, test_outputs = parse_file(p, filter)
351+
test_results = mark_known_failures(test_results, known_failures_config)
139352
count_files += 1
140353
count_results += len(test_results)
141-
for testname, action in test_results.items():
142-
per_test_per_env_stats.setdefault(testname, {}).setdefault(env, Counter())[action] += 1
143-
for testname, output in test_outputs.items():
144-
outputs.setdefault(testname, {}).setdefault(env, []).extend(output)
145-
all_testnames.update(test_results)
354+
for test_key, action in test_results.items():
355+
per_test_per_env_stats.setdefault(test_key, {}).setdefault(env, Counter())[action] += 1
356+
for test_key, output in test_outputs.items():
357+
outputs.setdefault(test_key, {}).setdefault(env, []).extend(output)
358+
all_test_keys.update(test_results)
146359

147360
print(f"Parsed {count_files} files: {count_results} results", file=sys.stderr, flush=True)
148361

149362
# Check for missing tests
150-
for testname in all_testnames:
363+
for test_key in all_test_keys:
364+
package_name, testname = test_key
151365
# It is possible for a test to be missing if its parent is skipped, so ignore test cases with a parent.
152366
# For acceptance tests, ignore tests with subtests produced via EnvMatrix
153367
if testname.startswith("TestAccept/") and "=" in testname:
154368
continue
155369
# For non-acceptance tests ignore all subtests.
156370
if not testname.startswith("TestAccept/") and "/" in testname:
157371
continue
158-
test_results = per_test_per_env_stats.get(testname, {})
372+
test_results = per_test_per_env_stats.get(test_key, {})
159373
for e in all_envs:
160374
if e not in test_results:
161375
test_results.setdefault(e, Counter())[MISSING] += 1
@@ -177,8 +391,8 @@ def is_bug(test_results):
177391
count += 1
178392
return count >= 0
179393

180-
for testname in all_testnames:
181-
test_results = per_test_per_env_stats.get(testname, {})
394+
for test_key in all_test_keys:
395+
test_results = per_test_per_env_stats.get(test_key, {})
182396
if not is_bug(test_results):
183397
continue
184398
for e, env_results in sorted(test_results.items()):
@@ -189,7 +403,7 @@ def is_bug(test_results):
189403
env_results[BUG] += 1
190404

191405
per_env_stats = {} # env -> action -> count
192-
for testname, items in per_test_per_env_stats.items():
406+
for test_key, items in per_test_per_env_stats.items():
193407
for env, stats in items.items():
194408
per_env_stats.setdefault(env, Counter()).update(stats)
195409

@@ -217,31 +431,33 @@ def is_bug(test_results):
217431
interesting_envs.add(env)
218432
break
219433

220-
simplified_results = {} # testname -> env -> action
221-
for testname, items in sorted(per_test_per_env_stats.items()):
222-
per_testname_result = simplified_results.setdefault(testname, {})
434+
simplified_results = {} # test_key -> env -> action
435+
for test_key, items in sorted(per_test_per_env_stats.items()):
436+
package_name, testname = test_key
437+
per_testkey_result = simplified_results.setdefault(test_key, {})
223438
# first select tests with interesting actions (anything but pass or skip)
224439
for env, counts in items.items():
225440
for action in INTERESTING_ACTIONS:
226441
if action in counts:
227-
per_testname_result.setdefault(env, action)
442+
per_testkey_result.setdefault(env, action)
228443
break
229444

230445
# Once we know test is interesting, complete the row
231-
if per_testname_result:
446+
if per_testkey_result:
232447
for env, counts in items.items():
233448
if env not in interesting_envs:
234449
continue
235450
for action in (PASS, SKIP):
236451
if action in counts:
237-
per_testname_result.setdefault(env, action)
452+
per_testkey_result.setdefault(env, action)
238453
break
239454

240-
if not per_testname_result:
241-
per_testname_result = simplified_results.pop(testname)
455+
if not per_testkey_result:
456+
per_testkey_result = simplified_results.pop(test_key)
242457

243458
table = []
244-
for testname, items in simplified_results.items():
459+
for test_key, items in simplified_results.items():
460+
package_name, testname = test_key
245461
table.append(
246462
{
247463
"Test Name": testname,
@@ -255,11 +471,12 @@ def is_bug(test_results):
255471
print(table_txt)
256472

257473
if show_output:
258-
for testname, stats in simplified_results.items():
474+
for test_key, stats in simplified_results.items():
475+
package_name, testname = test_key
259476
for env, action in stats.items():
260477
if action not in INTERESTING_ACTIONS:
261478
continue
262-
output_lines = outputs.get(testname, {}).get(env, [])
479+
output_lines = outputs.get(test_key, {}).get(env, [])
263480
if omit_repl:
264481
output_lines = [
265482
line

0 commit comments

Comments
 (0)