databricks
diff --git a/‎tools/gh_parse.py‎
Lines changed: 251 additions & 34 deletions b/‎tools/gh_parse.py‎
Lines changed: 251 additions & 34 deletions
@@ -30,7 +30,11 @@
 MISSING = "🤯\u200bMISS"
 PANIC = "💥\u200bPANIC"
 
-INTERESTING_ACTIONS = (FAIL, BUG, FLAKY, PANIC, MISSING)
+# These happen if test matches known_failures.txt
+KNOWN_FAILURE = "🟨\u200bKNOWN"
+RECOVERED = "💚\u200bRECOVERED"
+
+INTERESTING_ACTIONS = (FAIL, BUG, FLAKY, PANIC, MISSING, KNOWN_FAILURE, RECOVERED)
 ACTIONS_WITH_ICON = INTERESTING_ACTIONS + (PASS, SKIP)
 
 ACTION_MESSAGES = {
@@ -40,6 +44,196 @@
 }
 
 
+class KnownFailuresConfig:
+    def __init__(self, rules):
+        self.rules = rules
+
+    def matches(self, package_name, test_name):
+        for rule in self.rules:
+            if rule.matches(package_name, test_name):
+                return rule.original_line
+        return ""
+
+
+class KnownFailuresRule:
+    def __init__(self, package_pattern, test_pattern, package_prefix, test_prefix, original_line):
+        self.package_pattern = package_pattern
+        self.test_pattern = test_pattern
+        self.package_prefix = package_prefix
+        self.test_prefix = test_prefix
+        self.original_line = original_line
+
+    def matches(self, package_name, test_name):
+        # Check package pattern
+        if self.package_prefix:
+            package_match = self._matches_path_prefix(package_name, self.package_pattern)
+        else:
+            package_match = package_name == self.package_pattern
+
+        if not package_match:
+            return False
+
+        # Check test pattern - this matches the Go logic
+        if self.test_prefix:
+            return self._matches_path_prefix(test_name, self.test_pattern) or self._matches_path_prefix(
+                self.test_pattern, test_name
+            )
+        else:
+            return test_name == self.test_pattern or self._matches_path_prefix(self.test_pattern, test_name)
+
+    def _matches_path_prefix(self, s, pattern):
+        if pattern == "":
+            return True
+        if s == pattern:
+            return True
+        return s.startswith(pattern + "/")
+
+
+def parse_known_failures(content):
+    """
+    Parse known failures config content.
+
+    >>> _test_parse_known_failures()
+    """
+    rules = []
+    for line_num, line in enumerate(content.splitlines(), 1):
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+
+        # Remove comments
+        if "#" in line:
+            line = line[: line.index("#")].strip()
+            if not line:
+                continue
+
+        parts = line.split()
+        if len(parts) != 2:
+            continue
+
+        package_pattern, test_pattern = parts
+        package_pattern, package_prefix = _parse_pattern(package_pattern)
+        test_pattern, test_prefix = _parse_pattern(test_pattern)
+
+        rule = KnownFailuresRule(package_pattern, test_pattern, package_prefix, test_prefix, line)
+        rules.append(rule)
+
+    return KnownFailuresConfig(rules)
+
+
+def _parse_pattern(pattern):
+    if pattern == "*":
+        return "", True
+    if pattern.endswith("/"):
+        return pattern[:-1], True
+    return pattern, False
+
+
+def _test_parse_known_failures():
+    """Test cases from Go testrunner/main_test.go as table tests."""
+    # Table of test cases: (input, package_name, testcase, expected_match)
+    test_cases = [
+        # Exact matches
+        ("bundle TestDeploy", "bundle", "TestDeploy", True),
+        ("bundle TestDeploy", "libs", "TestDeploy", False),
+        ("bundle TestDeploy", "bundle", "TestSomethingElse", False),
+        # Package prefix matches
+        ("libs/ TestSomething", "libs/auth", "TestSomething", True),
+        ("libs/ TestSomething", "libs", "TestSomething", True),
+        ("libs/ TestSomething", "libsother", "TestSomething", False),
+        # Test prefix matches
+        ("bundle TestAccept/", "bundle", "TestAcceptDeploy", False),
+        ("bundle TestAccept/", "bundle", "TestAccept", True),
+        ("bundle TestAccept/", "bundle", "TestAccept/Deploy", True),
+        # Wildcard matches
+        ("* *", "any/package", "AnyTest", True),
+        ("* TestAccept/", "any/package", "TestAcceptDeploy", False),
+        ("* TestAccept/", "any/package", "TestAccept/Deploy", True),
+        ("libs/ *", "libs/auth", "AnyTest", True),
+        # Path prefix edge cases
+        ("TestAccept/ TestAccept/", "TestAccept", "TestAccept", True),
+        ("TestAccept/ TestAccept/", "TestAccept/bundle", "TestAccept/deploy", True),
+        ("TestAccept/ TestAccept/", "TestAcceptSomething", "TestAcceptSomething", False),
+        # Empty values cases
+        ("* TestDeploy", "", "TestDeploy", True),
+        ("bundle *", "bundle", "", True),
+        # Subtest failure results in parent test failure as well
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic",
+            "acceptance",
+            "TestAccept",
+            True,
+        ),
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic",
+            "acceptance",
+            "TestAnother",
+            False,
+        ),
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic",
+            "acceptance",
+            "TestAccept/bundle/templates/default-python/combinations/classic/x",
+            False,
+        ),
+        # Pattern version
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic/",
+            "acceptance",
+            "TestAccept",
+            True,
+        ),
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic/",
+            "acceptance",
+            "TestAnother",
+            False,
+        ),
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic/",
+            "acceptance",
+            "TestAccept/bundle/templates/default-python/combinations/classic/x",
+            True,
+        ),
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic/",
+            "acceptance",
+            "TestAccept/bundle/templates/default-python/combinations/classic",
+            True,
+        ),
+        (
+            "acceptance TestAccept/bundle/templates/default-python/combinations/classic/",
+            "acceptance",
+            "TestAccept/bundle/templates/default-python/combinations",
+            True,
+        ),
+    ]
+
+    for input_str, package_name, testcase, expected_match in test_cases:
+        config = parse_known_failures(input_str)
+        result = config.matches(package_name, testcase)
+
+        # Convert result to boolean for comparison
+        actual_match = bool(result)
+
+        if actual_match != expected_match:
+            raise AssertionError(
+                f"Test failed for input='{input_str}', package='{package_name}', test='{testcase}': "
+                f"expected {expected_match}, got {actual_match} (result: '{result}')"
+            )
+
+
+def load_known_failures():
+    try:
+        known_failures_path = Path(".gh-logs/known_failures.txt")
+        if known_failures_path.exists():
+            content = known_failures_path.read_text()
+            return parse_known_failures(content)
+    except Exception:
+        pass
+    return None
+
+
 def cleanup_env(name):
     """
     >>> cleanup_env("test-output-aws-prod-is-linux-ubuntu-latest")
@@ -93,36 +287,54 @@ def parse_file(path, filter):
             continue
         if filter and filter not in testname:
             continue
-        action = data.get("Action")
 
+        package_name = data.get("Package", "").removeprefix("github.com/databricks/cli/")
+        test_key = (package_name, testname)
+
+        action = data.get("Action")
         action = ACTION_MESSAGES.get(action, action)
 
         if action in (FAIL, PASS, SKIP):
-            prev = results.get(testname)
+            prev = results.get(test_key)
             if prev == FAIL and action == PASS:
-                results[testname] = FLAKY
+                results[test_key] = FLAKY
             else:
-                results[testname] = action
+                results[test_key] = action
 
         out = data.get("Output")
         if out:
-            outputs.setdefault(testname, []).append(out.rstrip())
+            outputs.setdefault(test_key, []).append(out.rstrip())
 
-    for testname, lines in outputs.items():
-        if testname in results:
+    for test_key, lines in outputs.items():
+        if test_key in results:
             continue
         if "panic: " in str(lines):
-            results.setdefault(testname, PANIC)
+            results.setdefault(test_key, PANIC)
         else:
-            results.setdefault(testname, MISSING)
+            results.setdefault(test_key, MISSING)
 
     return results, outputs
 
 
+def mark_known_failures(results, known_failures_config):
+    """Mark tests as KNOWN_FAILURE or RECOVERED based on known failures config."""
+    marked_results = {}
+    for test_key, action in results.items():
+        package_name, testname = test_key
+        if known_failures_config and action == FAIL and known_failures_config.matches(package_name, testname):
+            marked_results[test_key] = KNOWN_FAILURE
+        elif known_failures_config and action == PASS and known_failures_config.matches(package_name, testname):
+            marked_results[test_key] = RECOVERED
+        else:
+            marked_results[test_key] = action
+    return marked_results
+
+
 def print_report(filenames, filter, filter_env, show_output, markdown=False, omit_repl=False):
-    outputs = {}  # testname -> env -> [output]
-    per_test_per_env_stats = {}  # testname -> env -> action -> count
-    all_testnames = set()
+    known_failures_config = load_known_failures()
+    outputs = {}  # test_key -> env -> [output]
+    per_test_per_env_stats = {}  # test_key -> env -> action -> count
+    all_test_keys = set()
     all_envs = set()
     count_files = 0
     count_results = 0
@@ -136,26 +348,28 @@ def print_report(filenames, filter, filter_env, show_output, markdown=False, omi
             continue
         all_envs.add(env)
         test_results, test_outputs = parse_file(p, filter)
+        test_results = mark_known_failures(test_results, known_failures_config)
         count_files += 1
         count_results += len(test_results)
-        for testname, action in test_results.items():
-            per_test_per_env_stats.setdefault(testname, {}).setdefault(env, Counter())[action] += 1
-        for testname, output in test_outputs.items():
-            outputs.setdefault(testname, {}).setdefault(env, []).extend(output)
-        all_testnames.update(test_results)
+        for test_key, action in test_results.items():
+            per_test_per_env_stats.setdefault(test_key, {}).setdefault(env, Counter())[action] += 1
+        for test_key, output in test_outputs.items():
+            outputs.setdefault(test_key, {}).setdefault(env, []).extend(output)
+        all_test_keys.update(test_results)
 
     print(f"Parsed {count_files} files: {count_results} results", file=sys.stderr, flush=True)
 
     # Check for missing tests
-    for testname in all_testnames:
+    for test_key in all_test_keys:
+        package_name, testname = test_key
         # It is possible for test to be missing if it's parent is skipped, ignore test cases with a parent.
         # For acceptance tests, ignore tests with subtests produced via EnvMatrix
         if testname.startswith("TestAccept/") and "=" in testname:
             continue
         # For non-acceptance tests ignore all subtests.
         if not testname.startswith("TestAccept/") and "/" in testname:
             continue
-        test_results = per_test_per_env_stats.get(testname, {})
+        test_results = per_test_per_env_stats.get(test_key, {})
         for e in all_envs:
             if e not in test_results:
                 test_results.setdefault(e, Counter())[MISSING] += 1
@@ -177,8 +391,8 @@ def is_bug(test_results):
                 count += 1
         return count >= 0
 
-    for testname in all_testnames:
-        test_results = per_test_per_env_stats.get(testname, {})
+    for test_key in all_test_keys:
+        test_results = per_test_per_env_stats.get(test_key, {})
         if not is_bug(test_results):
             continue
         for e, env_results in sorted(test_results.items()):
@@ -189,7 +403,7 @@ def is_bug(test_results):
                 env_results[BUG] += 1
 
     per_env_stats = {}  # env -> action -> count
-    for testname, items in per_test_per_env_stats.items():
+    for test_key, items in per_test_per_env_stats.items():
         for env, stats in items.items():
             per_env_stats.setdefault(env, Counter()).update(stats)
 
@@ -217,31 +431,33 @@ def is_bug(test_results):
                 interesting_envs.add(env)
                 break
 
-    simplified_results = {}  # testname -> env -> action
-    for testname, items in sorted(per_test_per_env_stats.items()):
-        per_testname_result = simplified_results.setdefault(testname, {})
+    simplified_results = {}  # test_key -> env -> action
+    for test_key, items in sorted(per_test_per_env_stats.items()):
+        package_name, testname = test_key
+        per_testkey_result = simplified_results.setdefault(test_key, {})
         # first select tests with interesting actions (anything but pass or skip)
         for env, counts in items.items():
             for action in INTERESTING_ACTIONS:
                 if action in counts:
-                    per_testname_result.setdefault(env, action)
+                    per_testkey_result.setdefault(env, action)
                     break
 
         # Once we know test is interesting, complete the row
-        if per_testname_result:
+        if per_testkey_result:
             for env, counts in items.items():
                 if env not in interesting_envs:
                     continue
                 for action in (PASS, SKIP):
                     if action in counts:
-                        per_testname_result.setdefault(env, action)
+                        per_testkey_result.setdefault(env, action)
                         break
 
-        if not per_testname_result:
-            per_testname_result = simplified_results.pop(testname)
+        if not per_testkey_result:
+            per_testkey_result = simplified_results.pop(test_key)
 
     table = []
-    for testname, items in simplified_results.items():
+    for test_key, items in simplified_results.items():
+        package_name, testname = test_key
         table.append(
             {
                 "Test Name": testname,
@@ -255,11 +471,12 @@ def is_bug(test_results):
         print(table_txt)
 
     if show_output:
-        for testname, stats in simplified_results.items():
+        for test_key, stats in simplified_results.items():
+            package_name, testname = test_key
             for env, action in stats.items():
                 if action not in INTERESTING_ACTIONS:
                     continue
-                output_lines = outputs.get(testname, {}).get(env, [])
+                output_lines = outputs.get(test_key, {}).get(env, [])
                 if omit_repl:
                     output_lines = [
                         line