
Commit b00c771

overhaul broken tests system, now uses yaml, supports regex
update report to handle new known fails format
allow message to be optional again
fix broken tests report when there are no fails
fixes and debug logging
more debugging
more debugging and fix not_message handling
more debug and fix report
fix broken tests report when check_types unspecified
support opening logs with invalid chars in extract_fail_logs
debugging missing integration tests logs
fix get_log_paths returning compressed files
use regex to match sanitizer timeouts
1 parent 1a155d2 commit b00c771
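
The new tests/broken_tests.yaml file itself is not visible in this diff, so the sketch below is only an illustration of what a rule entry presumably looks like, inferred from the fields the new parsers read (name, reason, and the optional message, not_message, check_types, and regex keys); the test names, patterns, and reasons here are hypothetical.

# Illustrative sketch only: entries are hypothetical, field names come from the
# parsing code added in this commit (get_broken_tests_rules).
import re
import yaml

EXAMPLE_RULES = """\
- name: test_example_exact_match              # hypothetical exact test name
  reason: "known flaky under asan"
  check_types:
    - asan
- name: "test_example_.*_timeout"             # treated as a pattern because regex: true
  regex: true
  message: "Sanitizer.*timed out"             # checked against the test log
  reason: "sanitizer timeouts are expected here"
"""

rules = yaml.safe_load(EXAMPLE_RULES)
for rule in rules:
    # Pattern rules get compiled; exact rules are kept as plain strings.
    name = re.compile(rule["name"]) if rule.get("regex") else rule["name"]
    print(name, "->", rule["reason"])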

8 files changed, +480 -644 lines changed


.github/actions/create_workflow_report/action.yml

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ runs:
       pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 jinja2==3.1.5

       CMD="python3 .github/actions/create_workflow_report/create_workflow_report.py"
-      ARGS="--actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves --pr-number $PR_NUMBER"
+      ARGS="--actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.yaml --cves --pr-number $PR_NUMBER"

       set +e -x
       if [[ "$FINAL" == "false" ]]; then

.github/actions/create_workflow_report/create_workflow_report.py

Lines changed: 70 additions & 14 deletions
@@ -8,13 +8,16 @@
 from functools import lru_cache
 from glob import glob
 import urllib.parse
+import re

 import pandas as pd
 from jinja2 import Environment, FileSystemLoader
 import requests
 from clickhouse_driver import Client
 import boto3
 from botocore.exceptions import NoCredentialsError
+import yaml
+

 DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
 DATABASE_USER_VAR = "CLICKHOUSE_TEST_STAT_LOGIN"
@@ -166,6 +169,59 @@ def get_checks_fails(client: Client, commit_sha: str, branch_name: str):
     return client.query_dataframe(query)


+def get_broken_tests_rules(broken_tests_file_path):
+    with open(broken_tests_file_path, "r", encoding="utf-8") as broken_tests_file:
+        broken_tests = yaml.safe_load(broken_tests_file)
+
+    compiled_rules = {"exact": {}, "pattern": {}}
+
+    for test in broken_tests:
+        regex = test.get("regex") is True
+        rule = {
+            "reason": test["reason"],
+        }
+
+        if test.get("check_types"):
+            rule["check_types"] = test["check_types"]
+
+        if regex:
+            rule["regex"] = True
+            compiled_rules["pattern"][re.compile(test["name"])] = rule
+        else:
+            compiled_rules["exact"][test["name"]] = rule
+
+    return compiled_rules
+
+
+def get_known_fail_reason(test_name: str, check_name: str, known_fails: dict):
+    """
+    Returns the reason why a test is known to fail based on its name and build context.
+
+    - Exact-name rules are checked first.
+    - Pattern-name rules are checked next (first match wins).
+    - Message/not_message conditions are ignored.
+    """
+    # 1. Exact-name rules
+    rule_data = known_fails["exact"].get(test_name)
+    if rule_data:
+        check_types = rule_data.get("check_types", [])
+        if not check_types or any(
+            check_type in check_name for check_type in check_types
+        ):
+            return rule_data["reason"]
+
+    # 2. Pattern-name rules
+    for name_re, rule_data in known_fails["pattern"].items():
+        if name_re.fullmatch(test_name):
+            check_types = rule_data.get("check_types", [])
+            if not check_types or any(
+                check_type in check_name for check_type in check_types
+            ):
+                return rule_data["reason"]
+
+    return "No reason given"
+
+
 def get_checks_known_fails(
     client: Client, commit_sha: str, branch_name: str, known_fails: dict
 ):
@@ -189,19 +245,22 @@ def get_checks_known_fails(
         GROUP BY check_name, test_name, report_url, task_url
     )
     WHERE test_status='BROKEN'
-    AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())})
    ORDER BY job_name, test_name
    """

     df = client.query_dataframe(query)

+    if df.shape[0] == 0:
+        return df
+
     df.insert(
         len(df.columns) - 1,
         "reason",
-        df["test_name"]
-        .astype(str)
-        .apply(
-            lambda test_name: known_fails[test_name].get("reason", "No reason given")
+        df.apply(
+            lambda row: get_known_fail_reason(
+                row["test_name"], row["job_name"], known_fails
+            ),
+            axis=1,
         ),
     )

@@ -654,7 +713,7 @@ def create_workflow_report(
     pr_number: int = None,
     commit_sha: str = None,
     no_upload: bool = False,
-    known_fails: str = None,
+    known_fails_file_path: str = None,
     check_cves: bool = False,
     mark_preview: bool = False,
 ) -> str:
@@ -710,15 +769,12 @@ def create_workflow_report(
     # This might occur when run in preview mode.
     cves_not_checked = not check_cves or fail_results["docker_images_cves"] is ...

-    if known_fails:
-        if not os.path.exists(known_fails):
-            print(f"Known fails file {known_fails} not found.")
-            exit(1)
-
-        with open(known_fails) as f:
-            known_fails = json.load(f)
+    if known_fails_file_path:
+        if not os.path.exists(known_fails_file_path):
+            print(f"WARNING:Known fails file {known_fails_file_path} not found.")
+        else:
+            known_fails = get_broken_tests_rules(known_fails_file_path)

-    if known_fails:
         fail_results["checks_known_fails"] = get_checks_known_fails(
             db_client, commit_sha, branch_name, known_fails
         )
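
Downstream in the report, the two helpers above are meant to be used together: the YAML file is compiled once into exact and pattern rule dictionaries, and each BROKEN row in the results dataframe is annotated with a reason by matching the test name and the job (check) name. A minimal usage sketch, assuming the functions added above and a checked-out repo; the test and check names below are hypothetical.

# Minimal usage sketch; assumes get_broken_tests_rules and get_known_fail_reason
# from the diff above. Test and check names are hypothetical.
known_fails = get_broken_tests_rules("tests/broken_tests.yaml")

reason = get_known_fail_reason(
    "02456_example_test",        # hypothetical test name
    "Stateless tests (asan)",    # hypothetical check/job name
    known_fails,
)
print(reason)  # "No reason given" when no exact or pattern rule matches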
Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 #!/bin/bash
 # This script is for generating preview reports when invoked as a post-hook from a praktika job
 pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 jinja2==3.1.5
-ARGS="--mark-preview --known-fails tests/broken_tests.json --cves --actions-run-url $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID --pr-number $PR_NUMBER"
+ARGS="--mark-preview --known-fails tests/broken_tests.yaml --cves --actions-run-url $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID --pr-number $PR_NUMBER"
 CMD="python3 .github/actions/create_workflow_report/create_workflow_report.py"
 $CMD $ARGS

ci/defs/job_configs.py

Lines changed: 5 additions & 5 deletions
@@ -39,7 +39,7 @@
             "./tests/config",
             "./tests/*.txt",
             "./ci/docker/stateless-test",
-            "./tests/broken_tests.json",
+            "./tests/broken_tests.yaml",
         ],
     ),
     result_name_for_cidb="Tests",
@@ -686,7 +686,7 @@ class JobConfigs:
             "./ci/jobs/scripts/integration_tests_runner.py",
             "./tests/integration/",
             "./ci/docker/integration",
-            "./tests/broken_tests.json",
+            "./tests/broken_tests.yaml",
         ],
     ),
 ).parametrize(
@@ -710,7 +710,7 @@ class JobConfigs:
             "./ci/jobs/scripts/integration_tests_runner.py",
             "./tests/integration/",
             "./ci/docker/integration",
-            "./tests/broken_tests.json",
+            "./tests/broken_tests.yaml",
         ],
     ),
 ).parametrize(
@@ -752,7 +752,7 @@ class JobConfigs:
             "./ci/jobs/scripts/integration_tests_runner.py",
             "./tests/integration/",
             "./ci/docker/integration",
-            "./tests/broken_tests.json",
+            "./tests/broken_tests.yaml",
         ],
     ),
     allow_merge_on_failure=True,
@@ -777,7 +777,7 @@ class JobConfigs:
             "./ci/jobs/scripts/integration_tests_runner.py",
             "./tests/integration/",
             "./ci/docker/integration",
-            "./tests/broken_tests.json",
+            "./tests/broken_tests.yaml",
         ],
     ),
     requires=[ArtifactNames.CH_AMD_ASAN],

ci/jobs/scripts/functional_tests_results.py

Lines changed: 103 additions & 30 deletions
@@ -3,6 +3,9 @@
 import os
 import traceback
 from typing import List
+import re
+
+import yaml

 from praktika.result import Result

@@ -30,14 +33,99 @@
 # out.writerow(status)


-def get_broken_tests_list() -> dict:
-    file_path = "tests/broken_tests.json"
-    if not os.path.isfile(file_path) or os.path.getsize(file_path) == 0:
-        return {}
+def get_broken_tests_rules() -> dict:
+    broken_tests_file_path = "tests/broken_tests.yaml"
+    if (
+        not os.path.isfile(broken_tests_file_path)
+        or os.path.getsize(broken_tests_file_path) == 0
+    ):
+        raise ValueError(
+            "There is something wrong with getting broken tests rules: "
+            f"file '{broken_tests_file_path}' is empty or does not exist."
+        )
+
+    with open(broken_tests_file_path, "r", encoding="utf-8") as broken_tests_file:
+        broken_tests = yaml.safe_load(broken_tests_file)
+
+    compiled_rules = {"exact": {}, "pattern": {}}
+
+    for test in broken_tests:
+        regex = test.get("regex") is True
+        rule = {
+            "reason": test["reason"],
+        }
+
+        if test.get("message"):
+            rule["message"] = re.compile(test["message"]) if regex else test["message"]
+
+        if test.get("not_message"):
+            rule["not_message"] = (
+                re.compile(test["not_message"]) if regex else test["not_message"]
+            )
+        if test.get("check_types"):
+            rule["check_types"] = test["check_types"]
+
+        if regex:
+            rule["regex"] = True
+            compiled_rules["pattern"][re.compile(test["name"])] = rule
+        else:
+            compiled_rules["exact"][test["name"]] = rule
+
+    print(
+        f"INFO: Compiled {len(compiled_rules['exact'])} exact rules and {len(compiled_rules['pattern'])} pattern rules"
+    )
+
+    return compiled_rules
+
+
+def test_is_known_fail(test_name, test_logs, known_broken_tests, test_options_string):
+    matching_rules = []
+
+    print(f"Checking known broken tests for failed test: {test_name}")
+    print("Potential matching rules:")
+    exact_rule = known_broken_tests["exact"].get(test_name)
+    if exact_rule:
+        print(f"{test_name} - {exact_rule}")
+        matching_rules.append(exact_rule)
+
+    for name_re, data in known_broken_tests["pattern"].items():
+        if name_re.fullmatch(test_name):
+            print(f"{name_re} - {data}")
+            matching_rules.append(data)
+
+    if not matching_rules:
+        return False
+
+    def matches_substring(substring, log, is_regex):
+        if log is None:
+            return False
+        if is_regex:
+            return bool(substring.search(log))
+        return substring in log
+
+    for rule_data in matching_rules:
+        if rule_data.get("check_types") and not any(
+            ct in test_options_string for ct in rule_data["check_types"]
+        ):
+            print(
+                f"Check types didn't match: '{rule_data['check_types']}' not in '{test_options_string}'"
+            )
+            continue  # check_types didn't match → skip rule

-    with open(file_path, "r", encoding="utf-8") as skip_list_file:
-        skip_list_tests = json.load(skip_list_file)
-    return skip_list_tests
+        is_regex = rule_data.get("regex", False)
+        not_message = rule_data.get("not_message")
+        if not_message and matches_substring(not_message, test_logs, is_regex):
+            print(f"Skip rule: Not message matched: '{rule_data['not_message']}'")
+            continue  # not_message matched → skip rule
+        message = rule_data.get("message")
+        if message and not matches_substring(message, test_logs, is_regex):
+            print(f"Skip rule: Message didn't match: '{rule_data['message']}'")
+            continue
+
+        print(f"Test {test_name} matched rule: {rule_data}")
+        return rule_data["reason"]
+
+    return False


 class FTResultsProcessor:
@@ -75,7 +163,7 @@ def _process_test_output(self):
         test_results = []
         test_end = True

-        known_broken_tests = get_broken_tests_list()
+        known_broken_tests = get_broken_tests_rules()

         with open(self.tests_output_file, "r", encoding="utf-8") as test_file:
             for line in test_file:
@@ -161,34 +249,19 @@ def _process_test_output(self):
                )

                if test[1] == "FAIL":
-                    broken_message = None
-                    if test[0] in known_broken_tests.keys():
-                        message = known_broken_tests[test[0]].get("message")
-                        check_types = known_broken_tests[test[0]].get("check_types")
-                        if check_types and not any(
-                            check_type in test_options_string
-                            for check_type in check_types
-                        ):
-                            broken_message = None
-                        elif message:
-                            if message in test_results_[-1].info:
-                                broken_message = (
-                                    f"\nMarked as broken, matched message: '{message}'"
-                                )
-                        else:
-                            broken_message = f"\nMarked as broken, no message specified"
-
-                        if broken_message and check_types:
-                            broken_message += (
-                                f", matched one or more check types {check_types}"
-                            )
+                    broken_message = test_is_known_fail(
+                        test[0],
+                        test_results_[-1].info,
+                        known_broken_tests,
+                        test_options_string,
+                    )

                    if broken_message:
                        broken += 1
                        failed -= 1
                        test_results_[-1].set_status(Result.StatusExtended.BROKEN)
                        test_results_[-1].set_label(Result.Label.BROKEN)
-                        test_results_[-1].info += broken_message
+                        test_results_[-1].info += "\nMarked as broken: " + broken_message

        except Exception as e:
            print(f"ERROR: Failed to parse test results: {test}")