88from functools import lru_cache
99from glob import glob
1010import urllib .parse
11+ import re
1112
1213import pandas as pd
1314from jinja2 import Environment , FileSystemLoader
1415import requests
1516from clickhouse_driver import Client
1617import boto3
1718from botocore .exceptions import NoCredentialsError
19+ import yaml
20+
1821
1922DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
2023DATABASE_USER_VAR = "CLICKHOUSE_TEST_STAT_LOGIN"
@@ -166,6 +169,58 @@ def get_checks_fails(client: Client, commit_sha: str, branch_name: str):
166169 return client .query_dataframe (query )
167170
168171
def get_broken_tests_rules(broken_tests_file_path):
    """
    Load the broken-tests YAML file and index its rules for lookup.

    Each entry of the YAML list is read as a mapping with:
      - "name": the test name, or a regular expression when "regex" is true;
      - "reason": why the test is considered broken (required);
      - "check_types": optional list, copied to the rule only when non-empty;
      - "regex": optional flag; only the literal value ``True`` enables it.

    Returns a dict with two sub-dicts:
      - "exact":   test name -> rule
      - "pattern": compiled regular expression -> rule

    An empty file yields a result with both sub-dicts empty.

    Raises KeyError if an entry lacks "name" or "reason", and re.error if a
    regex entry does not compile.
    """
    with open(broken_tests_file_path, "r", encoding="utf-8") as broken_tests_file:
        # safe_load returns None for an empty document; treat that as
        # "no rules" instead of failing on iteration below.
        broken_tests = yaml.safe_load(broken_tests_file) or []

    compiled_rules = {"exact": {}, "pattern": {}}

    for test in broken_tests:
        rule = {"reason": test["reason"]}

        if test.get("check_types"):
            rule["check_types"] = test["check_types"]

        # Only an explicit boolean true marks the name as a pattern.
        if test.get("regex") is True:
            rule["regex"] = True
            compiled_rules["pattern"][re.compile(test["name"])] = rule
        else:
            compiled_rules["exact"][test["name"]] = rule

    return compiled_rules
194+
195+
def _rule_applies_to_check(rule_data: dict, check_name: str) -> bool:
    """Return True if the rule has no check_types or one occurs in check_name."""
    check_types = rule_data.get("check_types")
    if not check_types:
        # A rule without check_types is not restricted to particular checks.
        return True
    return any(check_type in check_name for check_type in check_types)


def get_known_fail_reason(test_name: str, check_name: str, known_fails: dict):
    """
    Returns the reason why a test is known to fail based on its name and build context.

    - Exact-name rules are checked first.
    - Pattern-name rules are checked next (first match wins); the pattern
      must match the whole test name.
    - A rule applies when it has no "check_types", or when at least one of
      its check types is a substring of check_name.
    - Message/not_message conditions are ignored.

    known_fails is a dict with an "exact" mapping (test name -> rule) and a
    "pattern" mapping (compiled regex -> rule); each rule has a "reason".

    Returns the matching rule's reason, or "No reason given" if no rule applies.
    """
    # 1. Exact-name rules
    rule_data = known_fails["exact"].get(test_name)
    if rule_data and _rule_applies_to_check(rule_data, check_name):
        return rule_data["reason"]

    # 2. Pattern-name rules
    for name_re, rule_data in known_fails["pattern"].items():
        if name_re.fullmatch(test_name) and _rule_applies_to_check(
            rule_data, check_name
        ):
            return rule_data["reason"]

    return "No reason given"
222+
223+
169224def get_checks_known_fails (
170225 client : Client , commit_sha : str , branch_name : str , known_fails : dict
171226):
@@ -189,7 +244,6 @@ def get_checks_known_fails(
189244 GROUP BY check_name, test_name, report_url, task_url
190245 )
191246 WHERE test_status='BROKEN'
192- AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
193247 ORDER BY job_name, test_name
194248 """
195249
@@ -198,10 +252,11 @@ def get_checks_known_fails(
198252 df .insert (
199253 len (df .columns ) - 1 ,
200254 "reason" ,
201- df ["test_name" ]
202- .astype (str )
203- .apply (
204- lambda test_name : known_fails [test_name ].get ("reason" , "No reason given" )
255+ df .apply (
256+ lambda row : get_known_fail_reason (
257+ row ["test_name" ], row ["check_name" ], known_fails
258+ ),
259+ axis = 1 ,
205260 ),
206261 )
207262
@@ -654,7 +709,7 @@ def create_workflow_report(
654709 pr_number : int = None ,
655710 commit_sha : str = None ,
656711 no_upload : bool = False ,
657- known_fails : str = None ,
712+ known_fails_file_path : str = None ,
658713 check_cves : bool = False ,
659714 mark_preview : bool = False ,
660715) -> str :
@@ -710,15 +765,12 @@ def create_workflow_report(
710765 # This might occur when run in preview mode.
711766 cves_not_checked = not check_cves or fail_results ["docker_images_cves" ] is ...
712767
713- if known_fails :
714- if not os .path .exists (known_fails ):
715- print (f"Known fails file { known_fails } not found." )
716- exit (1 )
717-
718- with open (known_fails ) as f :
719- known_fails = json .load (f )
768+ if known_fails_file_path :
769+ if not os .path .exists (known_fails_file_path ):
770+ print (f"WARNING:Known fails file { known_fails_file_path } not found." )
771+ else :
772+ known_fails = get_broken_tests_rules (known_fails_file_path )
720773
721- if known_fails :
722774 fail_results ["checks_known_fails" ] = get_checks_known_fails (
723775 db_client , commit_sha , branch_name , known_fails
724776 )
0 commit comments