88from functools import lru_cache
99from glob import glob
1010import urllib .parse
11+ import re
1112
1213import pandas as pd
1314from jinja2 import Environment , FileSystemLoader
1415import requests
1516from clickhouse_driver import Client
1617import boto3
1718from botocore .exceptions import NoCredentialsError
19+ import yaml
20+
1821
1922DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
2023DATABASE_USER_VAR = "CLICKHOUSE_TEST_STAT_LOGIN"
@@ -119,13 +122,10 @@ def get_pr_info_from_number(pr_number: str) -> dict:
119122 return response .json ()
120123
121124
122- @lru_cache
123- def get_run_details (run_url : str ) -> dict :
125+ def get_run_details (run_id : str ) -> dict :
124126 """
125127 Fetch run details for a given run ID.
126128 """
127- run_id = run_url .split ("/" )[- 1 ]
128-
129129 headers = {
130130 "Authorization" : f"token { GITHUB_TOKEN } " ,
131131 "Accept" : "application/vnd.github.v3+json" ,
@@ -167,6 +167,59 @@ def get_checks_fails(client: Client, commit_sha: str, branch_name: str):
167167 return client .query_dataframe (query )
168168
169169
def get_broken_tests_rules(broken_tests_file_path):
    """
    Load the broken-tests YAML file and index its rules for fast lookup.

    Every entry in the file must provide ``name`` and ``reason``. An entry
    may also provide ``check_types`` (copied into the rule when non-empty)
    and ``regex`` (when literally ``true`` the name is compiled as a
    regular expression).

    Returns a dict with two keys:
      - "exact":   maps a test name to its rule dict.
      - "pattern": maps a compiled regular expression to its rule dict.

    Raises KeyError if an entry lacks ``name`` or ``reason``, and re.error
    if a regex entry has an invalid pattern.
    """
    with open(broken_tests_file_path, "r", encoding="utf-8") as broken_tests_file:
        # safe_load returns None for an empty (or comment-only) document;
        # treat that as "no rules" rather than failing on iteration below.
        broken_tests = yaml.safe_load(broken_tests_file) or []

    compiled_rules = {"exact": {}, "pattern": {}}

    for test in broken_tests:
        rule = {"reason": test["reason"]}

        if test.get("check_types"):
            rule["check_types"] = test["check_types"]

        # Only a literal boolean true enables regex matching; any other
        # value (including truthy strings) keeps the name as an exact match.
        if test.get("regex") is True:
            rule["regex"] = True
            compiled_rules["pattern"][re.compile(test["name"])] = rule
        else:
            compiled_rules["exact"][test["name"]] = rule

    return compiled_rules
192+
193+
def get_known_fail_reason(test_name: str, check_name: str, known_fails: dict):
    """
    Look up why a test is expected to fail in the given check.

    Lookup order:
    - The exact-name rule for ``test_name`` is tried first.
    - Pattern rules are then tried in dict order; the first one whose regex
      fully matches ``test_name`` and applies to the check wins.
    - A rule applies when it has no ``check_types`` or when any of its
      ``check_types`` is a substring of ``check_name``.
    - Message/not_message conditions are ignored.

    Returns the matching rule's reason, or "No reason given" when no rule
    applies.
    """

    def applies_to_check(rule: dict) -> bool:
        # A rule without check types is valid for every check.
        wanted_types = rule.get("check_types", [])
        if not wanted_types:
            return True
        for wanted in wanted_types:
            if wanted in check_name:
                return True
        return False

    # 1. Exact-name rule
    exact_rule = known_fails["exact"].get(test_name)
    if exact_rule and applies_to_check(exact_rule):
        return exact_rule["reason"]

    # 2. Pattern rules, first applicable match wins
    for pattern, pattern_rule in known_fails["pattern"].items():
        if not pattern.fullmatch(test_name):
            continue
        if applies_to_check(pattern_rule):
            return pattern_rule["reason"]

    return "No reason given"
221+
222+
170223def get_checks_known_fails (
171224 client : Client , commit_sha : str , branch_name : str , known_fails : dict
172225):
@@ -190,19 +243,22 @@ def get_checks_known_fails(
190243 GROUP BY check_name, test_name, report_url, task_url
191244 )
192245 WHERE test_status='BROKEN'
193- AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
194246 ORDER BY job_name, test_name
195247 """
196248
197249 df = client .query_dataframe (query )
198250
251+ if df .shape [0 ] == 0 :
252+ return df
253+
199254 df .insert (
200255 len (df .columns ) - 1 ,
201256 "reason" ,
202- df ["test_name" ]
203- .astype (str )
204- .apply (
205- lambda test_name : known_fails [test_name ].get ("reason" , "No reason given" )
257+ df .apply (
258+ lambda row : get_known_fail_reason (
259+ row ["test_name" ], row ["job_name" ], known_fails
260+ ),
261+ axis = 1 ,
206262 ),
207263 )
208264
@@ -655,19 +711,10 @@ def create_workflow_report(
655711 pr_number : int = None ,
656712 commit_sha : str = None ,
657713 no_upload : bool = False ,
658- known_fails : str = None ,
714+ known_fails_file_path : str = None ,
659715 check_cves : bool = False ,
660716 mark_preview : bool = False ,
661717) -> str :
662- if pr_number is None or commit_sha is None :
663- run_details = get_run_details (actions_run_url )
664- if pr_number is None :
665- if len (run_details ["pull_requests" ]) > 0 :
666- pr_number = run_details ["pull_requests" ][0 ]["number" ]
667- else :
668- pr_number = 0
669- if commit_sha is None :
670- commit_sha = run_details ["head_commit" ]["id" ]
671718
672719 host = os .getenv (DATABASE_HOST_VAR )
673720 if not host :
@@ -683,6 +730,19 @@ def create_workflow_report(
683730 if not all ([host , user , password , GITHUB_TOKEN ]):
684731 raise Exception ("Required environment variables are not set" )
685732
733+ run_id = actions_run_url .split ("/" )[- 1 ]
734+
735+ run_details = get_run_details (run_id )
736+ branch_name = run_details .get ("head_branch" , "unknown branch" )
737+ if pr_number is None or commit_sha is None :
738+ if pr_number is None :
739+ if len (run_details ["pull_requests" ]) > 0 :
740+ pr_number = run_details ["pull_requests" ][0 ]["number" ]
741+ else :
742+ pr_number = 0
743+ if commit_sha is None :
744+ commit_sha = run_details ["head_commit" ]["id" ]
745+
686746 db_client = Client (
687747 host = host ,
688748 user = user ,
@@ -693,9 +753,6 @@ def create_workflow_report(
693753 settings = {"use_numpy" : True },
694754 )
695755
696- run_details = get_run_details (actions_run_url )
697- branch_name = run_details .get ("head_branch" , "unknown branch" )
698-
699756 fail_results = {
700757 "job_statuses" : get_commit_statuses (commit_sha ),
701758 "checks_fails" : get_checks_fails (db_client , commit_sha , branch_name ),
@@ -712,15 +769,12 @@ def create_workflow_report(
712769 # This might occur when run in preview mode.
713770 cves_not_checked = not check_cves or fail_results ["docker_images_cves" ] is ...
714771
715- if known_fails :
716- if not os .path .exists (known_fails ):
717- print (f"Known fails file { known_fails } not found." )
718- exit (1 )
719-
720- with open (known_fails ) as f :
721- known_fails = json .load (f )
772+ if known_fails_file_path :
773+ if not os .path .exists (known_fails_file_path ):
774+ print (f"WARNING:Known fails file { known_fails_file_path } not found." )
775+ else :
776+ known_fails = get_broken_tests_rules (known_fails_file_path )
722777
723- if known_fails :
724778 fail_results ["checks_known_fails" ] = get_checks_known_fails (
725779 db_client , commit_sha , branch_name , known_fails
726780 )
@@ -755,13 +809,10 @@ def create_workflow_report(
755809 .sum ()
756810 )
757811
758- # Set up the Jinja2 environment
759- template_dir = os .path .dirname (__file__ )
760-
761812 # Load the template
762- template = Environment (loader = FileSystemLoader ( template_dir )). get_template (
763- "ci_run_report.html.jinja"
764- )
813+ template = Environment (
814+ loader = FileSystemLoader ( os . path . dirname ( __file__ ))
815+ ). get_template ( "ci_run_report.html.jinja" )
765816
766817 # Define the context for rendering
767818 context = {
@@ -770,7 +821,7 @@ def create_workflow_report(
770821 "s3_bucket" : S3_BUCKET ,
771822 "pr_info_html" : pr_info_html ,
772823 "pr_number" : pr_number ,
773- "workflow_id" : actions_run_url . split ( "/" )[ - 1 ] ,
824+ "workflow_id" : run_id ,
774825 "commit_sha" : commit_sha ,
775826 "base_sha" : "" if pr_number == 0 else pr_info .get ("base" , {}).get ("sha" ),
776827 "date" : f"{ datetime .utcnow ().strftime ('%Y-%m-%d %H:%M:%S' )} UTC" ,
@@ -824,9 +875,11 @@ def create_workflow_report(
824875 exit (0 )
825876
826877 if pr_number == 0 :
827- report_destination_key = f"REFs/{ branch_name } /{ commit_sha } / { report_name } "
878+ report_destination_key = f"REFs/{ branch_name } /{ commit_sha } "
828879 else :
829- report_destination_key = f"PRs/{ pr_number } /{ commit_sha } /{ report_name } "
880+ report_destination_key = f"PRs/{ pr_number } /{ commit_sha } "
881+
882+ report_destination_key += f"/{ run_id } /{ report_name } "
830883
831884 # Upload the report to S3
832885 s3_client = boto3 .client ("s3" , endpoint_url = os .getenv ("S3_URL" ))
0 commit comments