@@ -253,11 +253,17 @@ def get_commit_statuses(sha: str) -> pd.DataFrame:
253253 for item in all_data
254254 ]
255255
256- return (
257- pd .DataFrame (parsed )
258- .sort_values (by = ["job_status" , "job_name" ], ascending = [True , True ])
259- .reset_index (drop = True )
260- )
256+ # Create DataFrame
257+ df = pd .DataFrame (parsed )
258+
259+ # Keep only the first row for each job_name. GitHub returns statuses in
260+ # reverse chronological order, so the first row is the newest status for that context.
261+ df = df .drop_duplicates (subset = ["job_name" ], keep = "first" )
262+
263+ # Sort by status and job name
264+ return df .sort_values (
265+ by = ["job_status" , "job_name" ], ascending = [True , True ]
266+ ).reset_index (drop = True )
261267
262268
263269def get_pr_info_from_number (pr_number : str ) -> dict :
@@ -291,28 +297,50 @@ def get_checks_fails(client: Client, job_url: str):
291297 Get tests that did not succeed for the given job URL.
292298 Exclude checks that have status 'error' as they are counted in get_checks_errors.
293299 """
294- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
295- query = f"""SELECT { columns } FROM `gh-data`.checks
296- WHERE task_url LIKE '{ job_url } %'
297- AND test_status IN ('FAIL', 'ERROR')
298- AND check_status!='error'
299- ORDER BY check_name, test_name
300- """
300+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
301+ FROM (
302+ SELECT
303+ argMax(check_status, check_start_time) as job_status,
304+ check_name as job_name,
305+ argMax(test_status, check_start_time) as status,
306+ test_name,
307+ report_url as results_link,
308+ task_url
309+ FROM `gh-data`.checks
310+ GROUP BY check_name, test_name, report_url, task_url
311+ )
312+ WHERE task_url LIKE '{ job_url } %'
313+ AND test_status IN ('FAIL', 'ERROR')
314+ AND job_status!='error'
315+ ORDER BY job_name, test_name
316+ """
301317 return client .query_dataframe (query )
302318
303319
304320def get_checks_known_fails (client : Client , job_url : str , known_fails : dict ):
305321 """
306322 Get tests that are known to fail for the given job URL.
307323 """
308- assert len (known_fails ) > 0 , "cannot query the database with empty known fails"
309- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
310- query = f"""SELECT { columns } FROM `gh-data`.checks
311- WHERE task_url LIKE '{ job_url } %'
312- AND test_status='BROKEN'
313- AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
314- ORDER BY test_name, check_name
315- """
324+ if len (known_fails ) == 0 :
325+ return pd .DataFrame ()
326+
327+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
328+ FROM (
329+ SELECT
330+ argMax(check_status, check_start_time) as job_status,
331+ check_name as job_name,
332+ argMax(test_status, check_start_time) as status,
333+ test_name,
334+ report_url as results_link,
335+ task_url
336+ FROM `gh-data`.checks
337+ GROUP BY check_name, test_name, report_url, task_url
338+ )
339+ WHERE task_url LIKE '{ job_url } %'
340+ AND test_status='BROKEN'
341+ AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
342+ ORDER BY job_name, test_name
343+ """
316344
317345 df = client .query_dataframe (query )
318346
@@ -333,12 +361,22 @@ def get_checks_errors(client: Client, job_url: str):
333361 """
334362 Get checks that have status 'error' for the given job URL.
335363 """
336- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
337- query = f"""SELECT { columns } FROM `gh-data`.checks
338- WHERE task_url LIKE '{ job_url } %'
339- AND check_status=='error'
340- ORDER BY check_name, test_name
341- """
364+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
365+ FROM (
366+ SELECT
367+ argMax(check_status, check_start_time) as job_status,
368+ check_name as job_name,
369+ argMax(test_status, check_start_time) as status,
370+ test_name,
371+ report_url as results_link,
372+ task_url
373+ FROM `gh-data`.checks
374+ GROUP BY check_name, test_name, report_url, task_url
375+ )
376+ WHERE task_url LIKE '{ job_url } %'
377+ AND job_status=='error'
378+ ORDER BY job_name, test_name
379+ """
342380 return client .query_dataframe (query )
343381
344382
0 commit comments