1- -- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
2- WITH all_jobs AS (
1+ -- TODO (huydhn): This query tracks the number of red commits on HUD KPIs page. This
2+ -- is not the most efficient query around both in terms of speed and memory usage. So,
3+ -- a good BE task is to re-write this in a more efficient way, HUD code is also
4+ -- subject to change if need be
5+ WITH join_with_workflow_run AS (
6+ -- Do the join with workflow_run, then workflow_job to avoid OOM
37 SELECT
4- push ._event_time as time ,
5- job .conclusion AS conclusion,
6- push .head_commit .id AS sha,
7- ROW_NUMBER() OVER(PARTITION BY job .name , push .head_commit .id ORDER BY job .run_attempt DESC ) AS row_num,
8+ w .id AS id,
9+ p .head_commit . ' timestamp' AS time ,
10+ p .head_commit . ' id' AS sha
811 FROM
9- push
10- JOIN commons .workflow_run workflow ON workflow .head_commit .id = push .head_commit .id
11- JOIN commons .workflow_job job ON workflow .id = job .run_id
12+ default .push p FINAL
13+ JOIN default .workflow_run w FINAL ON w .head_commit . ' id' = p .head_commit . ' id'
1214 WHERE
13- job .name != ' ciflow_should_run'
14- AND job .name != ' generate-test-matrix'
15- AND ( -- Limit it to workflows which block viable/strict upgrades
16- ARRAY_CONTAINS(SPLIT(:workflowNames, ' ,' ), LOWER (workflow .name ))
17- OR workflow .name like ' linux-binary%'
15+ (
16+ -- Limit it to workflows which block viable/strict upgrades
17+ has({workflowNames: Array(String) }, lower (w .name ))
18+ OR w .name like ' linux-binary%'
1819 )
19- AND job .name NOT LIKE ' %rerun_disabled_tests%'
20- AND job .name NOT LIKE ' %unstable%'
21- AND workflow .event != ' workflow_run' -- Filter out worflow_run-triggered jobs, which have nothing to do with the SHA
22- AND push .ref IN (' refs/heads/master' , ' refs/heads/main' )
23- AND push .repository .owner .name = ' pytorch'
24- AND push .repository .name = ' pytorch'
25- AND push ._event_time >= PARSE_DATETIME_ISO8601(:startTime)
26- AND push ._event_time < PARSE_DATETIME_ISO8601(:stopTime)
27- UNION ALL
20+ AND w .event != ' workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
21+ AND p .ref = ' refs/heads/main'
22+ AND p .repository . ' owner' .' name' = ' pytorch'
23+ AND p .repository . ' name' = ' pytorch'
24+ AND p .head_commit . ' timestamp' >= {startTime: DateTime64(3 ) }
25+ AND p .head_commit . ' timestamp' < {stopTime: DateTime64(3 ) }
26+ ),
27+ all_jobs AS (
2828 SELECT
29- push ._event_time as time ,
30- CASE
31- WHEN job .job .status = ' failed' then ' failure'
32- WHEN job .job .status = ' canceled' then ' cancelled'
33- ELSE job .job .status
34- END AS conclusion,
35- push .head_commit .id AS sha,
36- ROW_NUMBER() OVER(PARTITION BY job .name , push .head_commit .id ORDER BY job .run_attempt DESC ) AS row_num,
29+ w .time AS time ,
30+ j .conclusion AS conclusion,
31+ w .sha AS sha,
32+ ROW_NUMBER() OVER(
33+ PARTITION BY j .name ,
34+ w .sha
35+ ORDER BY
36+ j .run_attempt DESC
37+ ) AS row_num
3738 FROM
38- circleci . job job
39- JOIN push ON job . pipeline . vcs . revision = push . head_commit .id
39+ join_with_workflow_run w
40+ JOIN default .workflow_job j FINAL ON w . id = j . run_id
4041 WHERE
41- push .ref IN (' refs/heads/master' , ' refs/heads/main' )
42- AND push .repository .owner .name = ' pytorch'
43- AND push .repository .name = ' pytorch'
44- AND push ._event_time >= PARSE_DATETIME_ISO8601(:startTime)
45- AND push ._event_time < PARSE_DATETIME_ISO8601(:stopTime)
42+ j .name != ' ciflow_should_run'
43+ AND j .name != ' generate-test-matrix'
44+ AND j .name NOT LIKE ' %rerun_disabled_tests%'
45+ AND j .name NOT LIKE ' %unstable%'
4646),
4747any_red AS (
4848 SELECT
49- FORMAT_TIMESTAMP( ' %Y-%m-%d' , DATE_TRUNC(:granularity, time ) ) AS granularity_bucket,
49+ formatDateTime(DATE_TRUNC({granularity: String }, time ), ' %Y-%m-%d' ) AS granularity_bucket,
5050 sha,
5151 CAST(
5252 SUM (
@@ -61,13 +61,16 @@ any_red AS (
6161 CAST(
6262 SUM (
6363 CASE
64- WHEN conclusion = ' failure' AND row_num = 1 THEN 1
65- WHEN conclusion = ' timed_out' AND row_num = 1 THEN 1
66- WHEN conclusion = ' cancelled' AND row_num = 1 THEN 1
64+ WHEN conclusion = ' failure'
65+ AND row_num = 1 THEN 1
66+ WHEN conclusion = ' timed_out'
67+ AND row_num = 1 THEN 1
68+ WHEN conclusion = ' cancelled'
69+ AND row_num = 1 THEN 1
6770 ELSE 0
6871 END
6972 ) > 0 AS int
70- ) AS broken_trunk_red,
73+ ) AS broken_trunk_red
7174 FROM
7275 all_jobs
7376 GROUP BY
@@ -80,24 +83,29 @@ any_red AS (
8083classified_red AS (
8184 SELECT
8285 granularity_bucket,
83- ARRAY_CREATE(
84- ARRAY_CREATE(' Broken trunk' , AVG (broken_trunk_red)),
85- ARRAY_CREATE(' Flaky' , AVG (all_red) - AVG (broken_trunk_red)),
86- ARRAY_CREATE(' Total' , AVG (all_red))
87- ) AS metrics,
86+ -- CH only allows data of the same type in the array
87+ arrayJoin(
88+ array(
89+ array(' Broken trunk' , toString(AVG (broken_trunk_red))),
90+ array(
91+ ' Flaky' ,
92+ toString(AVG (all_red) - AVG (broken_trunk_red))
93+ ),
94+ array(' Total' , toString(AVG (all_red)))
95+ )
96+ ) AS metrics
8897 FROM
8998 any_red
9099 GROUP BY
91100 granularity_bucket
92101),
93102avg_red AS (
94103 SELECT
95- classified_red . granularity_bucket ,
96- ELEMENT_AT( metrics . metric , 1 ) AS name,
97- ELEMENT_AT (metrics . metric , 2 ) AS metric,
104+ granularity_bucket,
105+ metrics[ 1 ] AS name,
106+ toFloat32 (metrics[ 2 ] ) AS metric
98107 FROM
99108 classified_red
100- CROSS JOIN UNNEST(classified_red .metrics AS metric) AS metrics
101109 ORDER BY
102110 granularity_bucket DESC
103111)
@@ -111,6 +119,6 @@ SELECT
111119 ORDER BY
112120 granularity_bucket ROWS 1 PRECEDING
113121 )
114- ) / 2 .0 AS metric,
122+ ) / 2 .0 AS metric
115123FROM
116124 avg_red
0 commit comments