
Commit 5fb4c04

[CH] Migrate HUD /reliability page (#5818)
TSIA. This one is a straightforward migration.

### Testing

https://torchci-git-fork-huydhn-ch-migrate-commit-r-6df3aa-fbopensource.vercel.app/reliability/pytorch/pytorch
1 parent 5b770fb commit 5fb4c04

File tree

5 files changed: +107 -139 lines changed

torchci/clickhouse_queries/master_commit_red_jobs/params.json

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 {
   "startTime": "DateTime64(3)",
   "stopTime": "DateTime64(3)",
-  "workflowNames": "String"
-}
+  "workflowNames": "Array(String)"
+}
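With workflowNames now declared as Array(String) instead of String, callers pass a JSON array rather than a comma-joined string. A minimal sketch of the parameter payload shape implied by the updated params.json and the page code further below; the interface name is hypothetical, the page itself just uses a loose object:

```ts
// Hypothetical helper type mirroring the updated params.json declarations;
// the actual page passes a plain { [key: string]: any } object.
interface MasterCommitRedJobsParams {
  startTime: string; // DateTime64(3), e.g. "2024-02-01T00:00:00.000"
  stopTime: string; // DateTime64(3)
  workflowNames: string[]; // Array(String); previously a comma-joined String
}

const params: MasterCommitRedJobsParams = {
  startTime: "2024-02-01T00:00:00.000",
  stopTime: "2024-02-08T00:00:00.000",
  workflowNames: ["pull", "trunk", "lint"],
};
```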

torchci/clickhouse_queries/master_commit_red_jobs/query.sql

Lines changed: 39 additions & 40 deletions
@@ -1,46 +1,55 @@
--- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
+-- This query is used to show failures on https://hud.pytorch.org/reliability/pytorch/pytorch
 WITH all_jobs AS (
   SELECT
-    push._event_time AS time,
-    job.conclusion AS conclusion,
-    push.head_commit.id AS sha,
-    push.head_commit.author.username AS author,
+    p.head_commit.'timestamp' AS time,
+    j.conclusion AS conclusion,
+    p.head_commit.'id' AS sha,
+    p.head_commit.'author'.'username' AS author,
     CONCAT(
-      workflow.name,
+      w.name,
       ' / ',
-      ELEMENT_AT(SPLIT(job.name, ' / '), 1),
-      CONCAT(' / ', ELEMENT_AT(SPLIT(ELEMENT_AT(SPLIT(job.name, ' / '), 2), ', '), 1))
+      arrayElement(splitByString(' / ', j.name), 1),
+      ' / ',
+      arrayElement(
+        splitByString(', ', arrayElement(splitByString(' / ', j.name), 2)),
+        1
+      )
     ) AS name,
     (
       CASE
-        WHEN push.head_commit.author.username = 'pytorchmergebot' THEN push.head_commit.message
-        ELSE NULL
+        WHEN p.head_commit.'author'.'username' = 'pytorchmergebot' THEN p.head_commit.'message'
+        ELSE ''
       END
-    ) AS body,
+    ) AS body
   FROM
-    commons.workflow_job job
-    JOIN commons.workflow_run workflow ON workflow.id = job.run_id
-    JOIN push on workflow.head_commit.id = push.head_commit.id
+    default.workflow_job j FINAL
+    JOIN default.workflow_run w FINAL ON w.id = j.run_id
+    JOIN default.push p FINAL on w.head_commit.'id' = p.head_commit.'id'
   WHERE
-    job.name != 'ciflow_should_run'
-    AND job.name != 'generate-test-matrix'
-    AND job.name NOT LIKE '%rerun_disabled_tests%'
-    AND job.name NOT LIKE '%filter%'
-    AND job.name NOT LIKE '%unstable%'
-    AND job.name LIKE '%/%'
-    AND ARRAY_CONTAINS(SPLIT(:workflowNames, ','), LOWER(workflow.name))
-    AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
-    AND push.ref = 'refs/heads/main'
-    AND push.repository.owner.name = 'pytorch'
-    AND push.repository.name = 'pytorch'
-    AND push._event_time >= PARSE_DATETIME_ISO8601(:startTime)
-    AND push._event_time < PARSE_DATETIME_ISO8601(:stopTime)
+    j.name != 'ciflow_should_run'
+    AND j.name != 'generate-test-matrix'
+    AND j.name NOT LIKE '%rerun_disabled_tests%'
+    AND j.name NOT LIKE '%filter%'
+    AND j.name NOT LIKE '%unstable%'
+    AND j.name LIKE '%/%'
+    AND has({workflowNames: Array(String)}, lower(w.name))
+    AND w.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
+    AND p.ref = 'refs/heads/main'
+    AND p.repository.'owner'.'name' = 'pytorch'
+    AND p.repository.'name' = 'pytorch'
+    AND p.head_commit.'timestamp' >= {startTime: DateTime64(3)}
+    AND p.head_commit.'timestamp' < {stopTime: DateTime64(3)}
 ),
 filtered_jobs AS (
   SELECT
     time,
     sha,
-    IF (name LIKE '%(%' AND name NOT LIKE '%)%', CONCAT(name, ')'), name) AS name,
+    IF (
+      name LIKE '%(%'
+      AND name NOT LIKE '%)%',
+      CONCAT(name, ')'),
+      name
+    ) AS name,
     CAST(
       SUM(
         CASE
@@ -69,18 +78,8 @@ reds AS (
   SELECT
     time,
     sha,
-    ARRAY_REMOVE(
-      ARRAY_AGG(
-        IF (any_red > 0, name)
-      ),
-      NULL
-    ) AS failures,
-    ARRAY_REMOVE(
-      ARRAY_AGG(
-        IF (any_red = 0, name)
-      ),
-      NULL
-    ) AS successes,
+    arrayFilter(x -> x != '', groupArray(IF (any_red > 0, name, ''))) AS failures,
+    arrayFilter(x -> x != '', groupArray(IF (any_red = 0, name, ''))) AS successes,
     author,
     body
   FROM
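For readers less familiar with the ClickHouse side of this diff: `arrayElement(splitByString(sep, s), n)` replaces Rockset's `ELEMENT_AT(SPLIT(s, sep), n)`, and the `IF` in filtered_jobs re-appends a closing parenthesis when the shard suffix was cut off. A small TypeScript sketch of the equivalent job-name normalization, for illustration only; the function name is made up and the real logic lives in the query:

```ts
// Illustration of the name the query builds:
// "<workflow> / <job> / <first shard token>", re-closing a dangling "(".
function normalizeJobName(workflowName: string, jobName: string): string {
  const parts = jobName.split(" / ");
  // arrayElement(splitByString(' / ', j.name), 1)
  const job = parts[0] ?? "";
  // arrayElement(splitByString(', ', arrayElement(splitByString(' / ', j.name), 2)), 1)
  const shard = (parts[1] ?? "").split(", ")[0] ?? "";
  let name = `${workflowName} / ${job} / ${shard}`;
  // IF (name LIKE '%(%' AND name NOT LIKE '%)%', CONCAT(name, ')'), name)
  if (name.includes("(") && !name.includes(")")) {
    name += ")";
  }
  return name;
}

// normalizeJobName("trunk", "linux-focal-py3.8 / test (default, 1, 3, linux.2xlarge)")
//   -> "trunk / linux-focal-py3.8 / test (default)"
```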

torchci/clickhouse_queries/master_commit_red_percent_groups/params.json

Lines changed: 2 additions & 2 deletions
@@ -2,5 +2,5 @@
   "granularity": "String",
   "startTime": "DateTime64(3)",
   "stopTime": "DateTime64(3)",
-  "workflowNames": "String"
-}
+  "workflowNames": "Array(String)"
+}

torchci/clickhouse_queries/master_commit_red_percent_groups/query.sql

Lines changed: 36 additions & 27 deletions
@@ -1,39 +1,48 @@
--- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
+-- This query is used to show failures chart on https://hud.pytorch.org/reliability/pytorch/pytorch
 WITH all_jobs AS (
   SELECT
-    push._event_time AS time,
-    job.conclusion AS conclusion,
-    push.head_commit.id AS sha,
+    p.head_commit.'timestamp' AS time,
+    j.conclusion AS conclusion,
+    p.head_commit.'id' AS sha,
     CONCAT(
-      workflow.name,
+      w.name,
       ' / ',
-      ELEMENT_AT(SPLIT(job.name, ' / '), 1),
-      CONCAT(' / ', ELEMENT_AT(SPLIT(ELEMENT_AT(SPLIT(job.name, ' / '), 2), ', '), 1))
-    ) AS name,
+      arrayElement(splitByString(' / ', j.name), 1),
+      ' / ',
+      arrayElement(
+        splitByString(', ', arrayElement(splitByString(' / ', j.name), 2)),
+        1
+      )
+    ) AS name
   FROM
-    commons.workflow_job job
-    JOIN commons.workflow_run workflow ON workflow.id = job.run_id
-    JOIN push on workflow.head_commit.id = push.head_commit.id
+    default.workflow_job j FINAL
+    JOIN default.workflow_run w FINAL ON w.id = j.run_id
+    JOIN default.push p FINAL on w.head_commit.'id' = p.head_commit.'id'
   WHERE
-    job.name != 'ciflow_should_run'
-    AND job.name != 'generate-test-matrix'
-    AND job.name NOT LIKE '%rerun_disabled_tests%'
-    AND job.name NOT LIKE '%filter%'
-    AND job.name NOT LIKE '%unstable%'
-    AND job.name LIKE '%/%'
-    AND ARRAY_CONTAINS(SPLIT(:workflowNames, ','), LOWER(workflow.name))
-    AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
-    AND push.ref = 'refs/heads/main'
-    AND push.repository.owner.name = 'pytorch'
-    AND push.repository.name = 'pytorch'
-    AND push._event_time >= PARSE_DATETIME_ISO8601(:startTime)
-    AND push._event_time < PARSE_DATETIME_ISO8601(:stopTime)
+    j.name != 'ciflow_should_run'
+    AND j.name != 'generate-test-matrix'
+    AND j.name NOT LIKE '%rerun_disabled_tests%'
+    AND j.name NOT LIKE '%filter%'
+    AND j.name NOT LIKE '%unstable%'
+    AND j.name LIKE '%/%'
+    AND has({workflowNames: Array(String)}, lower(w.name))
+    AND w.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
+    AND p.ref = 'refs/heads/main'
+    AND p.repository.'owner'.'name' = 'pytorch'
+    AND p.repository.'name' = 'pytorch'
+    AND p.head_commit.'timestamp' >= {startTime: DateTime64(3)}
+    AND p.head_commit.'timestamp' < {stopTime: DateTime64(3)}
 ),
 reds AS(
   SELECT
     time,
     sha,
-    IF (name LIKE '%(%' AND name NOT LIKE '%)%', CONCAT(name, ')'), name) AS name,
+    IF (
+      name LIKE '%(%'
+      AND name NOT LIKE '%)%',
+      CONCAT(name, ')'),
+      name
+    ) AS name,
     CAST(
       SUM(
         CASE
@@ -59,9 +68,9 @@ reds AS(
 ),
 reds_percentage AS (
   SELECT
-    FORMAT_TIMESTAMP('%Y-%m-%d', DATE_TRUNC(:granularity, time)) AS granularity_bucket,
+    formatDateTime(DATE_TRUNC({granularity: String}, time), '%Y-%m-%d') AS granularity_bucket,
     name,
-    ROUND(AVG(any_red) * 100, 2) AS red,
+    ROUND(AVG(any_red) * 100, 2) AS red
   FROM
     reds
   GROUP BY
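The chart query now buckets time with `formatDateTime(DATE_TRUNC({granularity: String}, time), '%Y-%m-%d')`, and the page code below clamps startTime/stopTime to the same boundaries with dayjs so the chart axis lines up with the returned buckets. A minimal sketch of that clamping, assuming dayjs with the utc plugin (which the page already uses); the Granularity union here is a local assumption for the example:

```ts
import dayjs from "dayjs";
import utc from "dayjs/plugin/utc";

dayjs.extend(utc);

type Granularity = "hour" | "day" | "week" | "month";

// Clamp a timestamp to the start of its granularity bucket, mirroring
// DATE_TRUNC({granularity: String}, time) in the query above.
function clampToBucket(time: string, granularity: Granularity): string {
  return dayjs.utc(time).startOf(granularity).format("YYYY-MM-DD");
}

// clampToBucket("2024-02-07T13:45:00.000", "day") -> "2024-02-07"
```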

torchci/pages/reliability/[repoOwner]/[repoName]/[[...page]].tsx

Lines changed: 28 additions & 68 deletions
@@ -17,7 +17,6 @@ import { EChartsOption } from "echarts";
 import ReactECharts from "echarts-for-react";
 import { fetcher } from "lib/GeneralUtils";
 import { approximateFailureByTypePercent } from "lib/metricUtils";
-import { RocksetParam } from "lib/rockset";
 import { JobAnnotation } from "lib/types";
 import { useRouter } from "next/router";
 import { useCallback, useState } from "react";
@@ -36,21 +35,19 @@ const URL_PREFIX = `/reliability/pytorch/pytorch?jobName=`;
 function GroupReliabilityPanel({
   title,
   queryName,
-  queryCollection,
   queryParams,
   metricHeaderName,
   metricName,
   filter,
 }: {
   title: string;
   queryName: string;
-  queryCollection: string;
-  queryParams: RocksetParam[];
+  queryParams: { [key: string]: any };
   metricHeaderName: string;
   metricName: string;
   filter: any;
 }) {
-  const url = `/api/query/${queryCollection}/${queryName}?parameters=${encodeURIComponent(
+  const url = `/api/clickhouse/${queryName}?parameters=${encodeURIComponent(
     JSON.stringify(queryParams)
   )}`;

@@ -201,14 +198,14 @@ function Graphs({
   filter,
   toggleFilter,
 }: {
-  queryParams: RocksetParam[];
+  queryParams: { [key: string]: any };
   granularity: Granularity;
   checkboxRef: any;
   filter: any;
   toggleFilter: any;
 }) {
   const queryName = "master_commit_red_percent_groups";
-  const url = `/api/query/metrics/${queryName}?parameters=${encodeURIComponent(
+  const url = `/api/clickhouse/${queryName}?parameters=${encodeURIComponent(
     JSON.stringify(queryParams)
   )}`;
   const { data, error } = useSWR(url, fetcher, {
@@ -228,13 +225,9 @@ function Graphs({
   }

   // Clamp to the nearest granularity (e.g. nearest hour) so that the times will
-  // align with the data we get from Rockset
-  const startTime = dayjs(
-    queryParams.find((p) => p.name === "startTime")?.value
-  ).startOf(granularity);
-  const stopTime = dayjs(
-    queryParams.find((p) => p.name === "stopTime")?.value
-  ).startOf(granularity);
+  // align with the data we get from the database
+  const startTime = dayjs(queryParams["startTime"]).startOf(granularity);
+  const stopTime = dayjs(queryParams["stopTime"]).startOf(granularity);

   const redFieldName = "red";
   const timeFieldName = "granularity_bucket";
@@ -314,28 +307,11 @@ export default function Page() {
     setFilter(next);
   }

-  const queryParams: RocksetParam[] = [
-    {
-      name: "timezone",
-      type: "string",
-      value: Intl.DateTimeFormat().resolvedOptions().timeZone,
-    },
-    {
-      name: "startTime",
-      type: "string",
-      value: startTime,
-    },
-    {
-      name: "stopTime",
-      type: "string",
-      value: stopTime,
-    },
-    {
-      name: "granularity",
-      type: "string",
-      value: granularity,
-    },
-  ];
+  const queryParams: { [key: string]: any } = {
+    granularity: granularity,
+    startTime: dayjs(startTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
+    stopTime: dayjs(stopTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
+  };

   const allWorkflows =
     PRIMARY_WORKFLOWS.concat(SECONDARY_WORKFLOWS).concat(UNSTABLE_WORKFLOWS);
@@ -348,7 +324,6 @@ export default function Page() {
   }, [jobName]);

   const queryName = "master_commit_red_jobs";
-  const queryCollection = "commons";
   const metricName = "red";
   const metricHeaderName = "Failures %";

@@ -374,13 +349,10 @@ export default function Page() {

       <Grid item xs={6} height={ROW_HEIGHT + ROW_GAP}>
         <Graphs
-          queryParams={queryParams.concat([
-            {
-              name: "workflowNames",
-              type: "string",
-              value: allWorkflows.join(","),
-            },
-          ])}
+          queryParams={{
+            workflowNames: allWorkflows,
+            ...queryParams,
+          }}
           granularity={granularity}
           checkboxRef={checkboxRef}
           filter={filter}
@@ -393,14 +365,10 @@ export default function Page() {
         <GroupReliabilityPanel
           title={`Primary jobs (${PRIMARY_WORKFLOWS.join(", ")})`}
           queryName={queryName}
-          queryCollection={queryCollection}
-          queryParams={queryParams.concat([
-            {
-              name: "workflowNames",
-              type: "string",
-              value: PRIMARY_WORKFLOWS.join(","),
-            },
-          ])}
+          queryParams={{
+            workflowNames: PRIMARY_WORKFLOWS,
+            ...queryParams,
+          }}
           metricName={metricName}
           metricHeaderName={metricHeaderName}
           filter={filter}
@@ -411,14 +379,10 @@ export default function Page() {
         <GroupReliabilityPanel
           title={`Secondary jobs (${SECONDARY_WORKFLOWS.join(", ")})`}
           queryName={queryName}
-          queryCollection={queryCollection}
-          queryParams={queryParams.concat([
-            {
-              name: "workflowNames",
-              type: "string",
-              value: SECONDARY_WORKFLOWS.join(","),
-            },
-          ])}
+          queryParams={{
+            workflowNames: SECONDARY_WORKFLOWS,
+            ...queryParams,
+          }}
           metricName={metricName}
           metricHeaderName={metricHeaderName}
           filter={filter}
@@ -429,14 +393,10 @@ export default function Page() {
         <GroupReliabilityPanel
           title={"Unstable jobs"}
           queryName={queryName}
-          queryCollection={queryCollection}
-          queryParams={queryParams.concat([
-            {
-              name: "workflowNames",
-              type: "string",
-              value: UNSTABLE_WORKFLOWS.join(","),
-            },
-          ])}
+          queryParams={{
+            workflowNames: UNSTABLE_WORKFLOWS,
+            ...queryParams,
+          }}
           metricName={metricName}
           metricHeaderName={metricHeaderName}
           filter={filter}
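Putting the pieces together: the page now sends a plain object whose keys match each query's params.json and fetches from /api/clickhouse/<queryName> instead of /api/query/<collection>/<queryName>. A condensed sketch of that request path, using only constructs that appear in the diff above; the standalone helper function is illustrative and not part of the commit:

```ts
import dayjs from "dayjs";
import utc from "dayjs/plugin/utc";

dayjs.extend(utc);

// Build the ClickHouse query URL the way the migrated page does:
// parameter types come from params.json (DateTime64(3), Array(String)).
function reliabilityUrl(
  queryName: string,
  workflowNames: string[],
  startTime: dayjs.Dayjs,
  stopTime: dayjs.Dayjs,
  granularity: string
): string {
  const queryParams: { [key: string]: any } = {
    granularity: granularity,
    startTime: startTime.utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
    stopTime: stopTime.utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
    workflowNames: workflowNames,
  };
  return `/api/clickhouse/${queryName}?parameters=${encodeURIComponent(
    JSON.stringify(queryParams)
  )}`;
}

// e.g. reliabilityUrl("master_commit_red_jobs", ["pull", "trunk"],
//        dayjs().subtract(14, "day"), dayjs(), "day")
```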
