Skip to content

Commit 28ccdef

Browse files
authored
Migrate LLM dashboards to v3 (#6057)
This migrates the queries from `oss_ci_benchmark_v2` to `oss_ci_benchmark_v3`. This will unblock the following updates:

* Add delegation backend support for the [ExecuTorch dashboard](https://hud.pytorch.org/benchmark/llms?repoName=pytorch%2Fexecutorch) instead of bundling the backend together with the model name (e.g. `edsr coreml_all`) (cc @guangy10)
* Add the LLM AO dashboard, whose data is only available in `oss_ci_benchmark_v3` (cc @jerryzh168)

Some minor fixes that go with this:

* Change the parameters to `benchmarks` and `models`. These are clearer than `filenames` and `names`.
* Remove the `getJobId` param. It originated from the TorchInductor query, where the job ID is a string. In `oss_ci_benchmark_v3`, the job ID is a UInt64, so there is no longer any saving from omitting it.

### Testing

* https://torchci-git-fork-huydhn-migrate-llm-dashboard-v3-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fpytorch
* https://torchci-git-fork-huydhn-migrate-llm-dashboard-v3-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fexecutorch
1 parent 1577e6b commit 28ccdef

File tree

9 files changed

+193
-135
lines changed

9 files changed

+193
-135
lines changed

torchci/clickhouse_queries/oss_ci_benchmark_branches/params.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
"deviceArch": "String",
33
"dtypes": "Array(String)",
44
"excludedMetrics": "Array(String)",
5-
"filenames": "Array(String)",
6-
"names": "Array(String)",
5+
"benchmarks": "Array(String)",
6+
"models": "Array(String)",
77
"repo": "String",
88
"startTime": "DateTime64(3)",
99
"stopTime": "DateTime64(3)"
Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,65 @@
11
-- This query is used to get the list of branches and commits used by different
22
-- OSS CI benchmark experiments. This powers HUD benchmarks dashboards
3+
WITH benchmarks AS (
4+
SELECT
5+
o.head_branch AS head_branch,
6+
o.head_sha AS head_sha,
7+
o.workflow_id AS id,
8+
IF(
9+
empty(o.runners),
10+
tupleElement(o.benchmark, 'extra_info') [ 'device' ],
11+
tupleElement(o.runners [ 1 ], 'name')
12+
) AS device,
13+
IF(
14+
empty(o.runners),
15+
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
16+
tupleElement(o.runners [ 1 ], 'type')
17+
) AS arch,
18+
toStartOfDay(fromUnixTimestamp(o.timestamp)) AS event_time
19+
FROM
20+
benchmark.oss_ci_benchmark_v3 o
21+
WHERE
22+
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
23+
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
24+
AND o.repo = {repo: String }
25+
AND (
26+
has({benchmarks: Array(String) }, o.benchmark.name)
27+
OR empty({benchmarks: Array(String) })
28+
)
29+
AND (
30+
has({models: Array(String) }, o.model.name)
31+
OR empty({models: Array(String) })
32+
)
33+
AND (
34+
has({dtypes: Array(String) }, o.benchmark.dtype)
35+
OR empty({dtypes: Array(String) })
36+
)
37+
AND (
38+
NOT has({excludedMetrics: Array(String) }, o.metric.name)
39+
OR empty({excludedMetrics: Array(String) })
40+
)
41+
AND notEmpty(o.metric.name)
42+
AND notEmpty(o.benchmark.dtype)
43+
)
344
SELECT
4-
DISTINCT w.head_branch AS head_branch,
5-
w.head_sha,
6-
w.id,
7-
toStartOfDay(fromUnixTimestamp64Milli(o.timestamp)) AS event_time,
8-
o.filename
45+
DISTINCT replaceOne(head_branch, 'refs/heads/', '') AS head_branch,
46+
head_sha,
47+
id,
48+
event_time
949
FROM
10-
benchmark.oss_ci_benchmark_v2 o
11-
LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
50+
benchmarks
1251
WHERE
13-
o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
14-
AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
15-
AND (
16-
has({filenames: Array(String) }, o.filename)
17-
OR empty({filenames: Array(String) })
18-
)
19-
AND (
20-
has({names: Array(String) }, o.name)
21-
OR empty({names: Array(String) })
22-
)
2352
-- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
24-
AND (
53+
(
2554
CONCAT(
26-
o.device,
55+
device,
2756
' (',
28-
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
57+
IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
2958
')'
3059
) = {deviceArch: String }
3160
OR {deviceArch: String } = ''
3261
)
33-
AND (
34-
has({dtypes: Array(String) }, o.dtype)
35-
OR empty({dtypes: Array(String) })
36-
)
37-
AND (
38-
NOT has({excludedMetrics: Array(String) }, o.metric)
39-
OR empty({excludedMetrics: Array(String) })
40-
)
41-
AND notEmpty(o.metric)
42-
AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
43-
AND notEmpty(o.dtype)
44-
AND notEmpty(o.device)
62+
AND notEmpty(device)
4563
ORDER BY
46-
w.head_branch,
64+
head_branch,
4765
event_time DESC

torchci/clickhouse_queries/oss_ci_benchmark_llms/params.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
"deviceArch": "String",
55
"dtypes": "Array(String)",
66
"excludedMetrics": "Array(String)",
7-
"filenames": "Array(String)",
8-
"getJobId": "Bool",
7+
"benchmarks": "Array(String)",
98
"granularity": "String",
10-
"names": "Array(String)",
9+
"models": "Array(String)",
1110
"repo": "String",
1211
"startTime": "DateTime64(3)",
1312
"stopTime": "DateTime64(3)"

torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql

Lines changed: 73 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,93 @@
11
--- This query is used to get the LLMs benchmark results from different experiments. It
22
--- queries the TPS and memory bandwidth for each model / quantization combos. This powers
33
--- the LLMs benchmark dashboard
4+
WITH benchmarks AS (
5+
SELECT
6+
replaceOne(o.head_branch, 'refs/heads/', '') AS head_branch,
7+
o.workflow_id AS workflow_id,
8+
o.job_id AS job_id,
9+
o.model.name AS model,
10+
o.model.backend AS backend,
11+
o.metric.name AS metric,
12+
floor(arrayAvg(o.metric.benchmark_values), 2) AS actual,
13+
floor(toFloat64(o.metric.target_value), 2) AS target,
14+
o.benchmark.dtype AS dtype,
15+
IF(
16+
empty(o.runners),
17+
tupleElement(o.benchmark, 'extra_info') [ 'device' ],
18+
tupleElement(o.runners [ 1 ], 'name')
19+
) AS device,
20+
IF(
21+
empty(o.runners),
22+
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
23+
tupleElement(o.runners [ 1 ], 'type')
24+
) AS arch,
25+
DATE_TRUNC(
26+
{granularity: String },
27+
fromUnixTimestamp(o.timestamp)
28+
) AS granularity_bucket
29+
FROM
30+
benchmark.oss_ci_benchmark_v3 o
31+
WHERE
32+
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
33+
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
34+
AND o.repo = {repo: String }
35+
AND (
36+
has({commits: Array(String) }, o.head_sha)
37+
OR empty({commits: Array(String) })
38+
)
39+
AND (
40+
has({benchmarks: Array(String) }, o.benchmark.name)
41+
OR empty({benchmarks: Array(String) })
42+
)
43+
AND (
44+
has({models: Array(String) }, o.model.name)
45+
OR empty({models: Array(String) })
46+
)
47+
AND (
48+
has({dtypes: Array(String) }, o.benchmark.dtype)
49+
OR empty({dtypes: Array(String) })
50+
)
51+
AND (
52+
NOT has({excludedMetrics: Array(String) }, o.metric.name)
53+
OR empty({excludedMetrics: Array(String) })
54+
)
55+
AND notEmpty(o.metric.name)
56+
AND notEmpty(o.benchmark.dtype)
57+
)
458
SELECT
5-
DISTINCT o.workflow_id AS workflow_id,
6-
-- As the JSON response is pretty big, only return the field if it's needed
7-
IF({getJobId: Bool}, o.job_id, '') AS job_id,
8-
o.name,
9-
o.metric,
10-
floor(toFloat64(o.actual), 2) AS actual,
11-
floor(toFloat64(o.target), 2) AS target,
12-
DATE_TRUNC(
13-
{granularity: String },
14-
fromUnixTimestamp64Milli(o.timestamp)
15-
) AS granularity_bucket,
16-
o.dtype,
17-
o.device,
18-
-- NB: Default to NVIDIA A100-SXM4-40GB for old records without arch column
19-
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) as arch
59+
DISTINCT workflow_id,
60+
job_id,
61+
CONCAT(model, ' ', backend) AS name,
62+
metric,
63+
actual,
64+
target,
65+
dtype,
66+
device,
67+
arch,
68+
granularity_bucket
2069
FROM
21-
benchmark.oss_ci_benchmark_v2 o
22-
LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
70+
benchmarks
2371
WHERE
24-
o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
25-
AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
26-
AND (
27-
has({branches: Array(String) }, w.head_branch)
28-
OR empty({branches: Array(String) })
29-
)
30-
AND (
31-
has({commits: Array(String) }, w.head_sha)
32-
OR empty({commits: Array(String) })
33-
)
34-
AND (
35-
has({filenames: Array(String) }, o.filename)
36-
OR empty({filenames: Array(String) })
72+
(
73+
has({models: Array(String) }, CONCAT(model, ' ', backend))
74+
OR empty({models: Array(String) })
3775
)
3876
AND (
39-
has({names: Array(String) }, o.name)
40-
OR empty({names: Array(String) })
77+
has({branches: Array(String) }, head_branch)
78+
OR empty({branches: Array(String) })
4179
)
4280
-- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
4381
AND (
4482
CONCAT(
45-
o.device,
83+
device,
4684
' (',
47-
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
85+
IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
4886
')'
4987
) = {deviceArch: String }
5088
OR {deviceArch: String } = ''
5189
)
52-
AND (
53-
has({dtypes: Array(String) }, o.dtype)
54-
OR empty({dtypes: Array(String) })
55-
)
56-
AND (
57-
NOT has({excludedMetrics: Array(String) }, o.metric)
58-
OR empty({excludedMetrics: Array(String) })
59-
)
60-
AND notEmpty(o.metric)
61-
AND notEmpty(o.dtype)
62-
AND notEmpty(o.device)
63-
AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
90+
AND notEmpty(device)
6491
ORDER BY
6592
granularity_bucket DESC,
6693
workflow_id DESC,

torchci/clickhouse_queries/oss_ci_benchmark_names/params.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
"deviceArch": "String",
33
"dtypes": "Array(String)",
44
"excludedMetrics": "Array(String)",
5-
"filenames": "Array(String)",
6-
"names": "Array(String)",
5+
"benchmarks": "Array(String)",
6+
"models": "Array(String)",
77
"repo": "String",
88
"startTime": "DateTime64(3)",
99
"stopTime": "DateTime64(3)"
Lines changed: 58 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,70 @@
11
--- This query is used by HUD benchmarks dashboards to get the list of experiment names
2+
WITH benchmarks AS (
3+
SELECT
4+
o.benchmark.name AS benchmark,
5+
o.model.name AS model,
6+
o.model.backend AS backend,
7+
o.metric.name AS metric,
8+
o.benchmark.dtype AS dtype,
9+
IF(
10+
empty(o.runners),
11+
tupleElement(o.benchmark, 'extra_info') [ 'device' ],
12+
tupleElement(o.runners [ 1 ], 'name')
13+
) AS device,
14+
IF(
15+
empty(o.runners),
16+
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
17+
tupleElement(o.runners [ 1 ], 'type')
18+
) AS arch
19+
FROM
20+
benchmark.oss_ci_benchmark_v3 o
21+
WHERE
22+
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
23+
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
24+
AND o.repo = {repo: String }
25+
AND (
26+
has({benchmarks: Array(String) }, o.benchmark.name)
27+
OR empty({benchmarks: Array(String) })
28+
)
29+
AND (
30+
has({models: Array(String) }, o.model.name)
31+
OR empty({models: Array(String) })
32+
)
33+
AND (
34+
has({dtypes: Array(String) }, o.benchmark.dtype)
35+
OR empty({dtypes: Array(String) })
36+
)
37+
AND (
38+
NOT has({excludedMetrics: Array(String) }, o.metric.name)
39+
OR empty({excludedMetrics: Array(String) })
40+
)
41+
AND notEmpty(o.metric.name)
42+
AND notEmpty(o.benchmark.dtype)
43+
)
244
SELECT
3-
DISTINCT o.filename AS filename,
4-
o.name,
5-
o.metric,
6-
o.dtype,
7-
o.device,
8-
-- NB: Default to NVIDIA A100-SXM4-40GB for old records without arch column
9-
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch) AS arch
45+
DISTINCT benchmark,
46+
CONCAT(model, ' ', backend) AS name,
47+
metric,
48+
dtype,
49+
device,
50+
arch
1051
FROM
11-
benchmark.oss_ci_benchmark_v2 o
12-
LEFT JOIN default .workflow_run w FINAL ON o.workflow_id = w.id
52+
benchmarks
1353
WHERE
14-
o.timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
15-
AND o.timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
16-
AND (
17-
has({filenames: Array(String) }, o.filename)
18-
OR empty({filenames: Array(String) })
19-
)
20-
AND (
21-
has({names: Array(String) }, o.name)
22-
OR empty({names: Array(String) })
23-
)
2454
-- NB: DEVICE (ARCH) is the display format used by HUD when grouping together these two fields
25-
AND (
55+
(
2656
CONCAT(
27-
o.device,
57+
device,
2858
' (',
29-
IF(empty(o.arch), 'NVIDIA A100-SXM4-40GB', o.arch),
59+
IF(empty(arch), 'NVIDIA A100-SXM4-40GB', arch),
3060
')'
3161
) = {deviceArch: String }
3262
OR {deviceArch: String } = ''
3363
)
34-
AND (
35-
has({dtypes: Array(String) }, o.dtype)
36-
OR empty({dtypes: Array(String) })
37-
)
38-
AND (
39-
NOT has({excludedMetrics: Array(String) }, o.metric)
40-
OR empty({excludedMetrics: Array(String) })
41-
)
42-
AND notEmpty(o.metric)
43-
AND w.html_url LIKE CONCAT('%', {repo: String }, '%')
44-
AND notEmpty(o.dtype)
45-
AND notEmpty(o.device)
64+
AND notEmpty(device)
4665
ORDER BY
47-
o.filename,
48-
o.name,
49-
o.metric,
50-
o.dtype,
51-
o.device
66+
benchmark,
67+
name,
68+
metric,
69+
dtype,
70+
device

torchci/components/benchmark/llms/common.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ import { BranchAndCommit } from "lib/types";
22

33
export const REPOS = ["pytorch/pytorch", "pytorch/executorch"];
44
export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = {
5-
"pytorch/pytorch": ["gpt_fast_benchmark"],
6-
"pytorch/executorch": ["android-perf", "apple-perf"],
5+
"pytorch/pytorch": ["PyTorch gpt-fast benchmark"],
6+
"pytorch/executorch": ["ExecuTorch"],
77
};
88
export const EXCLUDED_METRICS: string[] = ["load_status"];
99
export const DEFAULT_MODEL_NAME = "All Models";

0 commit comments

Comments
 (0)