Skip to content

Commit d575730

Browse files
authored
[Device ID] visualize benchmark graph with group_key (#6593)
Add group_key, which combines the display name and the deviceId (if any), for dp visualization; rename additional_info to metadata_info. --------- Signed-off-by: Yang Wang <[email protected]>
1 parent 6762d61 commit d575730

File tree

4 files changed

+40
-13
lines changed

4 files changed

+40
-13
lines changed

torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,19 @@ WITH benchmarks AS (
6363
IF(
6464
tupleElement(o.benchmark, 'extra_info')['failure_type'] = '',
6565
'',
66-
-- Default to false
66+
-- Default to empty
6767
tupleElement(o.benchmark, 'extra_info')['failure_type']
68-
)
69-
) AS addtional_info -- additional_info for a record
68+
),
69+
'device_id',
70+
IF(
71+
tupleElement(o.benchmark, 'extra_info')['instance_arn'] = '',
72+
'',
73+
-- Default to empty
74+
tupleElement(o.benchmark, 'extra_info')['instance_arn']
75+
),
76+
'timestamp',
77+
formatDateTime(fromUnixTimestamp(o.timestamp), '%Y-%m-%dT%H:%i:%sZ')
78+
) AS metadata_info -- metadata_info for a record
7079
FROM
7180
benchmark.oss_ci_benchmark_v3 o
7281
WHERE
@@ -119,7 +128,7 @@ SELECT DISTINCT
119128
arch,
120129
granularity_bucket,
121130
extra,
122-
addtional_info
131+
metadata_info
123132
FROM
124133
benchmarks
125134
WHERE

torchci/components/benchmark/llms/components/LLMsGraphPanel.tsx

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
Tooltip,
1616
Typography,
1717
} from "@mui/material";
18+
import { deepClone } from "@mui/x-data-grid/internals";
1819
import {
1920
COMMIT_TO_WORKFLOW_ID,
2021
WORKFLOW_ID_TO_COMMIT,
@@ -104,10 +105,11 @@ export default function LLMsGraphPanel({
104105
const lWorkflowId = COMMIT_TO_WORKFLOW_ID[lBranchAndCommit.commit];
105106
const rWorkflowId = COMMIT_TO_WORKFLOW_ID[rBranchAndCommit.commit];
106107

107-
const groupByFieldName = "display";
108+
const groupByFieldName = "group_key";
108109

109110
const chartData: { [k: string]: any } = {};
110111
const graphSeries: { [k: string]: any } = {};
112+
111113
metricNames.forEach((metric: string) => {
112114
if (
113115
modelName === DEFAULT_MODEL_NAME &&
@@ -210,9 +212,10 @@ export default function LLMsGraphPanel({
210212

211213
return record;
212214
});
213-
215+
const graphItems = formGraphItem(chartData[metric]);
216+
// group by timestamp to identify devices with the same timestamp
214217
graphSeries[metric] = seriesWithInterpolatedTimes(
215-
chartData[metric],
218+
graphItems,
216219
startTime,
217220
stopTime,
218221
granularity,
@@ -371,7 +374,7 @@ const MetricTable = ({
371374
return (
372375
<TableRow key={index}>
373376
<TableCell>
374-
<span>{entry.granularity_bucket} </span>
377+
<span>{entry?.metadata_info.timestamp} </span>
375378
</TableCell>
376379
<TableCell sx={{ py: 0.25 }}>
377380
<code>
@@ -408,3 +411,18 @@ const MetricTable = ({
408411
</TableContainer>
409412
);
410413
};
414+
415+
/**
 * Creates the chart items visualized in the series graph.
 *
 * Every input record is deep-copied (the input array and its records are
 * never mutated) and the copy is tagged with a `group_key`:
 *   - `"<display> (<device_id>)"` when the record carries a non-empty
 *     `metadata_info.device_id`;
 *   - `"<display>"` otherwise.
 *
 * @param data Records to chart; each is read for `display` and, when present,
 *   `metadata_info.device_id`.
 * @returns A new array of cloned records, in input order, each with
 *   `group_key` set.
 */
function formGraphItem(data: any[]) {
  return data.map((item) => {
    // `metadata_info` is declared optional on LLMsBenchmarkData, and a record
    // may carry it without a `device_id`. Treat both cases like an empty id
    // instead of throwing or producing a "name (undefined)" group key.
    const deviceId = item.metadata_info?.device_id ?? "";
    const displayName = item.display;

    // Standard deep copy; avoids depending on a private MUI "internals" helper.
    const seriesData = structuredClone(item);
    seriesData.group_key =
      deviceId !== "" ? `${displayName} (${deviceId})` : displayName;
    return seriesData;
  });
}

torchci/lib/benchmark/llms/common.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ export interface LLMsBenchmarkData {
118118
arch: string;
119119
display?: string;
120120
extra?: { [key: string]: string };
121-
additional_info?: { [key: string]: string };
121+
metadata_info?: { [key: string]: string };
122122
}
123123

124124
export interface BranchAndCommitPerfData extends BranchAndCommit {

torchci/lib/benchmark/llms/utils/llmUtils.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -401,12 +401,12 @@ const processJobLevelFailureRows = (
401401
if ("FAILURE_REPORT" in record) {
402402
const failure_record = record["FAILURE_REPORT"];
403403
const hasrFailure =
404-
"r" in failure_record && failure_record["r"].additional_info
405-
? failure_record["r"].additional_info["failure_type"] === "GIT_JOB"
404+
"r" in failure_record && failure_record["r"].metadata_info
405+
? failure_record["r"].metadata_info["failure_type"] === "GIT_JOB"
406406
: false;
407407
const haslFailure =
408-
"l" in failure_record && failure_record["l"].additional_info
409-
? failure_record["l"].additional_info["failure_type"] === "GIT_JOB"
408+
"l" in failure_record && failure_record["l"].metadata_info
409+
? failure_record["l"].metadata_info["failure_type"] === "GIT_JOB"
410410
: false;
411411
isJobLevelFailure = hasrFailure || haslFailure;
412412
}

0 commit comments

Comments
 (0)