Skip to content

Commit ca303db

Browse files
authored
Add TorchAO aggregated geomean speedup metric (#6119)
This is the follow-up to #6118 that adds an aggregated geomean speedup metric for all models, grouped by device. I am limiting this change to the TorchAO `speedup` metric for now, until I have time to polish the rest of the metrics (or until it is clear whether they need to be added at all).

### Testing

https://torchci-git-fork-huydhn-add-benchmark-summary-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fao
1 parent d407580 commit ca303db

File tree

5 files changed

+163
-91
lines changed

5 files changed

+163
-91
lines changed

torchci/components/benchmark/llms/ModelGraphPanel.tsx

Lines changed: 117 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import {
1919
} from "components/metrics/panels/TimeSeriesPanel";
2020
import dayjs from "dayjs";
2121
import { computeSpeedup } from "lib/benchmark/aoUtils";
22-
import { useBenchmark } from "lib/benchmark/llmUtils";
22+
import { computeGeomean, useBenchmark } from "lib/benchmark/llmUtils";
2323
import { BranchAndCommit } from "lib/types";
2424

2525
const GRAPH_ROW_HEIGHT = 245;
@@ -64,10 +64,6 @@ export function GraphPanel({
6464
);
6565
}
6666

67-
if (modelName === DEFAULT_MODEL_NAME) {
68-
return <></>;
69-
}
70-
7167
const dataWithSpeedup = computeSpeedup(repoName, data);
7268

7369
// Clamp to the nearest granularity (e.g. nearest hour) so that the times will
@@ -84,39 +80,67 @@ export function GraphPanel({
8480
const chartData: { [k: string]: any } = {};
8581
const graphSeries: { [k: string]: any } = {};
8682
metricNames.forEach((metric: string) => {
87-
chartData[metric] = dataWithSpeedup
88-
.filter((record: LLMsBenchmarkData) => {
89-
return (
90-
record.model === modelName &&
91-
(record.dtype === dtypeName || dtypeName === DEFAULT_DTYPE_NAME) &&
92-
(`${record.device} (${record.arch})` === deviceName ||
93-
deviceName === DEFAULT_DEVICE_NAME) &&
94-
record.metric === metric
95-
);
96-
})
97-
.filter((record: LLMsBenchmarkData) => {
98-
const id = record.workflow_id;
99-
return (
100-
(id >= lWorkflowId && id <= rWorkflowId) ||
101-
(id <= lWorkflowId && id >= rWorkflowId) ||
102-
(lWorkflowId === undefined && rWorkflowId === undefined)
103-
);
104-
})
105-
.map((record: LLMsBenchmarkData) => {
106-
const model = record.model;
107-
const dtype = record.dtype;
108-
const device = record.device;
83+
// TODO (huydhn): Only display aggregated speedup metric for now
84+
if (modelName === DEFAULT_MODEL_NAME && metric !== "speedup") {
85+
chartData[metric] = [];
86+
return;
87+
}
88+
89+
const geomean = computeGeomean(dataWithSpeedup, metric);
90+
chartData[metric] =
91+
modelName === DEFAULT_MODEL_NAME
92+
? geomean
93+
.filter((record: LLMsBenchmarkData) => {
94+
const id = record.workflow_id;
95+
return (
96+
(id >= lWorkflowId && id <= rWorkflowId) ||
97+
(id <= lWorkflowId && id >= rWorkflowId) ||
98+
(lWorkflowId === undefined && rWorkflowId === undefined) ||
99+
// This is a hack to handle the mock workflow ID coming from running TorchAO benchmark locally
100+
// In such a case, the workflow ID is actually the epoch timestamp and the value is completely
101+
// different than the regular GitHub workflow ID
102+
0.5 > rWorkflowId / lWorkflowId ||
103+
rWorkflowId / lWorkflowId > 2
104+
);
105+
})
106+
.map((record: LLMsBenchmarkData) => {
107+
record.display = `${record.device} (${record.arch})`;
108+
return record;
109+
})
110+
: dataWithSpeedup
111+
.filter((record: LLMsBenchmarkData) => {
112+
return (
113+
record.model === modelName &&
114+
(record.dtype === dtypeName ||
115+
dtypeName === DEFAULT_DTYPE_NAME) &&
116+
(`${record.device} (${record.arch})` === deviceName ||
117+
deviceName === DEFAULT_DEVICE_NAME) &&
118+
record.metric === metric
119+
);
120+
})
121+
.filter((record: LLMsBenchmarkData) => {
122+
const id = record.workflow_id;
123+
return (
124+
(id >= lWorkflowId && id <= rWorkflowId) ||
125+
(id <= lWorkflowId && id >= rWorkflowId) ||
126+
(lWorkflowId === undefined && rWorkflowId === undefined)
127+
);
128+
})
129+
.map((record: LLMsBenchmarkData) => {
130+
const model = record.model;
131+
const dtype = record.dtype;
132+
const device = record.device;
109133

110-
record.display = model.includes(dtype)
111-
? model.includes(device)
112-
? model
113-
: `${model} (${device})`
114-
: model.includes(device)
115-
? `${model} (${dtype})`
116-
: `${model} (${dtype} / ${device})`;
134+
record.display = model.includes(dtype)
135+
? model.includes(device)
136+
? model
137+
: `${model} (${device})`
138+
: model.includes(device)
139+
? `${model} (${dtype})`
140+
: `${model} (${dtype} / ${device})`;
117141

118-
return record;
119-
});
142+
return record;
143+
});
120144

121145
graphSeries[metric] = seriesWithInterpolatedTimes(
122146
chartData[metric],
@@ -141,7 +165,13 @@ export function GraphPanel({
141165
{metricNames
142166
.filter((metric) => chartData[metric].length !== 0)
143167
.map((metric: string) => (
144-
<Grid item xs={12} lg={4} height={GRAPH_ROW_HEIGHT} key={metric}>
168+
<Grid
169+
item
170+
xs={12}
171+
lg={modelName === DEFAULT_MODEL_NAME ? 12 : 4}
172+
height={GRAPH_ROW_HEIGHT}
173+
key={metric}
174+
>
145175
<TimeSeriesPanelWithData
146176
data={chartData[metric]}
147177
series={graphSeries[metric]}
@@ -169,54 +199,56 @@ export function GraphPanel({
169199
))}
170200
</Grid>
171201
</div>
172-
<div>
173-
<table>
174-
<thead>
175-
<tr>
176-
<th>Date</th>
177-
<th>Commit</th>
178-
{metricNames.map((metric: string) => (
179-
<th key={metric}>
180-
{chartData[metric].length !== 0
181-
? metric in METRIC_DISPLAY_SHORT_HEADERS
182-
? METRIC_DISPLAY_SHORT_HEADERS[metric]
183-
: metric
184-
: ""}
185-
</th>
186-
))}
187-
</tr>
188-
</thead>
189-
<tbody>
190-
{chartData[availableMetric].map((entry: any, index: number) => {
191-
let commit = WORKFLOW_ID_TO_COMMIT[entry.workflow_id];
192-
return (
193-
<tr key={index}>
194-
<td>{entry.granularity_bucket}</td>
195-
<td>
196-
<code>
197-
<a
198-
onClick={() => navigator.clipboard.writeText(commit)}
199-
className="animate-on-click"
200-
>
201-
{commit}
202-
</a>
203-
</code>
204-
</td>
205-
{metricNames
206-
.filter((metric) => chartData[metric].length !== 0)
207-
.map((metric: string) => (
208-
<td key={`${metric}-${index}`}>
209-
{chartData[metric][index] !== undefined
210-
? chartData[metric][index].actual
211-
: ""}
212-
</td>
213-
))}
214-
</tr>
215-
);
216-
})}
217-
</tbody>
218-
</table>
219-
</div>
202+
{modelName !== DEFAULT_MODEL_NAME && (
203+
<div>
204+
<table>
205+
<thead>
206+
<tr>
207+
<th>Date</th>
208+
<th>Commit</th>
209+
{metricNames.map((metric: string) => (
210+
<th key={metric}>
211+
{chartData[metric].length !== 0
212+
? metric in METRIC_DISPLAY_SHORT_HEADERS
213+
? METRIC_DISPLAY_SHORT_HEADERS[metric]
214+
: metric
215+
: ""}
216+
</th>
217+
))}
218+
</tr>
219+
</thead>
220+
<tbody>
221+
{chartData[availableMetric].map((entry: any, index: number) => {
222+
let commit = WORKFLOW_ID_TO_COMMIT[entry.workflow_id];
223+
return (
224+
<tr key={index}>
225+
<td>{entry.granularity_bucket}</td>
226+
<td>
227+
<code>
228+
<a
229+
onClick={() => navigator.clipboard.writeText(commit)}
230+
className="animate-on-click"
231+
>
232+
{commit}
233+
</a>
234+
</code>
235+
</td>
236+
{metricNames
237+
.filter((metric) => chartData[metric].length !== 0)
238+
.map((metric: string) => (
239+
<td key={`${metric}-${index}`}>
240+
{chartData[metric][index] !== undefined
241+
? chartData[metric][index].actual
242+
: ""}
243+
</td>
244+
))}
245+
</tr>
246+
);
247+
})}
248+
</tbody>
249+
</table>
250+
</div>
251+
)}
220252
</>
221253
);
222254
}

torchci/components/benchmark/llms/SummaryPanel.tsx

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,9 @@ export function SummaryPanel({
8383
model
8484
)}${backend}${dtype}&deviceName=${encodeURIComponent(deviceArch)}`;
8585

86-
const isNewModel = params.value.l === undefined ? "(NEW!) " : "";
87-
const isModelStopRunning = params.value.r === undefined ? "❌" : "";
88-
8986
return (
9087
<a href={url}>
91-
{isNewModel}
92-
{isModelStopRunning}&nbsp;<b>{model}</b>
88+
<b>{model}</b>
9389
</a>
9490
);
9591
},

torchci/components/benchmark/llms/common.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
1414
token_per_sec: "Token per second",
1515
flops_utilization: "FLOPs utilization",
1616
"compilation_time(s)": "Compilation Time (s)",
17+
speedup: "Speedup",
1718
};
1819
// The variable name is a bit dumb, but it tells if a higher metric value
1920
// is good or bad so that we can highlight it on the dashboard accordingly.

torchci/lib/benchmark/llmUtils.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import {
22
BranchAndCommitPerfData,
33
LLMsBenchmarkData,
44
} from "components/benchmark/llms/common";
5+
import { geomean } from "lib/benchmark/compilerUtils";
56
import { fetcher } from "lib/GeneralUtils";
67
import { BranchAndCommit } from "lib/types";
78
import useSWR from "swr";
@@ -158,3 +159,45 @@ export function combineLeftAndRight(
158159

159160
return data;
160161
}
162+
163+
/**
 * Aggregate one metric across models into a geometric mean per group.
 *
 * Records whose `metric` equals `metricName` are grouped by the tuple
 * (granularity_bucket, workflow_id, job_id, backend, dtype, device, arch,
 * metric). For every group a single synthetic record is produced whose
 * `actual` is the geomean of the group's non-zero `actual` values, whose
 * `model` is the empty string, whose `target` is 0, and whose `metric` is the
 * original metric name suffixed with " (geomean)".
 *
 * @param data - Benchmark records to aggregate; the array is not modified.
 * @param metricName - Only records with exactly this `metric` are considered.
 * @returns One aggregated record per group, in order of first appearance.
 */
export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
  // Each group keeps its stringified key parts next to the collected values so
  // that the parts never have to be parsed back out of the key. The previous
  // implementation joined the parts with "+" and recovered them with
  // split("+"), which shifted the fields whenever any part contained a "+"
  // (for example a "+00:00" timezone offset or a backend name with a plus).
  const groups = new Map<string, { parts: string[]; values: number[] }>();

  data.forEach((r: LLMsBenchmarkData) => {
    if (r.metric !== metricName) {
      return;
    }

    const parts = [
      r.granularity_bucket,
      r.workflow_id,
      r.job_id,
      r.backend,
      r.dtype,
      r.device,
      r.arch,
      r.metric,
    ].map((part) => `${part}`);

    // JSON encoding of the string array is unambiguous regardless of the
    // characters inside each part, so distinct tuples never collide.
    const k = JSON.stringify(parts);

    let group = groups.get(k);
    if (group === undefined) {
      // A group is registered even when all of its values turn out to be 0,
      // so it still yields an output record.
      group = { parts, values: [] };
      groups.set(k, group);
    }

    // Zero values are left out of the geomean input.
    if (r.actual !== 0) {
      group.values.push(r.actual);
    }
  });

  const returnedGeomean: LLMsBenchmarkData[] = [];

  groups.forEach(({ parts, values }) => {
    const gm = geomean(values);

    const [bucket, workflowId, jobId, backend, dtype, device, arch, metric] =
      parts;
    returnedGeomean.push({
      granularity_bucket: bucket,
      model: "",
      backend: backend,
      workflow_id: Number(workflowId),
      job_id: Number(jobId),
      metric: `${metric} (geomean)`,
      actual: Number(gm),
      target: 0,
      dtype: dtype,
      device: device,
      arch: arch,
    });
  });

  return returnedGeomean;
}

torchci/pages/benchmark/llms.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ export default function Page() {
152152
const defaultStopTime = dayjs();
153153
const [stopTime, setStopTime] = useState(defaultStopTime);
154154
const [timeRange, setTimeRange] = useState<number>(LAST_N_DAYS);
155-
const [granularity, setGranularity] = useState<Granularity>("hour");
155+
const [granularity, setGranularity] = useState<Granularity>("day");
156156
const [lBranch, setLBranch] = useState<string>(MAIN_BRANCH);
157157
const [lCommit, setLCommit] = useState<string>("");
158158
const [rBranch, setRBranch] = useState<string>(MAIN_BRANCH);

0 commit comments

Comments
 (0)