Skip to content

Commit 9490568

Browse files
authored
feat: show statpopgen benchmarks (#4359)
The diff is huge because I deduplicated a bunch of code between the web worker and non-web-worker implementations. Signed-off-by: Daniel King <[email protected]>
1 parent 99a4d87 commit 9490568

File tree

7 files changed

+407
-640
lines changed

7 files changed

+407
-640
lines changed

benchmarks-website/config.js

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ export const BENCHMARK_DESCRIPTIONS = {
7676
"ClickHouse's analytical benchmark suite testing real-world query patterns on web analytics data, run against NVMe storage",
7777
"TPC-DS (NVMe)":
7878
"TPC-DS benchmark queries executed on local NVMe storage, testing complex analytical query performance with a retail sales dataset",
79+
"Statistical and Population Genetics":`A suite of Statistical and Population genetics queries executed on local NVMe storage.
80+
81+
A custom benchmark for statistical and population genetics workloads using the gnomAD v3.1.2 release of the jointly called One Thousand Genomes (1kG) and Human Genome Diversity Project (HGDP) dataset (1kG+HGDP). Only a prefix of Chromosome 21 is used for benchmarking.
82+
83+
Data source: <https://gnomad.broadinstitute.org/>.`,
7984
};
8085

8186
// Category tags mapping
@@ -94,6 +99,7 @@ export const CATEGORY_TAGS = {
9499
"TPC-H (S3) (SF=1000)": ["Queries (S3)", "TPC-H (SF=1000)"],
95100
"TPC-DS (NVMe) (SF=1)": ["Queries (NVMe)", "TPC-DS (SF=1)"],
96101
"TPC-DS (NVMe) (SF=10)": ["Queries (NVMe)", "TPC-DS (SF=10)"],
102+
"Statistical and Population Genetics": ["Queries (NVMe)", "StatPopGen"],
97103
};
98104

99105
// Scale factor descriptions
@@ -106,8 +112,30 @@ export const SCALE_FACTOR_DESCRIPTIONS = {
106112

107113
// Query name transformations
108114
export const QUERY_NAME_MAP = {
115+
"TPCH Q1": "TPC-H Q1",
116+
"TPCH Q2": "TPC-H Q2",
117+
"TPCH Q3": "TPC-H Q3",
118+
"TPCH Q4": "TPC-H Q4",
119+
"TPCH Q5": "TPC-H Q5",
120+
"TPCH Q6": "TPC-H Q6",
121+
"TPCH Q7": "TPC-H Q7",
122+
"TPCH Q8": "TPC-H Q8",
123+
"TPCH Q9": "TPC-H Q9",
124+
"TPCH Q10": "TPC-H Q10",
125+
"TPCH Q11": "TPC-H Q11",
126+
"TPCH Q12": "TPC-H Q12",
127+
"TPCH Q13": "TPC-H Q13",
128+
"TPCH Q14": "TPC-H Q14",
129+
"TPCH Q15": "TPC-H Q15",
130+
"TPCH Q16": "TPC-H Q16",
131+
"TPCH Q17": "TPC-H Q17",
132+
"TPCH Q18": "TPC-H Q18",
133+
"TPCH Q19": "TPC-H Q19",
134+
"TPCH Q20": "TPC-H Q20",
135+
"TPCH Q21": "TPC-H Q21",
136+
"TPCH Q22": "TPC-H Q22",
109137
"VORTEX:RAW SIZE": "VORTEX COMPRESSION RATIO",
110-
"VORTEX:PARQUET-ZSTD SIZE": "VORTEX:PARQUET-ZSTD SIZE RATIO",
138+
"VORTEX:PARQUET-ZSTD SIZE": "VORTEX:PARQUET-ZSTD SIZE RATIO"
111139
};
112140

113141
// Engine labels
@@ -135,4 +163,5 @@ export const BENCHMARK_GROUPS = [
135163
"TPC-H (S3) (SF=1000)",
136164
"TPC-DS (NVMe) (SF=1)",
137165
"TPC-DS (NVMe) (SF=10)",
138-
];
166+
"Statistical and Population Genetics"
167+
];
Lines changed: 17 additions & 280 deletions
Original file line numberDiff line numberDiff line change
@@ -1,155 +1,27 @@
11
"use strict";
22

3-
import { BENCHMARK_GROUPS, QUERY_NAME_MAP } from './config.js';
3+
import { shared } from './data-shared.js';
4+
import { BENCHMARK_GROUPS } from './config.js';
45

56
// Data processing module
67
export const dataProcessor = {
7-
parseCommits(commitMetadata) {
8-
const commits = [];
9-
Object.values(commitMetadata)
10-
.sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp))
11-
.forEach((commit, index) => {
12-
commit.sortedIndex = index;
13-
commits.push(commit);
14-
});
15-
return commits;
16-
},
17-
18-
createMissingCommit(commitId) {
19-
return {
20-
author: { email: "[email protected]", name: "Dan King" },
21-
committer: { email: "[email protected]", name: "GitHub" },
22-
id: commitId,
23-
message: "!! This commit is missing from commits.json !!",
24-
timestamp: "1970-01-01T00:00:00Z",
25-
tree_id: null,
26-
url: `https://github.com/vortex-data/vortex/commit/${commitId}`,
27-
};
28-
},
8+
parseCommits: shared.parseCommits,
299

30-
determineGroupId(benchmark) {
31-
const { name, dataset, storage } = benchmark;
10+
createMissingCommit: shared.createMissingCommit,
3211

33-
if (dataset?.tpch) {
34-
const scaleFactor = dataset.tpch.scale_factor;
35-
const isNvme = storage === undefined || storage === "nvme";
36-
return this.getTpchGroupId(scaleFactor, isNvme);
37-
}
12+
determineGroupId: shared.determineGroupId,
3813

39-
if (dataset?.tpcds) {
40-
const scaleFactor = dataset.tpcds.scale_factor;
41-
const isNvme = storage === undefined || storage === "nvme";
42-
return this.getTpcdsGroupId(scaleFactor, isNvme);
43-
}
44-
45-
if (dataset?.clickbench) return "Clickbench";
46-
if (name.startsWith("random-access/")) return "Random Access";
47-
if (name.includes("compress time/")) return "Compression";
48-
if (name.startsWith("vortex size/")) return "Compression Size";
49-
if (
50-
name.startsWith("vortex:raw size/") ||
51-
name.startsWith("vortex:parquet-zstd size/")
52-
) {
53-
return "Compression Size";
54-
}
55-
if (name.startsWith("tpch_q")) {
56-
const isNvme = storage === undefined || storage === "nvme";
57-
return isNvme ? "TPC-H (NVMe) (SF=1)" : "TPC-H (S3) (SF=1)";
58-
}
59-
if (name.startsWith("tpcds_q")) {
60-
const isNvme = storage === undefined || storage === "nvme";
61-
return isNvme ? "TPC-DS (NVMe) (SF=1)" : "TPC-DS (S3) (SF=1)";
62-
}
63-
if (name.startsWith("clickbench")) return "Clickbench";
14+
getTpchGroupId: shared.getTpchGroupId,
6415

65-
return null;
66-
},
16+
getTpcdsGroupId: shared.getTpcdsGroupId,
6717

68-
getTpchGroupId(scaleFactor, isNvme) {
69-
const sf = Number(scaleFactor);
70-
const storage = isNvme ? "NVMe" : "S3";
18+
normalizeSeriesName: shared.normalizeSeriesName,
7119

72-
switch (sf) {
73-
case 1:
74-
return `TPC-H (${storage}) (SF=1)`;
75-
case 10:
76-
return `TPC-H (${storage}) (SF=10)`;
77-
case 100:
78-
return `TPC-H (${storage}) (SF=100)`;
79-
case 1000:
80-
return `TPC-H (${storage}) (SF=1000)`;
81-
default:
82-
console.warn("Unknown scale factor:", scaleFactor);
83-
return null;
84-
}
85-
},
20+
formatQueryName: shared.formatQueryName,
8621

87-
getTpcdsGroupId(scaleFactor, isNvme) {
88-
const sf = Number(scaleFactor);
89-
const storage = isNvme ? "NVMe" : "S3";
22+
convertValue: shared.convertValue,
9023

91-
switch (sf) {
92-
case 1:
93-
return `TPC-DS (${storage}) (SF=1)`;
94-
case 10:
95-
return `TPC-DS (${storage}) (SF=10)`;
96-
case 100:
97-
return `TPC-DS (${storage}) (SF=100)`;
98-
case 1000:
99-
return `TPC-DS (${storage}) (SF=1000)`;
100-
default:
101-
console.warn("Unknown scale factor:", scaleFactor);
102-
return null;
103-
}
104-
},
105-
106-
normalizeSeriesName(name, seriesName) {
107-
let normalizedName = seriesName;
108-
let normalizedQuery = name;
109-
110-
if (
111-
seriesName.endsWith(" throughput") ||
112-
seriesName.endsWith("throughput")
113-
) {
114-
const suffix = seriesName.endsWith(" throughput")
115-
? " throughput"
116-
: "throughput";
117-
normalizedName = seriesName.slice(0, seriesName.length - suffix.length);
118-
normalizedQuery = name.replace("time", "throughput");
119-
}
120-
121-
return { name: normalizedQuery, seriesName: normalizedName };
122-
},
123-
124-
formatQueryName(query) {
125-
let prettyQ = query.replace(/_/g, " ").toUpperCase();
126-
prettyQ = QUERY_NAME_MAP[prettyQ] || prettyQ;
127-
prettyQ = prettyQ.replace(/^TPCH\s/, "TPC-H ");
128-
prettyQ = prettyQ.replace(/^TPCDS\s/, "TPC-DS ");
129-
return prettyQ;
130-
},
131-
132-
convertValue(value, unit) {
133-
const isNanos = unit === "ns/iter" || unit === "ns";
134-
const isBytes = unit === "bytes";
135-
const isThroughput = unit === "bytes/ns";
136-
137-
if (isNanos) return value / 1_000_000;
138-
if (isBytes) return value / 1_048_576;
139-
if (isThroughput) return (value * 1_000_000_000) / 1_048_576;
140-
return value;
141-
},
142-
143-
getUnit(unit) {
144-
const isNanos = unit === "ns/iter" || unit === "ns";
145-
const isBytes = unit === "bytes";
146-
const isThroughput = unit === "bytes/ns";
147-
148-
if (isNanos) return "ms/iter";
149-
if (isBytes) return "MiB";
150-
if (isThroughput) return "MiB/s";
151-
return unit;
152-
},
24+
getUnit: shared.getUnit,
15325

15426
downloadAndGroupData(data, commitMetadata, seriesRenameFn) {
15527
const commits = this.parseCommits(commitMetadata);
@@ -190,148 +62,13 @@ export const dataProcessor = {
19062
}));
19163
},
19264

193-
initializeGroups() {
194-
const groups = {};
195-
BENCHMARK_GROUPS.forEach((name) => {
196-
groups[name] = new Map();
197-
});
198-
return groups;
199-
},
200-
201-
processBenchmark(
202-
benchmark,
203-
commitMetadata,
204-
commits,
205-
groups,
206-
seriesRenameFn,
207-
missingCommits,
208-
uncategorizableNames
209-
) {
210-
// Ensure commit metadata
211-
if (!benchmark.commit) {
212-
benchmark.commit = commitMetadata[benchmark.commit_id];
213-
if (!benchmark.commit) {
214-
missingCommits.add(benchmark.commit_id);
215-
benchmark.commit = commitMetadata[benchmark.commit_id] =
216-
this.createMissingCommit(benchmark.commit_id);
217-
}
218-
}
219-
220-
// Determine group
221-
const groupId = this.determineGroupId(benchmark);
222-
if (!groupId) {
223-
uncategorizableNames.add(benchmark.name);
224-
return;
225-
}
226-
227-
const group = groups[groupId];
228-
if (!group) {
229-
console.warn("Cannot find group element in group:", groupId);
230-
return;
231-
}
232-
233-
// Process benchmark data
234-
let [query, seriesName] = benchmark.name.split("/");
235-
const normalized = this.normalizeSeriesName(query, seriesName);
236-
query = normalized.name;
237-
seriesName = normalized.seriesName;
238-
239-
// Apply series renaming
240-
seriesName = this.applySeriesRenaming(
241-
seriesName,
242-
groupId,
243-
seriesRenameFn
244-
);
245-
246-
// Format query name
247-
const prettyQ = this.formatQueryName(query);
248-
if (prettyQ.includes("PARQUET-UNC")) return;
249-
250-
// Set units
251-
let unit = benchmark.unit;
252-
if (!unit && benchmark.name.startsWith("vortex size/")) {
253-
unit = "bytes";
254-
} else if (
255-
!unit &&
256-
(benchmark.name.startsWith("vortex:raw size/") ||
257-
benchmark.name.startsWith("vortex:parquet-zstd size/"))
258-
) {
259-
unit = "ratio";
260-
}
261-
262-
// Calculate sort position
263-
const sortPosition =
264-
query.slice(0, 4) === "tpch" || query.slice(0, 5) === "tpcds"
265-
? parseInt(prettyQ.split(" ")[1].substring(1), 10)
266-
: 0;
65+
initializeGroups: shared.initializeGroups,
26766

268-
// Add to group
269-
this.addToGroup(
270-
group,
271-
prettyQ,
272-
seriesName,
273-
benchmark,
274-
unit,
275-
sortPosition,
276-
commits
277-
);
278-
},
67+
processBenchmark: shared.processBenchmark,
27968

280-
applySeriesRenaming(seriesName, groupId, seriesRenameFn) {
281-
if (!seriesRenameFn) return seriesName;
69+
applySeriesRenaming: shared.applySeriesRenaming,
28270

283-
const renamer = seriesRenameFn.find(([name]) => name === groupId);
284-
if (renamer?.[1]?.renamedDatasets) {
285-
const renameDict = renamer[1].renamedDatasets;
286-
return renameDict[seriesName] || seriesName;
287-
}
288-
return seriesName;
289-
},
71+
addToGroup: shared.addToGroup,
29072

291-
addToGroup(
292-
group,
293-
queryName,
294-
seriesName,
295-
benchmark,
296-
unit,
297-
sortPosition,
298-
commits
299-
) {
300-
let arr = group.get(queryName);
301-
if (!arr) {
302-
group.set(queryName, {
303-
sort_position: sortPosition,
304-
commits,
305-
unit: this.getUnit(unit),
306-
series: new Map(),
307-
});
308-
arr = group.get(queryName);
309-
}
310-
311-
let series = arr.series.get(seriesName);
312-
if (!series) {
313-
arr.series.set(seriesName, new Array(commits.length).fill(null));
314-
series = arr.series.get(seriesName);
315-
}
316-
317-
series[benchmark.commit.sortedIndex] = {
318-
range: "this was the range",
319-
value: this.convertValue(benchmark.value, unit),
320-
};
321-
},
322-
323-
sortGroups(groups) {
324-
const sortByPositionThenName = (a, b) => {
325-
const positionCompare = a[1].sort_position - b[1].sort_position;
326-
return positionCompare !== 0
327-
? positionCompare
328-
: a[0].localeCompare(b[0]);
329-
};
330-
331-
Object.entries(groups).forEach(([name, charts]) => {
332-
groups[name] = new Map(
333-
[...charts.entries()].sort(sortByPositionThenName)
334-
);
335-
});
336-
},
337-
};
73+
sortGroups: shared.sortGroups,
74+
};

0 commit comments

Comments
 (0)