Skip to content

Commit 3262025

Browse files
authored
Add Trino to CDK benchmarks (#244)
* Add trino to CDK * Fix trino deployments * Fix trino commands * Add Trino benchmark scripts * Remove unused shuffle-task-batch-size * Fix view creation * Add warmups * Use explain analyze in Trino for not pulling rows
1 parent 99ab70b commit 3262025

File tree

6 files changed

+660
-201
lines changed

6 files changed

+660
-201
lines changed
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import path from "path";
2+
import fs from "fs/promises";
3+
import { z } from 'zod';
4+
5+
/** Directory three levels above the directory containing this module. */
export const ROOT = path.join(__dirname, '..', '..', '..');
6+
7+
// Simple data structures
8+
/**
 * Measurements collected for a single benchmark query.
 *
 * `query` is the query identifier and `iterations` holds one entry per timed
 * execution, with its elapsed time and the number of rows it reported.
 */
export type QueryResult = {
  query: string;
  iterations: Array<{ elapsed: number; row_count: number }>;
};
12+
13+
/** Results of a whole benchmark run: one `QueryResult` per executed query. */
export type BenchmarkResults = {
  queries: Array<QueryResult>;
};
16+
17+
// Schema for one timed execution of a query.
const iterationSchema = z.object({
  elapsed: z.number(),
  row_count: z.number()
});

// Schema for all the timed executions recorded for one query.
const queryResultSchema = z.object({
  query: z.string(),
  iterations: z.array(iterationSchema)
});

/**
 * Runtime (zod) schema with the same shape as the `BenchmarkResults` type,
 * used to validate results that were read back from disk.
 */
export const BenchmarkResults = z.object({
  queries: z.array(queryResultSchema)
});
26+
27+
/** Numeric ids of the benchmark queries, 1 through 22 inclusive, in ascending order. */
export const IDS = Array.from({ length: 22 }, (_, index) => index + 1);
28+
29+
/**
 * Serializes `results` as pretty-printed JSON (2-space indent) into `outputPath`,
 * creating any missing parent directories first.
 *
 * Does nothing when `outputPath` is missing or empty.
 *
 * @param results - Benchmark results to persist.
 * @param outputPath - Destination file path; when falsy, nothing is written.
 */
export async function writeJson(results: BenchmarkResults, outputPath?: string) {
  if (!outputPath) {
    return;
  }

  const targetDir = path.dirname(outputPath);
  await fs.mkdir(targetDir, { recursive: true });

  const serialized = JSON.stringify(results, null, 2);
  await fs.writeFile(outputPath, serialized);
}
34+
35+
/**
 * Prints a per-query timing comparison between `results` and the results
 * previously saved at `outputPath`.
 *
 * Returns silently, printing nothing, when the previous file cannot be read,
 * is not valid JSON, or does not match the `BenchmarkResults` schema. Queries
 * that are absent from the previous run, or that have no iterations on either
 * side, are skipped.
 *
 * Averages are rounded to whole milliseconds before being compared. Equal
 * averages are reported as "same" rather than being classified as a slowdown.
 *
 * @param results - Results of the current run.
 * @param outputPath - Path of the JSON file holding the previous run's results.
 */
export async function compareWithPrevious(results: BenchmarkResults, outputPath: string) {
  let prevResults: BenchmarkResults;
  try {
    const prevContent = await fs.readFile(outputPath, 'utf-8');
    prevResults = BenchmarkResults.parse(JSON.parse(prevContent));
  } catch {
    // Best effort: a missing, unreadable or invalid file means nothing to compare.
    return;
  }

  console.log('\n==== Comparison with previous run ====');

  // Mean elapsed time rounded to whole milliseconds. Only called with a
  // non-empty list (empty lists are filtered out below).
  const averageElapsed = (iterations: { elapsed: number }[]): number =>
    Math.round(iterations.reduce((sum, i) => sum + i.elapsed, 0) / iterations.length);

  for (const query of results.queries) {
    const prevQuery = prevResults.queries.find(q => q.query === query.query);
    if (!prevQuery || prevQuery.iterations.length === 0 || query.iterations.length === 0) {
      continue;
    }

    const avgPrev = averageElapsed(prevQuery.iterations);
    const avg = averageElapsed(query.iterations);

    let factor: number;
    let tag: string;
    let emoji: string;
    if (avg === avgPrev) {
      // Identical rounded averages (including 0 ms vs 0 ms, which would
      // otherwise divide 0 by 0) are neither a speed-up nor a slowdown.
      factor = 1;
      tag = "same";
      emoji = "➖";
    } else {
      const faster = avg < avgPrev;
      // Always express the ratio as >= 1 and let the tag carry the direction.
      factor = faster ? avgPrev / avg : avg / avgPrev;
      tag = faster ? "faster" : "slower";
      // Changes above 20% get the prominent marker, smaller ones the subtle one.
      emoji = factor > 1.2 ? (faster ? "✅" : "❌") : (faster ? "✔" : "✖");
    }

    console.log(
      `${query.query.padStart(8)}: prev=${avgPrev.toString().padStart(4)} ms, new=${avg.toString().padStart(4)} ms, ${factor.toFixed(2)}x ${tag} ${emoji}`
    );
  }
}
68+
69+
/**
 * Contract an engine-specific backend implements so that `runBenchmark` can
 * drive it.
 */
export interface BenchmarkRunner {
  /**
   * Prepares the tables the queries run against.
   *
   * @param sf - The `sf` value forwarded from the benchmark options.
   */
  createTables(sf: number): Promise<void>;

  /**
   * Executes one query and reports how many rows it produced.
   *
   * @param query - Full text of the query to execute.
   * @returns An object whose `rowCount` is the number of rows reported.
   */
  executeQuery(query: string): Promise<{ rowCount: number }>;
}
74+
75+
/**
 * Runs the benchmark queries against `runner`, logs timings, compares them with
 * the results previously stored at `options.outputPath`, and then overwrites
 * that file with the new results.
 *
 * For every selected id in `IDS` the query text is read from
 * `<ROOT>/testdata/tpch/queries/q<id>.sql`, executed once untimed as a warm-up,
 * and then executed `iterations` times while being timed. Executions are
 * deliberately sequential so that measurements do not interfere with each other.
 *
 * Errors from the runner or from reading a query file are not caught and
 * propagate to the caller; in that case no results are written.
 *
 * @param runner - Backend used to create tables and execute queries.
 * @param options.sf - Value forwarded to `runner.createTables`.
 * @param options.iterations - Number of timed executions per query.
 * @param options.specificQuery - When set to a non-zero id, only that query is
 *   run; when omitted (or 0), every query in `IDS` is run.
 * @param options.outputPath - JSON file used both as the baseline for the
 *   comparison and as the destination for the new results.
 */
export async function runBenchmark(
  runner: BenchmarkRunner,
  options: {
    sf: number;
    iterations: number;
    specificQuery?: number;
    outputPath: string;
  }
) {
  const { sf, iterations, specificQuery, outputPath } = options;

  const results: BenchmarkResults = { queries: [] };
  const queriesPath = path.join(ROOT, "testdata", "tpch", "queries");

  console.log("Creating tables...");
  await runner.createTables(sf);

  for (const id of IDS) {
    // Truthiness is intentional: a missing (or 0) specificQuery selects all queries.
    if (specificQuery && specificQuery !== id) {
      continue;
    }

    const queryId = `q${id}`;
    const filePath = path.join(queriesPath, `${queryId}.sql`);
    const queryToExecute = await fs.readFile(filePath, 'utf-8');

    const queryResult: QueryResult = {
      query: queryId,
      iterations: []
    };

    // Untimed run so that the first measured iteration is not a cold start.
    console.log(`Warming up query ${id}...`)
    await runner.executeQuery(queryToExecute);

    for (let i = 0; i < iterations; i++) {
      // performance.now() is monotonic, so system clock adjustments during a
      // run cannot produce negative or inflated durations.
      const start = performance.now();
      const response = await runner.executeQuery(queryToExecute);
      const elapsed = Math.round(performance.now() - start);

      queryResult.iterations.push({
        elapsed,
        row_count: response.rowCount
      });

      console.log(
        `Query ${id} iteration ${i} took ${elapsed} ms and returned ${response.rowCount} rows`
      );
    }

    // With zero timed iterations there is no average to report (it would be NaN).
    if (queryResult.iterations.length > 0) {
      const avg = Math.round(
        queryResult.iterations.reduce((a, b) => a + b.elapsed, 0) / queryResult.iterations.length
      );
      console.log(`Query ${id} avg time: ${avg} ms`);
    }

    results.queries.push(queryResult);
  }

  // Compare first: writing replaces the file that holds the previous results.
  await compareWithPrevious(results, outputPath);
  await writeJson(results, outputPath);
}

0 commit comments

Comments
 (0)