Skip to content

Commit f61df98

Browse files
committed
add clickbench runs for vortex
1 parent 5830420 commit f61df98

File tree

7 files changed

+270
-45
lines changed

7 files changed

+270
-45
lines changed

duckdb-vortex/benchmark.sh

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/bin/bash
# ClickBench driver for DuckDB with the Vortex extension.
# This part installs the build toolchain and builds duckdb-vortex from source
# at tag 0.34.0.

# Install
sudo apt-get update
sudo apt-get install ninja-build cmake build-essential make ccache pip clang pkg-config -y

curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --no-modify-path
# The installer never changes the PATH of the shell that launched it (and
# --no-modify-path also leaves the profile files alone), so expose the freshly
# installed cargo/rustc to the build below explicitly.
export PATH="${CARGO_HOME:-$HOME/.cargo}/bin:$PATH"

export CC=clang
export CXX=clang++
git clone https://github.com/vortex-data/vortex --recursive
# Stop if the clone did not produce the directory; otherwise the checkout and
# the build would run against whatever repository the script was started from.
cd vortex/duckdb-vortex || exit 1
git checkout 0.34.0
GEN=ninja NATIVE_ARCH=1 LTO=thin make
# Put the build output directory first on PATH; presumably this is where the
# duckdb CLI used by the rest of the benchmark ends up.
export PATH="$(pwd)/build/release/:$PATH"
cd ../..
17+
18+
# Load the data
seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue https://pub-3ba949c0f0354ac18db1f0f14f0a2c52.r2.dev/clickbench/parquet_many/hits_{}.parquet'

# Convert parquet files to vortex

#######################################
# Converts hits_<N>.parquet into hits_<N>.vortex unless the target exists.
# Kept as a function (instead of an inline single-quoted bash -c script) so
# the SQL can carry real single-quoted string literals: single quotes nested
# inside a single-quoted shell string are consumed by the shell and never
# reach duckdb.
# Arguments: $1 - partition index
#######################################
convert_partition() {
  local idx=$1
  if [ ! -f "hits_${idx}.vortex" ]; then
    duckdb -c "COPY 'hits_${idx}.parquet' TO 'hits_${idx}.vortex' (FORMAT vortex)"
  fi
}
# Exported so the bash children started by xargs can see the function.
export -f convert_partition

# The index is passed as a positional argument rather than spliced into the
# script text.
seq 0 99 | xargs -P"$(nproc)" -I{} bash -c 'convert_partition "$1"' _ {}
27+
28+
time duckdb hits.db -f create.sql

# Run the queries

./run.sh 2>&1 | tee log.txt

wc -c hits.db

#######################################
# Turns a benchmark log into result rows of the form "[a,b,c],".
# Keeps lines that start with a digit, mention Killed/Segmentation, or are
# "Run Time (s): real ..." timer lines; a Killed/Segmentation line becomes
# three "null" entries, a timer line is reduced to its "real" value, and the
# resulting values are grouped three per row.
# Arguments: optional log file(s); reads stdin when none are given
# Outputs:   one "[v1,v2,v3]," row per three values, on stdout
#######################################
format_results() {
  grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' "$@" |
    sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/; s/^Run Time \(s\): real\s*([0-9.]+).*$/\1/' |
    # Print the value through an explicit "%s" so log text is never
    # interpreted as a printf format string.
    awk '{ if (i % 3 == 0) { printf "[" }; printf "%s", $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
}

format_results log.txt

duckdb-vortex/create.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- Expose every file matching hits_*.vortex as a single view named hits.
CREATE VIEW hits AS
SELECT *
FROM read_vortex('hits_*.vortex');
2+

duckdb-vortex/queries.sql

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
SELECT COUNT(*) FROM hits;
2+
SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
3+
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
4+
SELECT AVG(UserID) FROM hits;
5+
SELECT COUNT(DISTINCT UserID) FROM hits;
6+
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
7+
SELECT MIN(EventDate), MAX(EventDate) FROM hits;
8+
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
9+
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
10+
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
11+
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
12+
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
13+
SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
14+
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
15+
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
16+
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
17+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
18+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
19+
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
20+
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
21+
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
22+
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
23+
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
24+
SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
25+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
26+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
27+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
28+
SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
29+
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
30+
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
31+
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
32+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
33+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
34+
SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
35+
SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
36+
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
37+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
38+
SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
39+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
40+
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
41+
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
42+
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
43+
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"system": "DuckDB (Vortex, partitioned)",
3+
"date": "2025-05-20",
4+
"machine": "c6a.4xlarge, 500gb gp2",
5+
"cluster_size": 1,
6+
"comment": "",
7+
8+
"tags": ["rust", "vortex", "column-oriented", "embedded", "stateless"],
9+
10+
"load_time": 0,
11+
"data_size": 16028685104,
12+
13+
"result": [
14+
[0.203,0.027,0.023],
15+
[0.477,0.038,0.035],
16+
[1.550,0.159,0.156],
17+
[3.140,0.119,0.115],
18+
[3.261,0.386,0.384],
19+
[3.337,0.429,0.438],
20+
[0.251,0.067,0.066],
21+
[0.392,0.045,0.041],
22+
[4.467,0.592,0.588],
23+
[4.564,0.855,0.848],
24+
[2.846,0.156,0.152],
25+
[3.381,0.174,0.172],
26+
[3.819,0.390,0.411],
27+
[6.480,0.786,0.761],
28+
[3.774,0.429,0.433],
29+
[2.803,0.461,0.459],
30+
[6.221,1.151,1.155],
31+
[5.989,1.021,0.965],
32+
[8.417,2.058,2.123],
33+
[2.461,0.242,0.225],
34+
[28.080,0.646,0.634],
35+
[30.654,0.648,0.535],
36+
[41.335,1.147,0.989],
37+
[114.739,102.458,104.874],
38+
[5.360,0.274,0.249],
39+
[3.524,0.197,0.189],
40+
[5.719,0.266,0.258],
41+
[27.922,0.815,0.777],
42+
[21.561,9.044,9.108],
43+
[0.754,0.117,0.119],
44+
[6.939,0.362,0.348],
45+
[12.776,0.495,0.468],
46+
[10.465,2.253,2.251],
47+
[27.317,2.012,2.037],
48+
[27.889,2.306,2.350],
49+
[1.834,0.646,0.638],
50+
[0.260,0.081,0.086],
51+
[0.891,0.065,0.083],
52+
[0.971,0.147,0.155],
53+
[1.112,0.167,0.106],
54+
[0.848,0.075,0.052],
55+
[0.882,0.051,0.051],
56+
[0.831,0.049,0.048]
57+
]
58+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"system": "DuckDB (Vortex, partitioned)",
3+
"date": "2025-05-20",
4+
"machine": "c6a.metal, 500gb gp2",
5+
"cluster_size": 1,
6+
"comment": "",
7+
8+
"tags": ["rust", "vortex", "column-oriented", "embedded", "stateless"],
9+
10+
"load_time": 0,
11+
"data_size": 16028685104,
12+
13+
"result": [
14+
[0.173,0.025,0.025],
15+
[0.446,0.074,0.066],
16+
[1.410,0.192,0.193],
17+
[3.040,0.138,0.094],
18+
[3.125,0.233,0.243],
19+
[3.399,0.269,0.272],
20+
[0.242,0.130,0.128],
21+
[0.297,0.088,0.085],
22+
[4.286,0.391,0.367],
23+
[4.595,0.509,0.514],
24+
[2.896,0.154,0.139],
25+
[3.211,0.160,0.147],
26+
[3.633,0.230,0.238],
27+
[6.317,0.372,0.363],
28+
[3.718,0.251,0.228],
29+
[2.800,0.260,0.263],
30+
[6.121,0.477,0.484],
31+
[6.152,0.498,0.510],
32+
[8.184,0.749,0.726],
33+
[2.447,0.221,0.216],
34+
[29.004,0.334,0.301],
35+
[31.965,0.354,0.342],
36+
[43.794,0.523,0.446],
37+
[113.575,1.920,1.839],
38+
[5.334,0.124,0.085],
39+
[3.431,0.150,0.121],
40+
[5.527,0.178,0.147],
41+
[28.774,0.363,0.325],
42+
[23.202,1.569,1.582],
43+
[0.762,0.150,0.146],
44+
[6.704,0.252,0.208],
45+
[12.707,0.270,0.390],
46+
[9.594,0.884,0.907],
47+
[27.720,0.741,0.696],
48+
[27.784,0.749,0.769],
49+
[1.775,0.362,0.350],
50+
[0.398,0.145,0.160],
51+
[0.751,0.133,0.125],
52+
[1.108,0.147,0.142],
53+
[1.157,0.183,0.198],
54+
[0.825,0.098,0.099],
55+
[0.719,0.104,0.095],
56+
[0.633,0.081,0.082]
57+
]
58+
}

duckdb-vortex/run.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
# Runs every query of a query file (one statement per line) against hits.db.
# Usage: ./run.sh [query_file]   (query_file defaults to queries.sql)

TRIES=3

#######################################
# Executes each non-empty line of a query file TRIES times.
# The page cache is dropped once per query, before a single duckdb invocation
# that runs the query TRIES times with the timer enabled.
# Globals:   TRIES (read)
# Arguments: $1 - path to the query file, one SQL statement per line
# Outputs:   the query text, the duckdb arguments, then duckdb's own output
#######################################
run_queries() {
  local query_file=$1
  local query
  local -a cli_params
  local i

  # "|| [[ -n ... ]]" keeps a final line that has no trailing newline; without
  # it read returns non-zero on that line and the last query is skipped.
  while read -r query || [[ -n "$query" ]]; do
    # A blank line is not a query; skip it instead of dropping caches and
    # starting duckdb with empty statements.
    [[ -n "$query" ]] || continue

    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null

    echo "$query"
    cli_params=()
    # cli_params+=("-c")
    # cli_params+=("SET parquet_metadata_cache=true")
    cli_params+=("-c" ".timer on")
    for ((i = 1; i <= TRIES; i++)); do
      cli_params+=("-c" "${query}")
    done
    echo "${cli_params[@]}"
    duckdb hits.db "${cli_params[@]}"
  done < "$query_file"
}

run_queries "${1:-queries.sql}"

0 commit comments

Comments
 (0)