Skip to content

Commit 2d78045

Browse files
Merge pull request #5 from spiraldb/ji/duckdb-vortex-single
update
2 parents 186f13e + 69953c9 commit 2d78045

File tree

6 files changed

+235
-0
lines changed

6 files changed

+235
-0
lines changed

duckdb-vortex/benchmark.sh

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
#!/bin/bash
#
# ClickBench driver for DuckDB reading Vortex files.
#
# Steps:
#   1. Install build dependencies and build vortex/duckdb-vortex at tag 0.35.0.
#   2. Download the 100 ClickBench parquet partitions (hits_0 .. hits_99).
#   3. Convert them to Vortex twice: one file per partition, and one single file.
#   4. Create a `hits` view over each layout in its own database file.
#   5. Run the queries against both databases and print the timings as
#      "[t1,t2,t3]," rows.
#
# Requires sudo, network access, and run.sh + queries.sql in the current
# directory.

# Install
sudo apt-get update
sudo apt-get install ninja-build cmake build-essential make ccache pip clang pkg-config -y

curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --no-modify-path
# --no-modify-path means rustup does not touch PATH configuration, and this
# shell would not pick up a profile change anyway. rustup installs into
# ~/.cargo/bin by default, so expose it explicitly for the build below.
export PATH="${HOME}/.cargo/bin:${PATH}"

export CC=clang
export CXX=clang++
git clone https://github.com/vortex-data/vortex --recursive
# Abort if the checkout is missing: otherwise the `cd ../..` below would walk
# out of the working directory and the data would be downloaded elsewhere.
cd vortex/duckdb-vortex || exit 1
git checkout 0.35.0 || exit 1
# `git checkout` does not move submodules; they were cloned at the commits
# pinned by the default branch, so sync them to what the tag pins.
git submodule update --init --recursive || exit 1
GEN=ninja NATIVE_ARCH=1 LTO=thin make || exit 1
# Put the build output first on PATH (presumably where the built `duckdb`
# binary lands) so the `duckdb` calls below resolve to it.
export PATH="${PWD}/build/release/:${PATH}"
cd ../.. || exit 1

# Load the data
seq 0 99 | xargs -P100 -I{} wget --continue 'https://pub-3ba949c0f0354ac18db1f0f14f0a2c52.r2.dev/clickbench/parquet_many/hits_{}.parquet'

# Convert parquet files to vortex partitioned

#######################################
# Convert one parquet partition to a Vortex file unless it already exists.
# Arguments: $1 - partition index (N in hits_N.parquet)
# Outputs:   writes hits_N.vortex into the current directory
#######################################
convert_partition() {
  local idx=$1
  if [[ ! -f "hits_${idx}.vortex" ]]; then
    duckdb -c "COPY 'hits_${idx}.parquet' TO 'hits_${idx}.vortex' (FORMAT vortex)"
  fi
}
# A function is used instead of an inline `bash -c '...'` snippet because the
# single quotes around the SQL file names cannot be nested inside a
# single-quoted snippet; they would be stripped before DuckDB sees them.
export -f convert_partition
seq 0 99 | xargs -P"$(nproc)" -I{} bash -c 'convert_partition "$1"' _ {}

# Convert parquet files to vortex single
if [[ ! -f "hits.vortex" ]]; then
  duckdb -c "COPY 'hits_*.parquet' TO 'hits.vortex' (FORMAT vortex)"
fi

# OR REPLACE keeps the script re-runnable, like the existence checks above.
time duckdb hits-partitioned.db -c "CREATE OR REPLACE VIEW hits AS SELECT * FROM read_vortex('hits_*.vortex')"

time duckdb hits-single.db -c "CREATE OR REPLACE VIEW hits AS SELECT * FROM read_vortex('hits.vortex')"

# Run the queries

#######################################
# Turn a run.sh log into result rows.
# Keeps lines that start with a digit, mention Killed/Segmentation, or are
# "Run Time (s): real ..." timer lines; a crash line becomes three nulls, a
# timer line becomes its number; every three values are emitted as "[a,b,c],"
# (run.sh executes each query three times).
# Arguments: $1 - path to the log file
# Outputs:   result rows on stdout
#######################################
format_results() {
  grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' "$1" |
    sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/; s/^Run Time \(s\): real\s*([0-9.]+).*$/\1/' |
    awk '{ if (i % 3 == 0) { printf "[" }; printf "%s", $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
}

echo 'partitioned'

./run.sh 'hits-partitioned.db' 2>&1 | tee log-p.txt
format_results log-p.txt

echo ''
echo 'single'

./run.sh 'hits-single.db' 2>&1 | tee log-s.txt
format_results log-s.txt

duckdb-vortex/queries.sql

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
SELECT COUNT(*) FROM hits;
2+
SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
3+
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
4+
SELECT AVG(UserID) FROM hits;
5+
SELECT COUNT(DISTINCT UserID) FROM hits;
6+
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
7+
SELECT MIN(EventDate), MAX(EventDate) FROM hits;
8+
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
9+
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
10+
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
11+
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
12+
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
13+
SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
14+
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
15+
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
16+
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
17+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
18+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
19+
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
20+
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
21+
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
22+
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
23+
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
24+
SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
25+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
26+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
27+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
28+
SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
29+
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
30+
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), 
SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
31+
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
32+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
33+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
34+
SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
35+
SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
36+
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
37+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
38+
SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
39+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
40+
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
41+
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
42+
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
43+
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"system": "DuckDB (Vortex, partitioned)",
3+
"date": "2025-05-21",
4+
"machine": "c6a.4xlarge, 500gb gp2",
5+
"cluster_size": 1,
6+
"comment": "",
7+
8+
"tags": ["rust", "vortex", "column-oriented", "embedded", "stateless"],
9+
10+
"load_time": 0,
11+
"data_size": 16028112632,
12+
13+
"result": [
14+
[0.239,0.011,0.010],
15+
[0.498,0.027,0.027],
16+
[1.586,0.150,0.145],
17+
[3.163,0.108,0.108],
18+
[3.321,0.381,0.380],
19+
[3.327,0.417,0.415],
20+
[0.247,0.050,0.052],
21+
[0.446,0.033,0.030],
22+
[4.517,0.568,0.565],
23+
[4.495,0.819,0.829],
24+
[2.772,0.147,0.144],
25+
[3.467,0.175,0.168],
26+
[3.809,0.388,0.387],
27+
[6.402,0.789,0.773],
28+
[3.615,0.409,0.440],
29+
[2.741,0.449,0.454],
30+
[6.105,1.176,1.135],
31+
[5.895,0.987,0.993],
32+
[8.413,2.137,2.134],
33+
[1.890,0.256,0.254],
34+
[27.029,0.593,0.593],
35+
[28.245,0.619,0.659],
36+
[35.089,1.163,1.177],
37+
[98.719,2.680,2.845],
38+
[5.261,0.259,0.254],
39+
[3.447,0.187,0.179],
40+
[5.643,0.258,0.247],
41+
[25.593,1.212,0.654],
42+
[21.055,9.490,9.472],
43+
[0.773,0.106,0.107],
44+
[6.830,0.357,0.345],
45+
[12.732,0.531,0.486],
46+
[10.444,2.272,2.167],
47+
[26.346,1.819,1.804],
48+
[26.248,2.339,2.319],
49+
[1.834,0.622,0.627],
50+
[0.247,0.032,0.033],
51+
[0.619,0.028,0.027],
52+
[0.884,0.022,0.037],
53+
[1.011,0.065,0.062],
54+
[0.718,0.025,0.027],
55+
[0.819,0.028,0.023],
56+
[0.765,0.024,0.020]
57+
]
58+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"system": "DuckDB (Vortex, single)",
3+
"date": "2025-05-21",
4+
"machine": "c6a.4xlarge, 500gb gp2",
5+
"cluster_size": 1,
6+
"comment": "",
7+
8+
"tags": ["rust", "vortex", "column-oriented", "embedded", "stateless"],
9+
10+
"load_time": 0,
11+
"data_size": 17138413352,
12+
13+
"result": [
14+
[0.129,0.009,0.009],
15+
[0.316,0.058,0.058],
16+
[1.203,0.150,0.150],
17+
[2.647,0.146,0.153],
18+
[2.763,0.414,0.412],
19+
[2.518,0.584,0.585],
20+
[0.234,0.107,0.110],
21+
[0.338,0.063,0.063],
22+
[4.047,0.595,0.572],
23+
[5.539,0.817,0.819],
24+
[2.342,0.326,0.321],
25+
[2.478,0.341,0.338],
26+
[2.596,0.621,0.619],
27+
[5.883,0.965,0.960],
28+
[2.908,0.649,0.650],
29+
[2.366,0.483,0.487],
30+
[5.805,1.287,1.262],
31+
[5.806,1.076,1.039],
32+
[8.363,2.188,2.193],
33+
[1.991,0.580,0.572],
34+
[26.778,1.731,1.733],
35+
[29.899,1.896,1.896],
36+
[39.083,2.986,2.973],
37+
[107.607,71.144,52.572],
38+
[4.731,0.453,0.459],
39+
[2.441,0.375,0.370],
40+
[4.725,0.470,0.467],
41+
[26.806,1.735,1.786],
42+
[20.778,10.216,10.217],
43+
[0.515,0.106,0.111],
44+
[6.083,0.575,0.581],
45+
[12.189,0.759,0.738],
46+
[10.020,2.208,2.159],
47+
[27.433,2.884,2.905],
48+
[27.539,3.811,3.769],
49+
[1.158,0.766,0.743],
50+
[1.918,1.819,1.777],
51+
[2.442,1.119,1.116],
52+
[1.817,1.669,1.668],
53+
[3.125,2.950,2.955],
54+
[0.578,0.445,0.448],
55+
[0.518,0.388,0.390],
56+
[0.366,0.235,0.233]
57+
]
58+
}

duckdb-vortex/run.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Run every query in queries.sql against a DuckDB database file.
#
# Usage: ./run.sh <database-file>
#
# For each query the OS page cache is dropped once, then the query is executed
# TRIES times inside a single duckdb invocation with `.timer on`, so only the
# first execution starts from a dropped cache. The query text and the duckdb
# arguments are echoed before duckdb's own output.

readonly TRIES=3

# Fail early with a usage message instead of handing duckdb an empty path.
db_path=${1:?usage: $0 <database-file>}

# `|| [[ -n ... ]]` keeps a final query that lacks a trailing newline.
while IFS= read -r query || [[ -n "${query}" ]]; do
  # Blank lines are not queries; skip them rather than timing empty statements.
  [[ -n "${query}" ]] || continue

  sync
  echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null

  printf '%s\n' "${query}"
  cli_params=(-c ".timer on")
  for ((attempt = 0; attempt < TRIES; attempt++)); do
    cli_params+=(-c "${query}")
  done
  printf '%s\n' "${cli_params[*]}"
  duckdb "${db_path}" "${cli_params[@]}"
done < queries.sql

0 commit comments

Comments
 (0)