
Commit 771f4ae

Update results and scripts for GlareDB
1 parent b89f139 commit 771f4ae

File tree

7 files changed: +198 additions, -158 deletions

glaredb/.gitignore

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+# The binary
+/glaredb
+
+# Any of the hits files
+/data
+
+# Result data, should be manually copied to the right spot if being kept.
+results.csv
+results.json

glaredb/benchmark.sh

Lines changed: 36 additions & 14 deletions
@@ -1,20 +1,42 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
-# Install
+set -e
 
-sudo apt-get install -y unzip
-curl https://glaredb.com/install.sh | sh
+repo_root=$(git rev-parse --show-toplevel)
+script_dir=$(dirname "$0")
 
-wget https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits_compatible/athena/hits.parquet
+if [[ "$(basename "$repo_root")" == "glaredb" ]]; then
+    # Inside glaredb repo, build from source.
+    cargo build --release --bin glaredb
+    cp "${repo_root}/target/release/glaredb" "${script_dir}/glaredb"
+else
+    # Not in glaredb repo, use prebuilt binary.
+    export GLAREDB_INSTALL_DIR="${script_dir}"
+    export GLAREDB_VERSION="v25.5.2"
+    curl -fsSL https://glaredb.com/install.sh | sh
+fi
 
-cat queries.sql | while read -r query
-do
-    sync
-    echo 3 | sudo tee /proc/sys/vm/drop_caches
+# Get the data.
+mkdir -p "${script_dir}/data"
+pushd "${script_dir}/data"
 
-    for i in $(seq 1 3); do
-        ./glaredb --timing --query "${query}"
-    done;
-done 2>&1 | tee log.txt
+mode="${1:-single}" # Default to 'single' if no arg given.
+case "${mode}" in
+    single)
+        wget --continue https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits_compatible/athena/hits.parquet
+        ;;
+    partitioned)
+        seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'
+        ;;
+    *)
+        echo "Invalid argument to 'benchmark.sh', expected 'single' or 'partitioned'"
+        exit 1
+        ;;
+esac
+popd
 
-cat log.txt | grep -oP 'Time: \d+\.\d+s|Error' | sed -r -e 's/Time: ([0-9]+\.[0-9]+)s/\1/; s/Error/null/' | awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
+# Ensure working directory is the script dir. The view that gets created uses a
+# relative path.
+pushd "${script_dir}"
+
+./run.sh "${mode}"
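
For reference, a typical invocation from this directory (a sketch; the mode argument defaults to 'single', and note that run.sh as of this commit only implements 'single', so 'partitioned' downloads the data and then exits with an error):

    ./benchmark.sh                # single-file hits.parquet
    ./benchmark.sh single         # same as above, explicit
    ./benchmark.sh partitioned    # fetches the 100 partitioned hits files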

glaredb/create_single.sql

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+CREATE TEMP VIEW hits AS
+    SELECT * REPLACE (EventDate::DATE AS EventDate)
+    FROM read_parquet('./data/hits.parquet');
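
The view reads hits.parquet in place (which is why load_time is reported as 0 in the results) and only rewrites EventDate to a proper DATE. A quick sanity check from this directory, reusing the same --init/-c flags that run.sh uses (the query itself is a hypothetical example):

    ./glaredb --init create_single.sql -c 'SELECT count(*) FROM hits;'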

glaredb/queries.sql

Lines changed: 43 additions & 43 deletions
Large diffs are not rendered by default.

glaredb/results/c6a.4xlarge.json

Lines changed: 46 additions & 46 deletions
@@ -1,55 +1,55 @@
 {
-    "system": "GlareDB",
-    "date": "2024-02-02",
+    "system": "GlareDB (Parquet, single)",
+    "date": "2025-05-06",
     "machine": "c6a.4xlarge, 500gb gp2",
     "cluster_size": 1,
     "comment": "",
     "tags": ["Rust", "serverless"],
     "load_time": 0,
     "data_size": 14779976446,
     "result": [
-        [0.221,0.070,0.069],
-        [0.509,0.425,0.424],
-        [0.569,0.462,0.466],
-        [0.787,0.483,0.470],
-        [1.339,1.220,1.200],
-        [1.800,1.644,1.649],
-        [0.490,0.418,0.410],
-        [0.509,0.423,0.423],
-        [2.009,1.880,1.920],
-        [3.383,3.134,3.147],
-        [0.950,0.695,0.702],
-        [0.891,0.743,0.721],
-        [1.787,1.646,1.654],
-        [4.032,3.007,2.974],
-        [1.911,1.746,1.762],
-        [1.538,1.321,1.360],
-        [3.746,3.335,3.327],
-        [3.611,3.064,2.993],
-        [7.835,6.293,6.414],
-        [0.882,0.469,0.470],
-        [9.903,1.943,1.947],
-        [11.653,2.182,2.172],
-        [22.503,4.410,4.416],
-        [56.481,11.754,11.769],
-        [3.039,0.925,0.917],
-        [1.132,0.854,0.855],
-        [2.939,0.991,0.973],
-        [9.958,2.688,2.695],
-        [9.431,5.639,5.614],
-        [1.027,0.872,0.814],
-        [2.611,1.508,1.497],
-        [6.177,1.887,1.960],
-        [9.675,9.095,8.891],
-        [12.268,7.139,7.063],
-        [12.675,7.661,7.671],
-        [2.418,2.250,2.210],
-        [9.998,2.095,2.066],
-        [9.273,2.782,2.722],
-        [10.015,2.085,2.079],
-        [18.876,3.284,3.317],
-        [2.963,0.939,0.917],
-        [2.165,0.973,0.936],
-        [1.380,0.901,0.864]
-    ]
+        [0.039,0.039,0.039],
+        [0.137,0.132,0.127],
+        [0.221,0.206,0.203],
+        [0.302,0.153,0.153],
+        [1.034,1.036,1.026],
+        [1.187,1.175,1.153],
+        [0.105,0.104,0.104],
+        [0.143,0.146,0.149],
+        [1.594,1.584,1.597],
+        [1.884,1.887,1.875],
+        [0.532,0.516,0.527],
+        [0.599,0.604,0.603],
+        [1.105,1.110,1.088],
+        [2.374,2.327,2.346],
+        [1.203,1.220,1.198],
+        [1.336,1.341,1.343],
+        [2.463,2.459,2.484],
+        [2.168,2.161,2.132],
+        [4.608,4.527,4.519],
+        [0.196,0.187,0.189],
+        [9.395,1.675,1.709],
+        [1.687,1.713,1.724],
+        [8.470,3.776,3.625],
+        [31.127,13.872,13.891],
+        [0.884,0.894,0.895],
+        [0.969,0.964,0.970],
+        [1.086,1.086,1.090],
+        [1.593,1.570,1.617],
+        [12.072,12.256,11.149],
+        [4.814,4.740,4.749],
+        [1.243,1.250,1.239],
+        [1.529,1.540,1.553],
+        [6.825,6.214,6.111],
+        [4.154,4.139,4.186],
+        [4.319,4.420,4.407],
+        [1.474,1.454,1.467],
+        [0.196,0.212,0.203],
+        [0.184,0.168,0.167],
+        [0.132,0.130,0.131],
+        [0.369,0.371,0.373],
+        [0.078,0.078,0.077],
+        [0.079,0.079,0.074],
+        [0.087,0.087,0.085]
+    ]
 }
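
Each inner triple is the three timed runs of one of the 43 queries; ClickBench-style reporting takes the best of the three. A small sketch (assuming jq is installed) to pull the best-of-three time per query:

    jq '.result | map(min)' results/c6a.4xlarge.json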

glaredb/results/c6a.metal.json

Lines changed: 0 additions & 55 deletions
This file was deleted.

glaredb/run.sh

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+set -eu
+set -o pipefail
+
+case "$1" in
+    single)
+        create_sql_file="create_single.sql"
+        ;;
+    *)
+        echo "Invalid argument to 'run.sh', expected 'single'"
+        exit 1
+        ;;
+esac
+
+TRIES=3
+QUERY_NUM=0
+
+echo "[" > results.json
+echo "query_num,iteration,duration" > results.csv
+
+cat queries.sql | while read -r query; do
+    sync
+    if [[ -r /proc/sys/vm/drop_caches ]]; then
+        # Only try to run this if we have a proc file system.
+        # Aka not mac.
+        echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null
+    fi
+
+    echo "${QUERY_NUM}: ${query}"
+
+    [ "${QUERY_NUM}" != 0 ] && echo "," >> results.json
+    echo -n " [" >> results.json
+
+    for i in $(seq 1 $TRIES); do
+        output=$(./glaredb --init "${create_sql_file}" -c ".timer on" -c "${query}")
+        duration=$(awk -F': ' '/^Execution duration/ { printf "%.3f\n", $2 }' <<< "$output")
+
+        echo "$output"
+
+        if [ -z "${duration}" ]; then
+            echo "Query failed"
+            exit 1
+        fi
+
+        # JSON results
+        echo -n "${duration}" >> results.json
+        [ "${i}" != "${TRIES}" ] && echo -n "," >> results.json
+
+        # CSV results
+        echo "${QUERY_NUM},${i},${duration}" >> results.csv
+    done
+
+    echo -n "]" >> results.json
+
+    QUERY_NUM=$((QUERY_NUM + 1))
+done
+
+echo "" >> results.csv
+echo "" >> results.json
+echo "]" >> results.json
