Skip to content

Commit 7d863d7

Browse files
authored
General benchmark improvements (#2810)
1. Generate TPC-H files in S3 whenever we run SQL benchmarks. It's fast enough IMO, and I'll set an aggressive lifecycle configuration on the bucket to make sure data from old branches gets cleaned up. 2. Whenever we run a benchmark binary, use `--package bench-vortex`, which seems to save on some dependencies because it doesn't compile the whole workspace.
1 parent c55d95c commit 7d863d7

File tree

5 files changed

+27
-24
lines changed

5 files changed

+27
-24
lines changed

.github/workflows/bench-pr.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
env:
5959
RUSTFLAGS: '-C target-cpu=native'
6060
run: |
61-
cargo run --bin ${{ matrix.benchmark.id }} --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
61+
cargo run --bin ${{ matrix.benchmark.id }} --package bench-vortex --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
6262
6363
- name: Setup AWS CLI
6464
uses: aws-actions/configure-aws-credentials@v4

.github/workflows/bench.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
env:
6565
RUSTFLAGS: '-C target-cpu=native'
6666
run: |
67-
cargo run --bin ${{ matrix.benchmark.id }} --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
67+
cargo run --bin ${{ matrix.benchmark.id }} --package bench-vortex --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
6868
6969
- name: Setup AWS CLI
7070
uses: aws-actions/configure-aws-credentials@v4

.github/workflows/ci.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,6 @@ jobs:
170170
with:
171171
submodules: "recursive"
172172
- uses: rui314/setup-mold@v1
173-
with:
174-
targets: ${{matrix.config.target || ''}}
175173
- uses: ./.github/actions/setup-c++
176174
- name: Install wasm32 target
177175
if: ${{ matrix.config.target == 'wasm32-unknown-unknown' }}

.github/workflows/generate-benchmarks-s3.yml

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,11 @@ jobs:
3131
with:
3232
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
3333
aws-region: us-east-1
34-
- name: Generate TPC-H locally
35-
shell: bash
36-
run: |
37-
# We run each query once to make sure we don't upload a file if there's a bug that causes a panic.
38-
cargo run --release --bin tpch -- --formats parquet,vortex -i1
39-
aws s3 rm --recursive s3://vortex-bench-dev-eu/tpch-sf1/
40-
aws s3 cp --recursive bench-vortex/data/tpch/1 s3://vortex-bench-dev-eu/tpch-sf1/
41-
rm -rf bench-vortex/data/tpch/
42-
4334
- name: Generate clickbench locally
4435
shell: bash
4536
run: |
4637
# We run each query once to make sure we don't upload a file if there's a bug that causes a panic.
47-
cargo run --release --bin clickbench -- --formats parquet,vortex -i1
48-
aws s3 rm --recursive s3://vortex-bench-dev-eu/clickbench/
49-
aws s3 cp --recursive bench-vortex/data/clickbench_partitioned s3://vortex-bench-dev-eu/clickbench/
38+
cargo run --release --bin clickbench --package bench-vortex -- --formats parquet,vortex -i1
39+
aws s3 rm --recursive s3://vortex-bench-dev-eu/develop/clickbench/
40+
aws s3 cp --recursive bench-vortex/data/clickbench_partitioned s3://vortex-bench-dev-eu/develop/clickbench/
5041
rm -rf bench-vortex/data/clickbench_partitioned/

.github/workflows/sql-benchmarks.yml

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ on:
99

1010
jobs:
1111
bench:
12+
# S3 is shared state here, and we want to make sure only one of each job runs at a time
13+
concurrency:
14+
group: ${{ github.workflow }}-${{ github.ref_name }}-${{matrix.id}}
15+
cancel-in-progress: false
1216
strategy:
1317
fail-fast: false
1418
matrix:
@@ -18,13 +22,16 @@ jobs:
1822
- id: tpch-nvme
1923
binary_name: tpch
2024
name: TPC-H on NVME
25+
local_dir: bench-vortex/data/tpch/1
2126
- id: clickbench-nvme
2227
binary_name: clickbench
2328
name: Clickbench on NVME
29+
local_dir: bench-vortex/data/clickbench_partitioned
2430
- id: tpch-s3
2531
binary_name: tpch
2632
name: TPC-H on S3
27-
remote_storage: s3://vortex-bench-dev-eu/tpch-sf1/
33+
local_dir: bench-vortex/data/tpch/1
34+
remote_storage: s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch-sf1/
2835
runs-on:
2936
- runs-on=${{ github.run_id }}
3037
- family=c7i.8xlarge
@@ -46,6 +53,12 @@ jobs:
4653
with:
4754
submodules: "recursive"
4855

56+
- name: Setup AWS CLI
57+
uses: aws-actions/configure-aws-credentials@v4
58+
with:
59+
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
60+
aws-region: us-east-1
61+
4962
- name: Run ${{ matrix.name }} benchmark
5063
if: matrix.remote_storage == null
5164
shell: bash
@@ -59,6 +72,7 @@ jobs:
5972
run: |
6073
cargo run \
6174
--bin ${{ matrix.binary_name }} \
75+
--package bench-vortex \
6276
--release \
6377
-- \
6478
-d gh-json \
@@ -77,22 +91,22 @@ jobs:
7791
OTEL_EXPORTER_OTLP_HEADERS: '${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}'
7892
OTEL_RESOURCE_ATTRIBUTES: 'bench-name=${{ matrix.id }}'
7993
run: |
94+
# Generate data, running each query once to make sure they don't panic.
95+
cargo run --release --bin ${{ matrix.binary_name }} --package bench-vortex -- --formats parquet,vortex -i1
96+
aws s3 rm --recursive ${{ matrix.remote_storage }}
97+
aws s3 cp --recursive ${{matrix.local_dir}} ${{ matrix.remote_storage }}
98+
8099
cargo run \
100+
--package bench-vortex \
81101
--bin ${{ matrix.binary_name }} \
82102
--release \
83103
-- \
84104
--use-remote-data-dir ${{ matrix.remote_storage }} \
85-
--formats 'parquet,vortex' \
105+
--formats parquet,vortex \
86106
--export-spans \
87107
-d gh-json \
88108
| tee results.json
89109
90-
- name: Setup AWS CLI
91-
uses: aws-actions/configure-aws-credentials@v4
92-
with:
93-
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
94-
aws-region: us-east-1
95-
96110
- name: Install uv
97111
if: inputs.mode == 'pr'
98112
uses: spiraldb/actions/.github/actions/[email protected]

0 commit comments

Comments
 (0)