@@ -190,56 +190,8 @@ jobs:
190190 OTEL_EXPORTER_OTLP_HEADERS : " ${{ (inputs.mode != 'pr' || github.event.pull_request.head.repo.fork == false) && secrets.OTEL_EXPORTER_OTLP_HEADERS || '' }}"
191191 OTEL_RESOURCE_ATTRIBUTES : " bench-name=${{ matrix.id }}"
192192 run : |
193- # Extract formats for each engine from the targets string.
194- # Example input: "datafusion:parquet,datafusion:vortex,datafusion:lance,duckdb:parquet"
195- #
196- # Pipeline: split by comma -> filter by engine prefix -> remove prefix -> rejoin with commas
197-
198- # Lance is filtered out of df_formats because it uses a separate binary (lance-bench).
199- df_formats=$(echo "${{ matrix.targets }}" | tr ',' '\n' | grep '^datafusion:' | grep -v ':lance$' | sed 's/datafusion://' | tr '\n' ',' | sed 's/,$//')
200- ddb_formats=$(echo "${{ matrix.targets }}" | tr ',' '\n' | grep '^duckdb:' | sed 's/duckdb://' | tr '\n' ',' | sed 's/,$//')
201- has_lance=$(echo "${{ matrix.targets }}" | grep -q 'datafusion:lance' && echo "true" || echo "false")
202-
203- # Build options string if scale_factor is set
204- opts=""
205- if [ -n "${{ matrix.scale_factor }}" ]; then
206- opts="--opt scale-factor=${{ matrix.scale_factor }}"
207- fi
208-
209- touch results.json
210-
211- # Run datafusion-bench
212- if [ -n "$df_formats" ]; then
213- target/release_debug/datafusion-bench ${{ matrix.subcommand }} \
214- -d gh-json \
215- --formats "$df_formats" \
216- $opts \
217- -o df-results.json
218-
219- cat df-results.json >> results.json
220- fi
221-
222- # Run duckdb-bench
223- if [ -n "$ddb_formats" ]; then
224- target/release_debug/duckdb-bench ${{ matrix.subcommand }} \
225- -d gh-json \
226- --formats "$ddb_formats" \
227- $opts \
228- --delete-duckdb-database \
229- -o ddb-results.json
230-
231- cat ddb-results.json >> results.json
232- fi
233-
234- # Run lance-bench
235- if [ "$has_lance" = "true" ] && [ -f "target/release_debug/lance-bench" ]; then
236- target/release_debug/lance-bench ${{ matrix.subcommand }} \
237- -d gh-json \
238- $opts \
239- -o lance-results.json
240-
241- cat lance-results.json >> results.json
242- fi
193+ .github/scripts/run-sql-bench.sh "${{ matrix.subcommand }}" "${{ matrix.targets }}" \
194+ ${{ matrix.scale_factor && format('--scale-factor {0}', matrix.scale_factor) || '' }}
243195
244196 - name : Run ${{ matrix.name }} benchmark (remote)
245197 if : matrix.remote_storage != null && (inputs.mode != 'pr' || github.event.pull_request.head.repo.fork == false)
@@ -252,53 +204,10 @@ jobs:
252204 OTEL_EXPORTER_OTLP_HEADERS : " ${{ (inputs.mode != 'pr' || github.event.pull_request.head.repo.fork == false) && secrets.OTEL_EXPORTER_OTLP_HEADERS || '' }}"
253205 OTEL_RESOURCE_ATTRIBUTES : " bench-name=${{ matrix.id }}"
254206 run : |
255- # Lance on remote storage is not supported. The infrastructure to generate and upload
256- # lance files to S3 does not exist. If you need lance on S3, you must first implement:
257- # 1. Lance data generation in data-gen (or a separate step)
258- # 2. Lance data upload to S3 before this step runs
259- if echo "${{ matrix.targets }}" | grep -q 'lance'; then
260- echo "ERROR: Lance format is not supported for remote storage benchmarks."
261- echo "Remove 'datafusion:lance' from targets for benchmark '${{ matrix.id }}'."
262- exit 1
263- fi
264-
265- # Extract formats for each engine from the targets string.
266- # Example input: "datafusion:parquet,datafusion:vortex,duckdb:parquet"
267- #
268- # Pipeline: split by comma -> filter by engine prefix -> remove prefix -> rejoin with commas
269- df_formats=$(echo "${{ matrix.targets }}" | tr ',' '\n' | grep '^datafusion:' | sed 's/datafusion://' | tr '\n' ',' | sed 's/,$//')
270- ddb_formats=$(echo "${{ matrix.targets }}" | tr ',' '\n' | grep '^duckdb:' | sed 's/duckdb://' | tr '\n' ',' | sed 's/,$//')
271-
272- # Build options string if scale_factor is set
273- opts="--opt remote-data-dir=${{ matrix.remote_storage }}"
274- if [ -n "${{ matrix.scale_factor }}" ]; then
275- opts="--opt scale-factor=${{ matrix.scale_factor }} ${opts}"
276- fi
277-
278- touch results.json
279-
280- # Run datafusion-bench with remote storage
281- if [ -n "$df_formats" ]; then
282- target/release_debug/datafusion-bench ${{ matrix.subcommand }} \
283- -d gh-json \
284- --formats "$df_formats" \
285- $opts \
286- -o df-results.json
287-
288- cat df-results.json >> results.json
289- fi
290-
291- # Run duckdb-bench with remote storage
292- if [ -n "$ddb_formats" ]; then
293- target/release_debug/duckdb-bench ${{ matrix.subcommand }} \
294- -d gh-json \
295- --formats "$ddb_formats" \
296- $opts \
297- --delete-duckdb-database \
298- -o ddb-results.json
299-
300- cat ddb-results.json >> results.json
301- fi
207+ .github/scripts/run-sql-bench.sh "${{ matrix.subcommand }}" "${{ matrix.targets }}" \
208+ --remote-storage "${{ matrix.remote_storage }}" \
209+ --benchmark-id "${{ matrix.id }}" \
210+ ${{ matrix.scale_factor && format('--scale-factor {0}', matrix.scale_factor) || '' }}
302211
303212 - name : Install uv
304213 if : inputs.mode == 'pr'
0 commit comments