# Generate TPC-H and clickbench files to S3 #54

# Workflow header: display name, triggers, concurrency and token permissions.
name: "Generate TPC-H and clickbench files to S3"

on:
  # Allows the workflow to be started manually; no inputs are declared.
  workflow_dispatch: {}
  schedule:
    # 5AM UTC
    - cron: "0 5 * * *"

# Every run of this workflow shares one concurrency group, so a scheduled run
# and a manual run do not execute at the same time.
concurrency:
  group: ${{ github.workflow }}

# Declaring any scope here sets every unlisted GITHUB_TOKEN scope to "none".
permissions:
  # NOTE(review): the reason for `actions: write` is not visible in this file;
  # presumably required by the runner/cache integration -- confirm.
  actions: write
  # Lets the job request an OIDC token, which the AWS credentials step uses
  # to assume its IAM role.
  id-token: write
jobs:
  # Builds the clickbench benchmark data on a dedicated runner and publishes it
  # to the shared S3 bucket.
  generate_files:
    name: Generate S3 TPCH and clickbench files
    timeout-minutes: 120
    # NOTE(review): these look like RunsOn runner labels (instance family,
    # image, disk size, S3-backed cache) -- confirm against the RunsOn docs.
    runs-on:
      - runs-on=${{ github.run_id }}
      - family=m7i.2xlarge
      - image=ubuntu24-full-x64
      - disk=large
      - extras=s3-cache
    steps:
      - uses: runs-on/action@v2
      - uses: actions/checkout@v5
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Generate clickbench locally
        shell: bash
        run: |
          # We run each query once to make sure we don't upload a file if there's a bug that causes a panic.
          cargo run --release --bin query_bench --package bench-vortex -- \
            clickbench --targets datafusion:parquet,datafusion:vortex -i1
          # Publish with a single sync instead of "rm --recursive" followed by
          # "cp --recursive": the prefix is never emptied first, so readers do
          # not see a missing dataset mid-upload and a failed upload cannot
          # leave the prefix wiped. --delete removes keys that no longer exist
          # locally, which keeps the end state identical to rm + cp.
          aws s3 sync --delete \
            bench-vortex/data/clickbench_partitioned \
            s3://vortex-bench-dev-eu/develop/clickbench/
          # Free the local copy once it has been published.
          rm -rf bench-vortex/data/clickbench_partitioned/