Skip to content

FSST Filter Kernel (#5755) #4158

FSST Filter Kernel (#5755)

FSST Filter Kernel (#5755) #4158

Workflow file for this run

# Benchmark workflow: triggered by every push to `develop`, i.e. each time a pull request has merged.
name: Benchmarks
on:
  push:
    branches:
      - develop
# Token scopes granted to the jobs in this workflow.
permissions:
  actions: read
  contents: write
  deployments: write
  id-token: write # enables AWS-GitHub OIDC
jobs:
  # Generates a JSON record with scripts/commit-json.sh and passes it to
  # scripts/cat-s3.sh together with the bucket and object names below.
  # NOTE(review): presumably this appends the record to commits.json in the
  # results bucket — confirm against scripts/cat-s3.sh.
  commit-metadata:
    runs-on: ubuntu-latest
    timeout-minutes: 120
    steps:
      - uses: actions/checkout@v6
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v5
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Upload Commit Metadata
        shell: bash
        run: |
          set -Eeu -o pipefail -x
          sudo apt-get update && sudo apt-get install -y jq
          bash scripts/commit-json.sh > new-commit.json
          bash scripts/cat-s3.sh vortex-benchmark-results-database commits.json new-commit.json

  # Builds and runs one bench-vortex binary per matrix entry, then uploads the
  # resulting JSON file.
  bench:
    timeout-minutes: 120
    # Runner labels; the job is tagged with the run id and the matrix entry id.
    # NOTE(review): the key=value labels look like RunsOn runner selectors
    # (instance family, image, spot setting) — confirm against the runner setup.
    runs-on:
      - runs-on=${{ github.run_id }}
      - family=c6id.8xlarge
      - image=ubuntu24-full-x64
      - spot=false
      - extras=s3-cache
      - tag=${{ matrix.benchmark.id }}
    strategy:
      matrix:
        # `id` is used as the cargo binary name, the output file stem and a
        # profiling label; `name` only appears in the step title.
        benchmark:
          - id: random_access
            name: Random Access
          - id: compress
            name: Compression
    steps:
      - uses: runs-on/action@v2
        with:
          sccache: s3
      - uses: actions/checkout@v6
      - uses: ./.github/actions/setup-rust
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - name: Install DuckDB
        run: |
          wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.4.2/duckdb_cli-linux-amd64.zip | funzip > duckdb
          chmod +x duckdb
          # Put the working directory (which now holds `duckdb`) on PATH for later steps.
          echo "$PWD" >> "$GITHUB_PATH"
      - name: Build binary
        shell: bash
        env:
          RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes"
        # The main difference between this and `bench-pr.yml` is that we add the `lance` feature.
        run: |
          cargo build --bin ${{ matrix.benchmark.id }} --package bench-vortex --profile release_debug --features lance
      - name: Setup Polar Signals
        # NOTE(review): this reference appears to have been mangled by e-mail
        # obfuscation when the file was captured; presumably it should read
        # polarsignals/gh-actions-ps-profiling@<version> — confirm and restore.
        uses: polarsignals/[email protected]
        with:
          polarsignals_cloud_token: ${{ secrets.POLAR_SIGNALS_API_KEY }}
          labels: "branch=${{ github.ref_name }};gh_run_id=${{ github.run_id }};benchmark=${{ matrix.benchmark.id }}"
          parca_agent_version: "0.42.0"
          project_uuid: "e5d846e1-b54c-46e7-9174-8bf055a3af56"
          extra_args: "--off-cpu-threshold=0.001" # Personally tuned by @brancz
      - name: Run ${{ matrix.benchmark.name }} benchmark
        shell: bash
        env:
          RUST_BACKTRACE: full
        run: |
          target/release_debug/${{ matrix.benchmark.id }} -d gh-json -o ${{ matrix.benchmark.id }}.json --formats parquet,lance,vortex
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v5
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Upload Benchmark Results
        shell: bash
        run: |
          bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz ${{ matrix.benchmark.id }}.json

  # Delegates the SQL benchmark suites to the reusable sql-benchmarks workflow.
  sql:
    uses: ./.github/workflows/sql-benchmarks.yml
    secrets: inherit
    with:
      mode: "develop"
      # JSON array passed to the called workflow as a string. Kept as strict
      # JSON (no trailing comma after the last entry) so it does not depend on
      # a lenient parser.
      # NOTE(review): presumably decoded with fromJSON in sql-benchmarks.yml — confirm.
      benchmark_matrix: |
        [
          {
            "id": "clickbench-nvme",
            "subcommand": "clickbench",
            "name": "Clickbench on NVME",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "build_args": "--features lance"
          },
          {
            "id": "tpch-nvme",
            "subcommand": "tpch",
            "name": "TPC-H SF=1 on NVME",
            "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "--scale-factor 1.0",
            "build_args": "--features lance"
          },
          {
            "id": "tpch-s3",
            "subcommand": "tpch",
            "name": "TPC-H SF=1 on S3",
            "local_dir": "bench-vortex/data/tpch/1.0",
            "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "--scale-factor 1.0",
            "build_args": "--features lance"
          },
          {
            "id": "tpch-nvme-10",
            "subcommand": "tpch",
            "name": "TPC-H SF=10 on NVME",
            "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "--scale-factor 10.0",
            "build_args": "--features lance"
          },
          {
            "id": "tpch-s3-10",
            "subcommand": "tpch",
            "name": "TPC-H SF=10 on S3",
            "local_dir": "bench-vortex/data/tpch/10.0",
            "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/${{github.run_id}}/tpch/10.0/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "--scale-factor 10.0",
            "build_args": "--features lance"
          },
          {
            "id": "tpcds-nvme",
            "subcommand": "tpcds",
            "name": "TPC-DS SF=1 on NVME",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "--scale-factor 1.0"
          },
          {
            "id": "statpopgen",
            "subcommand": "statpopgen",
            "name": "Statistical and Population Genetics",
            "local_dir": "bench-vortex/data/statpopgen",
            "targets": "duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "--scale-factor 100"
          },
          {
            "id": "fineweb",
            "subcommand": "fineweb",
            "name": "FineWeb NVMe",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "--scale-factor 100"
          },
          {
            "id": "fineweb-s3",
            "subcommand": "fineweb",
            "name": "FineWeb S3",
            "local_dir": "bench-vortex/data/fineweb",
            "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/${{github.run_id}}/fineweb/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "--scale-factor 100"
          }
        ]