# Run Lance Benchmarks - workflow definition (as shown for run #246)

# Manually dispatched workflow that benchmarks a single commit. The inputs
# below select which benchmark jobs run and whether the run is tied to a PR.
name: Run Lance Benchmarks
on:
  workflow_dispatch:
    inputs:
      # Commit to benchmark; forwarded to every benchmark job as `git_sha`.
      git_sha:
        description: "Git SHA to benchmark"
        required: true
        type: string
      # Optional. When set, it is forwarded to the benchmark jobs and enables
      # the compare-and-comment job.
      pr_number:
        description: "PR number (enables PR mode - local results only)"
        required: false
        type: number
      # Repository that the PR comment is posted to.
      pr_repo:
        description: "PR repository (org/repo format)"
        required: false
        type: string
        default: "lance-format/lance"
      # Master switch for the Rust benchmark jobs.
      run_rust:
        description: "Run Rust benchmarks"
        required: false
        type: boolean
        default: true
      # Master switch for the Python and FTS index benchmark jobs.
      run_python:
        description: "Run Python benchmarks"
        required: false
        type: boolean
        default: true
      # Optional filter: each Rust job checks whether its crate name appears
      # in this string; an empty string selects every enabled Rust job.
      rust_crates:
        description: "Comma-separated list of Rust crates (empty = all)"
        required: false
        type: string
        default: ""
# Benchmark jobs. Every `bench-*` job delegates to a reusable workflow in this
# repository, forwarding the commit, the optional PR number, and the secrets
# the reusable workflows are given access to. `compare-and-comment` then
# merges the PR results, compares them, and posts the report on the PR.
jobs:
  # Rust benchmarks for rust/lance-io. Runs when Rust benchmarks are enabled
  # and the crate filter is empty or mentions `lance-io`.
  bench-io:
    if: ${{ inputs.run_rust && (inputs.rust_crates == '' || contains(inputs.rust_crates, 'lance-io')) }}
    uses: ./.github/workflows/run-rust-benchmarks.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      crate_path: "rust/lance-io"
      # Need fromJson here due to https://github.com/orgs/community/discussions/67182
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # Rust benchmarks for rust/lance-index. Disabled unconditionally.
  bench-index:
    if: false # TODO: Currently these are failing
    uses: ./.github/workflows/run-rust-benchmarks.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      crate_path: "rust/lance-index"
      # Same fromJson workaround as bench-io.
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # Rust benchmarks for rust/lance-linalg. Runs when Rust benchmarks are
  # enabled and the crate filter is empty or mentions `lance-linalg`.
  bench-linalg:
    if: ${{ inputs.run_rust && (inputs.rust_crates == '' || contains(inputs.rust_crates, 'lance-linalg')) }}
    uses: ./.github/workflows/run-rust-benchmarks.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      crate_path: "rust/lance-linalg"
      # Same fromJson workaround as bench-io.
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # Rust benchmarks for rust/lance-file. Disabled unconditionally.
  bench-file:
    if: false # TODO: Currently these are failing
    uses: ./.github/workflows/run-rust-benchmarks.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      crate_path: "rust/lance-file"
      # Same fromJson workaround as bench-io.
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # Rust benchmarks for rust/lance-encoding. Runs when Rust benchmarks are
  # enabled and the crate filter is empty or mentions `lance-encoding`.
  bench-encoding:
    if: ${{ inputs.run_rust && (inputs.rust_crates == '' || contains(inputs.rust_crates, 'lance-encoding')) }}
    uses: ./.github/workflows/run-rust-benchmarks.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      crate_path: "rust/lance-encoding"
      # Same fromJson workaround as bench-io.
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # Python benchmarks. Runs when `run_python` is true.
  bench-python:
    if: ${{ inputs.run_python }}
    uses: ./.github/workflows/run-python-benchmarks.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      # Same fromJson workaround as bench-io.
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # FTS index benchmark. Shares the `run_python` switch with bench-python.
  bench-fts-index:
    if: ${{ inputs.run_python }}
    uses: ./.github/workflows/run-fts-index-benchmark.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      # Same fromJson workaround as bench-io.
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # FTS index benchmark (Postgres variant). Also gated on `run_python`.
  bench-fts-index-postgres:
    if: ${{ inputs.run_python }}
    uses: ./.github/workflows/run-fts-index-postgres-benchmark.yml
    with:
      git_sha: ${{ inputs.git_sha }}
      # Same fromJson workaround as bench-io.
      pr_number: ${{ inputs.pr_number != '' && fromJson(inputs.pr_number) || '' }}
    secrets:
      LANCE_BENCH_DB_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
      BENCH_S3_USER_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
      BENCH_S3_USER_SECRET_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}

  # PR mode only: gathers the artifacts uploaded by the benchmark jobs, merges
  # them, compares them, and posts the resulting report as a PR comment.
  compare-and-comment:
    # The success() || failure() is to ensure that the job runs
    # even if one or more of the jobs listed in `needs` were skipped.
    # By default, GHA would propagate that skip.
    if: ${{ inputs.pr_number && (success() || failure()) }}
    # The two disabled jobs (bench-index, bench-file) are not listed here.
    needs:
      - bench-io
      - bench-linalg
      - bench-encoding
      - bench-python
      - bench-fts-index
      - bench-fts-index-postgres
    runs-on: ubuntu-latest
    steps:
      - name: Checkout lance-bench repository
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true

      # PyGithub is installed with plain pip rather than through uv;
      # presumably it is needed by the "Post comment" step, which runs
      # `python` directly instead of `uv run python`.
      - name: Install dependencies
        run: |
          uv sync
          pip install PyGithub

      - name: Download PR results
        if: inputs.pr_number
        uses: actions/download-artifact@v4
        with:
          # No name, download all artifacts
          path: ./pr-results

      - name: List PR results
        run: |
          ls -la ./pr-results

      - name: Merge PR results
        if: inputs.pr_number
        run: |
          uv run python scripts/merge_pr_results.py \
            ./pr-results \
            merged-results.lance

      - name: Compare results
        env:
          LANCE_BENCH_URI: ${{ secrets.LANCE_BENCH_DB_URI }}
          AWS_ACCESS_KEY_ID: ${{ secrets.BENCH_S3_USER_ACCESS_KEY }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.BENCH_S3_USER_SECRET_KEY }}
          # Workflow inputs are passed through the environment instead of
          # being expanded into the script text, so a crafted input value
          # cannot inject shell commands.
          GIT_SHA: ${{ inputs.git_sha }}
          PR_NUMBER: ${{ inputs.pr_number }}
        run: |
          uv run python scripts/compare_pr_benchmarks.py \
            "$GIT_SHA" \
            --pr-number "$PR_NUMBER" \
            --local-results ./merged-results.lance \
            --output comparison_report.md

      - name: Print comparison report
        run: |
          cat comparison_report.md

      - name: Post comment
        env:
          GITHUB_TOKEN: ${{ secrets.PR_COMMENT_TOKEN }}
          # Passed via env for the same injection-safety reason as above.
          PR_NUMBER: ${{ inputs.pr_number }}
          PR_REPO: ${{ inputs.pr_repo }}
        run: |
          python scripts/post_pr_comment.py \
            "$PR_NUMBER" \
            --repo "$PR_REPO" \
            --body-file comparison_report.md