Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
d0f884d
upd
yzh119 Oct 5, 2025
a0b9b3a
upd
yzh119 Oct 5, 2025
4e9bc16
upd
yzh119 Oct 5, 2025
449eaf0
upd
yzh119 Oct 5, 2025
9e70e37
upd
yzh119 Oct 5, 2025
db75317
upd
yzh119 Oct 5, 2025
8ab269d
upd
yzh119 Oct 6, 2025
030567e
remove unused files
yzh119 Oct 6, 2025
c584970
upd
yzh119 Oct 6, 2025
0925678
upd
yzh119 Oct 6, 2025
13b44e5
upd
yzh119 Oct 6, 2025
95f194d
remove unused files
yzh119 Oct 6, 2025
d3efce7
upd
yzh119 Oct 6, 2025
a0c0f89
upd
yzh119 Oct 6, 2025
80a6f5e
upd
yzh119 Oct 6, 2025
e4bae87
upd
yzh119 Oct 6, 2025
ff96843
upd
yzh119 Oct 6, 2025
43bf95c
add unittest following build
yzh119 Oct 6, 2025
a841721
upd
yzh119 Oct 6, 2025
05ce648
upd
yzh119 Oct 6, 2025
d09ba32
upd
yzh119 Oct 6, 2025
c12d5c4
upd
yzh119 Oct 6, 2025
b18da8b
upd
yzh119 Oct 6, 2025
7f6cbee
upd
yzh119 Oct 6, 2025
1db6e19
upd
yzh119 Oct 6, 2025
23d2d6b
upd
yzh119 Oct 6, 2025
8cf6f6c
upd
yzh119 Oct 6, 2025
c60cedf
upd
yzh119 Oct 6, 2025
69284ed
use import-mode=importlib
yzh119 Oct 6, 2025
f130e55
add unittests without jit
yzh119 Oct 6, 2025
d3e7b6d
add backoff for download cubin files, and add number of retries
yzh119 Oct 6, 2025
06ffe13
bugfix: turned off verbose
yzh119 Oct 7, 2025
99a6f17
upd
yzh119 Oct 7, 2025
1ce9132
upd
yzh119 Oct 7, 2025
b717e25
upd
yzh119 Oct 7, 2025
1e5787a
upd
yzh119 Oct 8, 2025
ec28dfc
Merge remote-tracking branch 'origin/main' into nightly
yzh119 Oct 8, 2025
dffa1be
upd
yzh119 Oct 8, 2025
865f3ae
upd
yzh119 Oct 8, 2025
e7d89b8
upd
yzh119 Oct 8, 2025
5ee14dc
upd
yzh119 Oct 8, 2025
26a8f1b
address circular dependency
yzh119 Oct 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
322 changes: 322 additions & 0 deletions .github/workflows/nightly-release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
name: Nightly Release

# Triggers: one scheduled build per day plus an optional manual dispatch.
# The temporary `pull_request` trigger that existed only for debugging has
# been dropped so the release and wheel-index jobs never run for PRs.
on:
  schedule:
    # Run at 00:00 UTC every day
    - cron: '0 0 * * *'
  workflow_dispatch:
    inputs:
      date_suffix:
        description: 'Date suffix for dev version (YYYYMMDD, leave empty for today)'
        required: false
        type: string

jobs:
setup:
  # Computes the values shared by every downstream job: the base version read
  # from version.txt, the dev (date) suffix, and the release tag built from both.
  runs-on: ubuntu-latest
  outputs:
    dev_suffix: ${{ steps.set-suffix.outputs.dev_suffix }}
    release_tag: ${{ steps.set-suffix.outputs.release_tag }}
    version: ${{ steps.set-suffix.outputs.version }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set date suffix and release tag
      id: set-suffix
      env:
        # The dispatch input is handed over through the environment instead of
        # being expanded into the script text, so its content can never be
        # interpreted as shell code. It is empty for scheduled runs.
        DATE_SUFFIX_INPUT: ${{ inputs.date_suffix }}
      run: |
        set -euo pipefail

        # Read version from version.txt
        VERSION=$(tr -d '[:space:]' < version.txt)

        # Set date suffix
        if [ -n "${DATE_SUFFIX_INPUT}" ]; then
          # The suffix ends up in the package version and in the release tag,
          # so only the documented YYYYMMDD form (eight digits) is accepted.
          if [[ ! "${DATE_SUFFIX_INPUT}" =~ ^[0-9]{8}$ ]]; then
            echo "::error::date_suffix must be in YYYYMMDD format, got '${DATE_SUFFIX_INPUT}'"
            exit 1
          fi
          DEV_SUFFIX="${DATE_SUFFIX_INPUT}"
        else
          DEV_SUFFIX=$(date -u +%Y%m%d)
        fi

        # Create release tag with version
        RELEASE_TAG="nightly-v${VERSION}-${DEV_SUFFIX}"

        {
          echo "version=${VERSION}"
          echo "dev_suffix=${DEV_SUFFIX}"
          echo "release_tag=${RELEASE_TAG}"
        } >> "$GITHUB_OUTPUT"
        echo "Base version: ${VERSION}"
        echo "Using dev suffix: ${DEV_SUFFIX}"
        echo "Release tag: ${RELEASE_TAG}"

build-flashinfer-python:
  # Produces the flashinfer-python source distribution and publishes it as the
  # `flashinfer-python-sdist` workflow artifact.
  runs-on: ubuntu-latest
  needs: setup
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: true

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays the string "3.10" rather than a float.
        python-version: "3.10"

    - name: Install build dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build wheel

    - name: Build flashinfer-python sdist
      run: |
        echo "Building flashinfer-python with dev suffix: ${FLASHINFER_DEV_RELEASE_SUFFIX}"
        echo "Git commit: $(git rev-parse HEAD)"
        python -m build --sdist
        ls -lh dist/
      env:
        # Dev (date) suffix computed by the setup job, exported for the build.
        FLASHINFER_DEV_RELEASE_SUFFIX: ${{ needs.setup.outputs.dev_suffix }}

    - name: Upload flashinfer-python artifact
      uses: actions/upload-artifact@v4
      with:
        name: flashinfer-python-sdist
        path: dist/*.tar.gz
        retention-days: 7

build-flashinfer-cubin:
  # Builds the flashinfer-cubin wheel from the flashinfer-cubin/ directory and
  # publishes it as the `flashinfer-cubin-wheel` workflow artifact.
  needs: setup
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: true

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install build dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build twine wheel
        # The requirement is quoted: unquoted, the shell reads `>=61.0` as a
        # redirection of pip's output into a file named "=61.0" and the
        # version constraint is silently lost.
        pip install "setuptools>=61.0" requests filelock torch tqdm numpy apache-tvm-ffi==0.1.0b15

    - name: Build flashinfer-cubin wheel
      env:
        # Dev (date) suffix computed by the setup job, exported for the build.
        FLASHINFER_DEV_RELEASE_SUFFIX: ${{ needs.setup.outputs.dev_suffix }}
      run: |
        echo "Building flashinfer-cubin with dev suffix: ${FLASHINFER_DEV_RELEASE_SUFFIX}"
        echo "Git commit: $(git rev-parse HEAD)"
        cd flashinfer-cubin
        # Start from a clean tree so stale build outputs are not picked up.
        rm -rf dist build *.egg-info
        python -m build --wheel
        ls -lh dist/
        # Copy the wheel to the repository-level dist/ used by the upload step.
        mkdir -p ../dist
        cp dist/*.whl ../dist/

    - name: Upload flashinfer-cubin artifact
      uses: actions/upload-artifact@v4
      with:
        name: flashinfer-cubin-wheel
        path: dist/*.whl
        retention-days: 7

build-flashinfer-jit-cache:
  # Builds one flashinfer-jit-cache wheel per (CUDA version, CPU architecture)
  # pair inside a builder container and uploads each wheel as its own artifact
  # named jit-cache-cu<version without dot>-<arch>.
  needs: setup
  strategy:
    # Let the remaining matrix entries finish even when one of them fails.
    fail-fast: false
    matrix:
      cuda:
        - "12.8"
        - "12.9"
        - "13.0"
      arch:
        - "x86_64"
        - "aarch64"

  # Self-hosted runners; the aarch64 matrix entry maps to the `arm64` label.
  runs-on:
    - self-hosted
    - "${{ matrix.arch == 'aarch64' && 'arm64' || matrix.arch }}"

  steps:
    - name: Display Machine Information
      run: |
        echo "CPU: $(nproc) cores, $(lscpu | grep 'Model name' | cut -d':' -f2 | xargs)"
        echo "RAM: $(free -h | awk '/^Mem:/ {print $7 " available out of " $2}')"
        echo "Disk: $(df -h / | awk 'NR==2 {print $4 " available out of " $2}')"
        echo "Architecture: $(uname -m)"

    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: true

    - name: Build wheel in container
      timeout-minutes: 180
      env:
        # Dev (date) suffix computed by the setup job.
        FLASHINFER_DEV_RELEASE_SUFFIX: ${{ needs.setup.outputs.dev_suffix }}
        # CUDA 12.8 builds use a target list without 10.3a; other versions include it.
        FLASHINFER_CUDA_ARCH_LIST: ${{ matrix.cuda == '12.8' && '7.5 8.0 8.9 9.0a 10.0a 12.0a' || '7.5 8.0 8.9 9.0a 10.0a 10.3a 12.0a' }}
        # Builder image selected by CPU architecture and tagged with the CUDA version.
        DOCKER_IMAGE: ${{ matrix.arch == 'aarch64' && format('pytorch/manylinuxaarch64-builder:cuda{0}', matrix.cuda) || format('pytorch/manylinux2_28-builder:cuda{0}', matrix.cuda) }}
      run: |
        # Extract CUDA major and minor versions
        CUDA_MAJOR=$(echo "${{ matrix.cuda }}" | cut -d'.' -f1)
        CUDA_MINOR=$(echo "${{ matrix.cuda }}" | cut -d'.' -f2)
        export CUDA_MAJOR
        export CUDA_MINOR
        export CUDA_VERSION_SUFFIX="cu${CUDA_MAJOR}${CUDA_MINOR}"

        chown -R $(id -u):$(id -g) ${{ github.workspace }}
        mkdir -p ${{ github.workspace }}/ci-cache
        chown -R $(id -u):$(id -g) ${{ github.workspace }}/ci-cache

        # Run the build script inside the container with proper mounts
        docker run --rm \
          -v ${{ github.workspace }}:/workspace \
          -v ${{ github.workspace }}/ci-cache:/ci-cache \
          -e FLASHINFER_CI_CACHE=/ci-cache \
          -e CUDA_VERSION="${{ matrix.cuda }}" \
          -e CUDA_MAJOR="$CUDA_MAJOR" \
          -e CUDA_MINOR="$CUDA_MINOR" \
          -e CUDA_VERSION_SUFFIX="$CUDA_VERSION_SUFFIX" \
          -e FLASHINFER_DEV_RELEASE_SUFFIX="${FLASHINFER_DEV_RELEASE_SUFFIX}" \
          -e ARCH="${{ matrix.arch }}" \
          -e FLASHINFER_CUDA_ARCH_LIST="${FLASHINFER_CUDA_ARCH_LIST}" \
          --user $(id -u):$(id -g) \
          -w /workspace \
          ${{ env.DOCKER_IMAGE }} \
          bash /workspace/scripts/build_flashinfer_jit_cache_whl.sh

    - name: Display wheel size
      run: du -h flashinfer-jit-cache/dist/*

    - name: Create artifact name
      id: artifact-name
      run: |
        CUDA_NO_DOT=$(echo "${{ matrix.cuda }}" | tr -d '.')
        echo "name=jit-cache-cu${CUDA_NO_DOT}-${{ matrix.arch }}" >> $GITHUB_OUTPUT

    - name: Upload flashinfer-jit-cache artifact
      uses: actions/upload-artifact@v4
      with:
        name: ${{ steps.artifact-name.outputs.name }}
        path: flashinfer-jit-cache/dist/*.whl
        retention-days: 7

create-release:
  # Recreates the nightly GitHub prerelease for the computed tag and attaches
  # the sdist, the cubin wheel and every jit-cache wheel built by this run.
  needs: [setup, build-flashinfer-python, build-flashinfer-cubin, build-flashinfer-jit-cache]
  runs-on: ubuntu-latest
  permissions:
    contents: write
    # Job-level permissions leave every unlisted scope at `none`; read access
    # to Actions is granted explicitly for the `gh run download` calls below.
    actions: read
  env:
    # Values from the setup job are exposed as environment variables so the
    # scripts reference "$VAR" instead of having expressions pasted into the
    # shell text (the tag can contain a user-supplied date suffix).
    RELEASE_TAG: ${{ needs.setup.outputs.release_tag }}
    VERSION: ${{ needs.setup.outputs.version }}
    DEV_SUFFIX: ${{ needs.setup.outputs.dev_suffix }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Create GitHub Release (empty first)
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        # Delete existing release and tag if they exist
        if gh release view "$RELEASE_TAG" &>/dev/null; then
          echo "Deleting existing release: $RELEASE_TAG"
          gh release delete "$RELEASE_TAG" --yes --cleanup-tag
        fi

        # Create new release without assets first
        gh release create "$RELEASE_TAG" \
          --title "Nightly Release v${VERSION}-${DEV_SUFFIX}" \
          --notes "Automated nightly build for version ${VERSION} (dev${DEV_SUFFIX})" \
          --prerelease

    - name: Download flashinfer-python artifact
      uses: actions/download-artifact@v4
      with:
        name: flashinfer-python-sdist
        path: dist-python/

    - name: Upload flashinfer-python to release
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        gh release upload "$RELEASE_TAG" dist-python/* --clobber

    - name: Download flashinfer-cubin artifact
      uses: actions/download-artifact@v4
      with:
        name: flashinfer-cubin-wheel
        path: dist-cubin/

    - name: Upload flashinfer-cubin to release
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        gh release upload "$RELEASE_TAG" dist-cubin/* --clobber

    - name: Upload flashinfer-jit-cache wheels to release (one at a time to avoid OOM)
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        # Upload jit-cache wheels one at a time to avoid OOM
        # Each wheel can be several GB, so we download, upload, delete, repeat
        mkdir -p dist-jit-cache

        # The two lists mirror the build matrix (CUDA versions without the
        # dot, CPU architectures) and therefore the uploaded artifact names.
        for cuda in 128 129 130; do
          for arch in x86_64 aarch64; do
            ARTIFACT_NAME="jit-cache-cu${cuda}-${arch}"
            echo "Processing ${ARTIFACT_NAME}..."

            # Download this specific artifact; a failed download is reported
            # and skipped (best effort) rather than failing the release.
            gh run download ${{ github.run_id }} -n "${ARTIFACT_NAME}" -D dist-jit-cache/ || {
              echo "Warning: Failed to download ${ARTIFACT_NAME}, skipping..."
              continue
            }

            # Upload to release
            if [ -n "$(ls -A dist-jit-cache/)" ]; then
              gh release upload "$RELEASE_TAG" dist-jit-cache/* --clobber
              echo "✅ Uploaded ${ARTIFACT_NAME}"
            fi

            # Clean up to save disk space before next iteration
            rm -rf dist-jit-cache/*
          done
        done

update-wheel-index:
  # Regenerates the wheel index in the flashinfer-ai/whl repository from the
  # wheels built by this run and pushes the result.
  needs: [setup, create-release]
  runs-on: ubuntu-latest
  steps:
    - name: Checkout flashinfer repo
      uses: actions/checkout@v4

    - name: Download all artifacts
      uses: actions/download-artifact@v4
      with:
        path: artifacts/

    - name: Collect wheels
      run: |
        # Flatten the per-artifact directories into a single dist/ folder;
        # only wheels are indexed, so the sdist is left behind.
        mkdir -p dist
        find artifacts/ -name "*.whl" -exec cp {} dist/ \;
        ls -lh dist/

    - name: Clone wheel index
      env:
        WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
      run: git clone "https://oauth2:${WHL_TOKEN}@github.com/flashinfer-ai/whl.git" flashinfer-whl

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Update wheel index
      env:
        # Passed through the environment rather than expanded into the script.
        RELEASE_TAG: ${{ needs.setup.outputs.release_tag }}
      run: |
        python3 scripts/update_whl_index.py \
          --dist-dir dist \
          --output-dir flashinfer-whl \
          --release-tag "$RELEASE_TAG"

    - name: Push wheel index
      env:
        DEV_SUFFIX: ${{ needs.setup.outputs.dev_suffix }}
      run: |
        cd flashinfer-whl
        git config --local user.name "github-actions[bot]"
        git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
        git add -A
        # `git commit` exits non-zero when nothing is staged (e.g. a re-run for
        # the same tag), which would fail the job; commit and push only when
        # the index actually changed.
        if git diff --cached --quiet; then
          echo "Wheel index is already up to date; nothing to commit"
        else
          git commit -m "update whl for nightly ${DEV_SUFFIX}"
          git push
        fi
31 changes: 31 additions & 0 deletions flashinfer-cubin/build_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,37 @@
# Add parent directory to path to import artifacts module
sys.path.insert(0, str(Path(__file__).parent.parent))


def _get_git_version():
"""Get git commit hash."""
import subprocess

try:
git_version = (
subprocess.check_output(
["git", "rev-parse", "HEAD"],
cwd=Path(__file__).parent.parent,
stderr=subprocess.DEVNULL,
)
.decode("ascii")
.strip()
)
return git_version
except Exception:
return "unknown"


# Regenerate flashinfer/_build_meta.py whenever this module is loaded; the file
# is always overwritten so the recorded version and commit stay up-to-date.
# Nothing is written when version.txt is absent.
build_meta_file = Path(__file__).parent.parent / "flashinfer" / "_build_meta.py"
version_file = Path(__file__).parent.parent / "version.txt"
if version_file.exists():
    version = version_file.read_text().strip()
    git_version = _get_git_version()
    build_meta_file.write_text(
        "".join(
            (
                '"""Build metadata for flashinfer package."""\n',
                f'__version__ = "{version}"\n',
                f'__git_version__ = "{git_version}"\n',
            )
        )
    )


def _download_cubins():
Expand Down Expand Up @@ -60,13 +82,22 @@ def _create_build_metadata():
else:
version = "0.0.0+unknown"

# Add dev suffix if specified
dev_suffix = os.environ.get("FLASHINFER_DEV_RELEASE_SUFFIX", "")
if dev_suffix:
version = f"{version}.dev{dev_suffix}"

# Get git version
git_version = _get_git_version()

# Create build metadata in the source tree
package_dir = Path(__file__).parent / "flashinfer_cubin"
build_meta_file = package_dir / "_build_meta.py"

with open(build_meta_file, "w") as f:
f.write('"""Build metadata for flashinfer-cubin package."""\n')
f.write(f'__version__ = "{version}"\n')
f.write(f'__git_version__ = "{git_version}"\n')

print(f"Created build metadata file with version {version}")
return version
Expand Down
Loading