kernels: use Metadata from kernels-data (#264)

Workflow file for this run

# End-to-end test: scaffold a kernel project, build it with kernel-builder,
# upload the artifacts to the Hub, then download and exercise them via get_kernel.
name: "E2E: kernel-builder init + build + upload + download"

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
    # NOTE(review): indentation was lost in the pasted source; 'paths' is
    # assumed to filter the pull_request trigger — confirm intended scope.
    paths:
      - "kernel-builder/**"
      - "kernels/src/**"
      - "nix-builder/**"
      - "kernels-data/**"
      - ".github/workflows/test_e2e.yaml"
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

env:
  # Target the Hub CI endpoint rather than production.
  HF_ENDPOINT: https://hub-ci.huggingface.co
  HF_TOKEN: ${{ secrets.HF_HUB_CI_TOKEN }}
  E2E_REPO_ID: __DUMMY_KERNELS_USER__/kernels-upload-test
  # Unique branch per PR/run (run_attempt keeps reruns from colliding).
  E2E_BRANCH: e2e-${{ github.event.pull_request.number || github.run_id }}-${{ github.run_attempt }}

# One in-flight run per PR (or per ref on push); newer runs cancel older ones.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Scaffolds a fresh kernel project, builds one variant with the in-repo
  # kernel-builder, verifies the artifacts, and pushes them to the Hub branch.
  init-build-upload:
    name: Init, build, and upload kernel
    runs-on:
      group: aws-highmemory-32-plus-nix
    outputs:
      # Torch/CUDA variant name consumed by the download-and-test job.
      variant: ${{ steps.variant.outputs.name }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25
        with:
          extra-conf: |
            max-jobs = 8
            cores = 12
            sandbox-fallback = false
      - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16
        with:
          name: huggingface
          authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
        env:
          USER: runner
      - name: Init kernel project
        run: |
          cd /tmp
          nix run $GITHUB_WORKSPACE#kernel-builder -- init \
            --name ${{ env.E2E_REPO_ID }} \
            --backends cuda
      - name: Validate scaffold
        run: |
          cd /tmp/kernels-upload-test
          test -f build.toml
          test -f flake.nix
          test -f torch-ext/kernels_upload_test/__init__.py
          test -f torch-ext/torch_binding.cpp
          test -f torch-ext/torch_binding.h
          test -f kernels_upload_test_cuda/kernels_upload_test.cu
          test -f tests/test_kernels_upload_test.py
          test -f example.py
          grep -q 'name = "kernels-upload-test"' build.toml
          grep -q 'repo-id = "__DUMMY_KERNELS_USER__/kernels-upload-test"' build.toml
          grep -q 'backend = "cuda"' build.toml
      - name: Patch flake.nix to use local nix-builder
        # Point the scaffolded flake at this checkout instead of the
        # published github:huggingface/kernels input.
        run: |
          cd /tmp/kernels-upload-test
          sed -i 's|github:huggingface/kernels|path:'"$GITHUB_WORKSPACE"'|' flake.nix
      - name: Determine latest variant
        id: variant
        run: |
          cd /tmp/kernels-upload-test
          VARIANT=$(nix run $GITHUB_WORKSPACE#kernel-builder -- list-variants . | tail -1)
          echo "name=$VARIANT" >> $GITHUB_OUTPUT
          echo "Building variant: $VARIANT"
      - name: Build kernel
        run: |
          cd /tmp/kernels-upload-test
          nix run $GITHUB_WORKSPACE#kernel-builder -- build --variant ${{ steps.variant.outputs.name }} . -L
      - name: Verify build artifacts
        run: |
          cd /tmp/kernels-upload-test
          VARIANT_DIR=$(ls -d result/torch* | head -1)
          echo "Built variant: $VARIANT_DIR"
          test -f "$VARIANT_DIR/__init__.py"
          test -f "$VARIANT_DIR/metadata.json"
          ls "$VARIANT_DIR"/*.so
      - name: Upload kernel to Hub
        # NOTE(review): two uploads — the second adds --repo-type model;
        # presumably intentional (kernel repo + model repo), confirm.
        run: |
          nix run $GITHUB_WORKSPACE#kernel-builder -- upload /tmp/kernels-upload-test --branch ${{ env.E2E_BRANCH }}
          nix run $GITHUB_WORKSPACE#kernel-builder -- upload /tmp/kernels-upload-test --branch ${{ env.E2E_BRANCH }} --repo-type model
download-and-test:
name: Download and test kernel via get_kernel
needs: init-build-upload
runs-on:
group: aws-g6-24xlarge
env:
UV_PYTHON_PREFERENCE: only-managed
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv and set Python version
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
with:
python-version: "3.12"
- name: Install Python deps
working-directory: ./kernels
run: |
VARIANT="${{ needs.init-build-upload.outputs.variant }}"
CUDA_TAG=$(echo "$VARIANT" | grep -oP 'cu\d+')
echo "Installing torch matching variant $VARIANT (CUDA tag: $CUDA_TAG)"
uv sync --all-extras --dev
uv pip install --upgrade torch --index-url https://download.pytorch.org/whl/$CUDA_TAG
uv run --no-sync python -c "import torch; print(f'torch={torch.__version__}, cuda={torch.version.cuda}, cxx11_abi={torch.compiled_with_cxx11_abi()}')"
- name: Test get_kernel download and usage
working-directory: ./kernels
run: |
uv run --no-sync python -c "
import torch
from kernels import get_kernel
kernel = get_kernel('${{ env.E2E_REPO_ID }}', revision='${{ env.E2E_BRANCH }}')
x = torch.randn(1024, 1024, dtype=torch.float32, device='cuda')
result = kernel.kernels_upload_test(x)
expected = x + 1.0
torch.testing.assert_close(result, expected)
print('E2E test passed: get_kernel + correctness check')
"