Skip to content

Commit f5fe3af

Browse files
authored
Molly/enable xpu ci (sgl-project#5)
* enable ci * Update pr-test-xpu.yml * Update pr-test-xpu.yml * fix typo * enable ci test and format fix
1 parent d3f063d commit f5fe3af

22 files changed

+245
-22
lines changed

.github/workflows/lint.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
---
# Lint workflow: runs all pre-commit hooks against the full tree on every PR.
name: Lint

on: [pull_request]

# Cancel superseded lint runs for the same ref
# (consistent with the concurrency group used by pr-test-xpu.yml).
concurrency:
  group: lint-${{ github.ref }}
  cancel-in-progress: true

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not parse the version as the float 3.1.
          python-version: "3.10"

      - name: Install pre-commit hook
        run: |
          python -m pip install pre-commit
          pre-commit install

      - name: Linting
        run: pre-commit run --all-files --show-diff-on-failure

.github/workflows/pr-test-xpu.yml

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
---
# PR Test (XPU): builds the XPU kernel Docker image on a self-hosted Intel
# PVC runner, starts a container, installs test dependencies, and runs the
# sgl-kernel-xpu pytest suite plus (placeholder) E2E tests.
name: PR Test (XPU)

on:
  pull_request:
    branches: [main]
  workflow_dispatch:

# Only one run per ref; newer pushes cancel in-flight runs.
concurrency:
  group: pr-test-xpu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-and-test:
    # NOTE(review): the repository check is OR'd with
    # `github.event_name == 'pull_request'`, so every PR (including forks)
    # satisfies the condition -- confirm this is intended.
    if: (github.repository == 'sgl-project/sgl-kernel-xpu' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false
    runs-on: sglang-pvc
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history; some build/version steps need more than a shallow clone.
          fetch-depth: 0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Build Docker image
        run: |
          docker build --no-cache --progress=plain -f Dockerfile.xpu_kernel -t xpu_sglang:pvc .

      - name: Run container
        run: |
          # --device /dev/dri exposes the Intel GPU to the container.
          # The HF token is read from a file pre-provisioned on the runner.
          docker run -dt \
            --device /dev/dri/ \
            --name ci_sglang_xpu \
            -e HF_TOKEN=$(cat ~/huggingface_token.txt) \
            xpu_sglang:pvc

      - name: Install Dependency
        timeout-minutes: 20
        run: |
          docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
          docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
          docker exec ci_sglang_xpu /bin/bash -c '/miniforge3/envs/py3.10/bin/huggingface-cli login --token ${HF_TOKEN} '
          docker exec ci_sglang_xpu /bin/bash -c "ln -sf /miniforge3/envs/py3.10/bin/python3 /usr/bin/python3"

      - name: Run Sglang Kernel Cases
        timeout-minutes: 20
        run: |
          docker exec -w /root/sglang ci_sglang_xpu \
            /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/tests && python3 -m pytest -v -s test_awq_dequant.py"

      - name: Run E2E Bfloat16 tests
        timeout-minutes: 20
        run: |
          echo "[PlaceHolder for E2E Test...]"

      - name: Run E2E Quantization tests
        timeout-minutes: 20
        run: |
          echo "[PlaceHolder for E2E Test...]"

      - name: Cleanup container
        if: always()
        run: |
          docker rm -f ci_sglang_xpu || true

  finish:
    if: always()
    needs: [build-and-test]
    runs-on: sglang-pvc
    steps:
      - name: Check job status
        run: |
          if [ "${{ needs.build-and-test.result }}" != "success" ]; then
            echo "Job failed with result: ${{ needs.build-and-test.result }}"
            # No step currently writes build.log; guard the read so a missing
            # file does not mask the real failure with a cat error.
            if [ -f build.log ]; then
              echo "BUILD_LOG=$(cat build.log)" >> $GITHUB_ENV
            fi
            exit 1
          fi
          echo "All jobs completed successfully"
          exit 0

.pre-commit-config.yaml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
---
# pre-commit configuration: hygiene checks, import sorting, lint/format for
# Python and C++/CUDA sources, codespell, and notebook stripping.
default_stages: [pre-commit, pre-push, manual]

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-symlinks
      - id: destroyed-symlinks
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        args: [--allow-multiple-documents]
      - id: check-toml
      - id: check-ast
      - id: check-added-large-files
      - id: check-merge-conflict
      - id: check-shebang-scripts-are-executable
      - id: detect-private-key
      - id: debug-statements
      # NOTE(review): no-commit-to-branch fails when run directly on the
      # protected branch (e.g. CI on main) -- confirm this matches the
      # intended workflow.
      - id: no-commit-to-branch
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.11.7
    hooks:
      # Only remove unused imports (F401) in non-core directories.
      - id: ruff
        args: [--select=F401, --fixable=F401]
        files: ^(benchmark/|docs/|examples/)
        exclude: \.ipynb$
  - repo: https://github.com/psf/black
    rev: 24.10.0
    hooks:
      - id: black-jupyter
  - repo: https://github.com/codespell-project/codespell
    rev: v2.4.1
    hooks:
      - id: codespell
        additional_dependencies: ['tomli']
        # NOTE(review): these paths ('python/pyproject.toml', test/srt/...,
        # docs/...) look inherited from the parent sglang repo -- verify they
        # exist in this repository.
        args: ['--toml', 'python/pyproject.toml', '-L', 'cann']
        exclude: |
          (?x)^(
              test/srt/test_reasoning_parser\.py|
              docs/advanced_features/vlm_query\.ipynb
          )$
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v18.1.8
    hooks:
      - id: clang-format
        types_or: [c++, cuda]
        args: [--style=file, --verbose]
  - repo: https://github.com/kynan/nbstripout
    rev: 0.8.1
    hooks:
      # Strip notebook noise but keep cell outputs.
      - id: nbstripout
        args:
          - '--keep-output'
          - '--extra-keys=metadata.kernelspec metadata.language_info.version'

Dockerfile.xpu_kernel

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
# Build image for sgl-kernel-xpu CI: Intel oneAPI base + conda env + XPU
# nightly PyTorch + vllm (XPU fork) + sglang + sgl-kernel-xpu built from source.
#
# Usage: docker build --build-arg PYTHON_VERSION=3.10 -t sglang:xpu_kernel -f Dockerfile.xpu_kernel --no-cache .
#
# NOTE(review): the base image pins Ubuntu 24.04; for Battlemage devices a
# 24.10-based image is reportedly required, but no UBUNTU_VERSION build-arg
# exists -- change the FROM line manually (or add the ARG) for that case.
FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu24.04

ENV DEBIAN_FRONTEND=noninteractive

# Define build arguments
ARG PYTHON_VERSION=3.10

ARG SG_LANG_REPO=https://github.com/sgl-project/sglang.git
ARG SG_LANG_BRANCH=main

ARG SG_LANG_KERNEL_REPO=https://github.com/sgl-project/sgl-kernel-xpu.git
ARG SG_LANG_KERNEL_BRANCH=main

# Install Miniforge and create the Python env; activate it in every login shell.
# (curl: -o names the output file; the previous "-v -O" combination conflicted
# with -s/-o for a single URL.)
RUN curl -fsSL -o miniforge.sh https://github.com/conda-forge/miniforge/releases/download/25.1.1-0/Miniforge3-Linux-x86_64.sh && \
    bash miniforge.sh -b -p ./miniforge3 && \
    rm miniforge.sh && \
    . ./miniforge3/bin/activate && \
    conda create -y -n py${PYTHON_VERSION} python=${PYTHON_VERSION} && conda activate py${PYTHON_VERSION} && \
    conda install pip && \
    echo ". /miniforge3/bin/activate; conda activate py${PYTHON_VERSION}; . /opt/intel/oneapi/setvars.sh; cd /root/" >> /root/.bashrc;

# Intel offline compiler; clean apt lists to keep the layer small.
RUN apt-get update && \
    apt-get install -y intel-ocloc && \
    rm -rf /var/lib/apt/lists/*

# Install nightly XPU PyTorch wheels.
RUN --mount=type=secret,id=github_token \
    cd /root && \
    . /miniforge3/bin/activate && \
    conda activate py${PYTHON_VERSION} && \
    # . /opt/intel/oneapi/setvars.sh --force && \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu

# Build and install vllm (XPU fork) from source.
RUN --mount=type=secret,id=github_token \
    cd /root && \
    . /miniforge3/bin/activate && \
    conda activate py${PYTHON_VERSION} && \
    echo "Building vllm/sglang from source ..." && \
    git clone https://github.com/zhuyuhua-v/vllm.git && \
    cd vllm && \
    git checkout yuhua/deepseek && \
    pip install setuptools_scm --root-user-action=ignore && \
    pip install setuptools==75.6.0 packaging==24.2 --root-user-action=ignore && \
    VLLM_TARGET_DEVICE=xpu python setup.py install

# Build and install sglang and sgl-kernel-xpu from source.
RUN --mount=type=secret,id=github_token \
    cd /root && \
    . /miniforge3/bin/activate && \
    conda activate py${PYTHON_VERSION} && \
    echo "cloning ${SG_LANG_BRANCH} from ${SG_LANG_REPO}" && \
    git clone --branch ${SG_LANG_BRANCH} --single-branch ${SG_LANG_REPO} && \
    cd sglang && \
    pip install -e "python[all_xpu]" --root-user-action=ignore && \
    # Clone sgl-kernel and build sglang-kernel...
    echo "cloning ${SG_LANG_KERNEL_REPO} from ${SG_LANG_KERNEL_BRANCH}" && \
    git clone --branch ${SG_LANG_KERNEL_BRANCH} --single-branch ${SG_LANG_KERNEL_REPO} && \
    cd sgl-kernel-xpu && \
    pip install -v . && \
    # Install required packages for sglang workloads
    pip install msgspec blake3 py-cpuinfo compressed_tensors gguf partial_json_parser einops --root-user-action=ignore && \
    conda install libsqlite=3.48.0 -y && \
    echo ". /miniforge3/bin/activate; conda activate py${PYTHON_VERSION}; cd /root/" >> /root/.bashrc;

# Set the default shell to bash
SHELL ["bash", "-c"]
CMD ["bash", "-c", "source /root/.bashrc && exec bash"]

benchmark/bench_fp8_blockwise_gemm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,10 @@
77
import triton
88
from deep_gemm import get_col_major_tma_aligned_tensor
99
from sgl_kernel import fp8_blockwise_scaled_mm
10-
from vllm._custom_ops import cutlass_scaled_mm as vllm_scaled_mm
11-
1210
from sglang.srt.layers.quantization.fp8_kernel import (
1311
w8a8_block_fp8_matmul_triton as w8a8_block_fp8_matmul,
1412
)
13+
from vllm._custom_ops import cutlass_scaled_mm as vllm_scaled_mm
1514

1615

1716
def get_weight_shapes(args):

benchmark/bench_moe_ep_post_reorder.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import torch
22
import triton
33
from sgl_kernel import ep_moe_post_reorder
4-
54
from sglang.srt.layers.moe.ep_moe.kernels import post_reorder_triton_kernel
65

76
batch_sizes = [64, 128, 256, 512, 640, 768, 1024, 2048, 4096]

benchmark/bench_moe_ep_pre_reorder.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import torch
22
import triton
33
from sgl_kernel import ep_moe_pre_reorder
4-
54
from sglang.srt.layers.moe.ep_moe.kernels import pre_reorder_triton_kernel
65

76
batch_sizes = [64, 128, 256, 512, 640, 768, 1024, 2048, 4096]

benchmark/bench_moe_fused_gate.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import triton
66
import triton.language as tl
77
from sgl_kernel import moe_fused_gate
8-
98
from sglang.srt.layers.moe.topk import biased_grouped_topk
109

1110

benchmark/bench_moe_silu_and_mul.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import torch
44
import triton
55
from sgl_kernel import ep_moe_silu_and_mul
6-
76
from sglang.srt.layers.moe.ep_moe.kernels import silu_and_mul_triton_kernel
87

98
batch_size_range = [64, 128, 256, 512, 640, 768, 1024, 2048, 4096]

benchmark/bench_per_tensor_quant_fp8.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@
77
import triton
88
import triton.testing
99
from sgl_kernel import sgl_per_tensor_quant_fp8
10-
from vllm import _custom_ops as ops
11-
1210
from sglang.srt.utils import is_hip
11+
from vllm import _custom_ops as ops
1312

1413
_is_hip = is_hip()
1514
fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn

0 commit comments

Comments
 (0)