vllm-project · Xunzhuo · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025
@@ -5,12 +5,12 @@ on:
   workflow_call:
     inputs:
       tag_suffix:
-        description: 'Custom tag suffix for the Docker image'
+        description: "Custom tag suffix for the Docker image"
         required: false
         type: string
-        default: ''
+        default: ""
       is_nightly:
-        description: 'Whether this is a nightly build'
+        description: "Whether this is a nightly build"
         required: false
         type: boolean
         default: false
@@ -20,7 +20,7 @@ on:
         type: boolean
         default: true
   push:
-    branches: [ "main" ]
+    branches: ["main"]
   pull_request:
     paths:
       - ".github/workflows/docker-publish.yml"
@@ -42,16 +42,32 @@ jobs:
         # Multi-architecture build strategy:
         # - AMD64: Native build on ubuntu-latest (fast)
         # - ARM64: Cross-compilation on ubuntu-latest (faster than emulation)
-#        arch: ${{ github.event_name == 'pull_request' && fromJSON('["amd64"]') || fromJSON('["amd64", "arm64"]') }}
+        # arch: ${{ github.event_name == 'pull_request' && fromJSON('["amd64"]') || fromJSON('["amd64", "arm64"]') }}
         arch: ["amd64", "arm64"]
       fail-fast: false
 
     steps:
+      - name: Free up disk space
+        run: |
+          echo "Before cleanup:"
+          df -h
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          echo "After cleanup:"
+          df -h
+
       - name: Check out the repo
         uses: actions/checkout@v4
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host
 
       - name: Set up QEMU for cross-compilation
         if: matrix.arch == 'arm64'
@@ -172,6 +188,14 @@ jobs:
             fi
           fi
 
+      - name: Additional cleanup for llm-katan (large Python packages)
+        if: matrix.image == 'llm-katan'
+        run: |
+          echo "Freeing up more space for llm-katan build..."
+          sudo apt-get clean
+          sudo rm -rf /var/lib/apt/lists/*
+          df -h
+
       - name: Build and push ${{ matrix.image }} Docker image
         id: build
         uses: docker/build-push-action@v5
@@ -182,10 +206,8 @@ jobs:
           push: ${{ github.event_name != 'pull_request' }}
           load: ${{ github.event_name == 'pull_request' }}
           tags: ${{ steps.tags.outputs.tags }}
-          cache-from: |
-            type=gha
-            type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache,mode=max
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
           build-args: |
             BUILDKIT_INLINE_CACHE=1
             CARGO_BUILD_JOBS=${{ github.event_name == 'pull_request' && '8' || '16' }}

@@ -64,6 +64,7 @@ jobs:
           key: ${{ runner.os }}-models-v1-${{ hashFiles('tools/make/models.mk') }}
           restore-keys: |
             ${{ runner.os }}-models-v1-
+        continue-on-error: true # Don't fail the job if caching fails
 
       - name: Check go mod tidy
         run: make check-go-mod-tidy

@@ -1,100 +1,100 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
-# Basic hooks for Go, Rust, Python And JavaScript files only
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v6.0.0
-  hooks:
-  - id: trailing-whitespace
-    files: \.(go|rs|py|js)$
-  - id: end-of-file-fixer
-    files: \.(go|rs|py|js)$
-  - id: check-added-large-files
-    args: ['--maxkb=500']
-    files: \.(go|rs|py|js)$
+  # Basic hooks for Go, Rust, Python, and JavaScript files only
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+      - id: trailing-whitespace
+        files: \.(go|rs|py|js)$
+      - id: end-of-file-fixer
+        files: \.(go|rs|py|js)$
+      - id: check-added-large-files
+        args: ["--maxkb=500"]
+        files: \.(go|rs|py|js)$
 
-# Go specific hooks
-- repo: local
-  hooks:
-  - id: go-fmt
-    name: go fmt
-    entry: gofmt -w
-    language: system
-    files: \.go$
+  # Go specific hooks
+  - repo: local
+    hooks:
+      - id: go-fmt
+        name: go fmt
+        entry: gofmt -w
+        language: system
+        files: \.go$
 
-- repo: local
-  hooks:
-    - id: golang-lint
-      name: go lint
-      entry: make go-lint
-      language: system
-      files: \.go$
-      pass_filenames: false
+  - repo: local
+    hooks:
+      - id: golang-lint
+        name: go lint
+        entry: make go-lint
+        language: system
+        files: \.go$
+        pass_filenames: false
 
-# Markdown specific hooks
-- repo: local
-  hooks:
-  - id: md-fmt
-    name: md fmt
-    entry: bash -c "make markdown-lint"
-    language: system
-    files: \.md$
-    exclude: ^(\node_modules/|CLAUDE\.md)
+  # Markdown specific hooks
+  - repo: local
+    hooks:
+      - id: md-fmt
+        name: md fmt
+        entry: bash -c "make markdown-lint"
+        language: system
+        files: \.md$
+        exclude: ^(\node_modules/|CLAUDE\.md)
 
-# Yaml specific hooks
-- repo: local
-  hooks:
-  - id: yaml-and-yml-fmt
-    name: yaml/yml fmt
-    entry: bash -c "make markdown-lint"
-    language: system
-    files: \.(yaml|yml)$
-    exclude: ^(\node_modules/)
+  # Yaml specific hooks
+  - repo: local
+    hooks:
+      - id: yaml-and-yml-fmt
+        name: yaml/yml fmt
+        entry: bash -c "make markdown-lint"
+        language: system
+        files: \.(yaml|yml)$
+        exclude: ^(\node_modules/)
 
-# JavaScript and TypeScript specific hooks
-- repo: local
-  hooks:
-  - id: js-ts-lint
-    name: js/ts lint
-    entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
-    language: system
-    files: \.(js|ts|tsx)$
-    exclude: ^(\node_modules/)
-    pass_filenames: false
+  # JavaScript and TypeScript specific hooks
+  - repo: local
+    hooks:
+      - id: js-ts-lint
+        name: js/ts lint
+        entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
+        language: system
+        files: \.(js|ts|tsx)$
+        exclude: ^(\node_modules/)
+        pass_filenames: false
 
-# Rust specific hooks
-- repo: local
-  hooks:
-  - id: cargo-fmt
-    name: cargo fmt
-    entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
-    language: system
-    files: \.rs$
-    pass_filenames: false
-  - id: cargo-check
-    name: cargo check
-    entry: bash -c 'cd candle-binding && cargo check'
-    language: system
-    files: \.rs$
-    pass_filenames: false
+  # Rust specific hooks
+  - repo: local
+    hooks:
+      - id: cargo-fmt
+        name: cargo fmt
+        entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
+        language: system
+        files: \.rs$
+        pass_filenames: false
+      - id: cargo-check
+        name: cargo check
+        entry: bash -c 'cd candle-binding && cargo check'
+        language: system
+        files: \.rs$
+        pass_filenames: false
 
-# Python specific hooks
-- repo: https://github.com/psf/black
-  rev: 25.1.0
-  hooks:
-  - id: black
-    language_version: python3
-    files: \.py$
-    exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
-
-- repo: https://github.com/PyCQA/isort
-  rev: 6.0.1
-  hooks:
-  - id: isort
-    args: ["--profile", "black"]
-    files: \.py$
-    exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
+  # Python specific hooks
+  # isort must run before black
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+        args: ["--profile", "black", "--line-length", "88"]
+        files: \.py$
+        exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 
+  - repo: https://github.com/psf/black
+    rev: 25.1.0
+    hooks:
+      - id: black
+        language_version: python3
+        files: \.py$
+        exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 # Commented out flake8 - only reports issues, doesn't auto-fix
 # -   repo: https://github.com/PyCQA/flake8
 #     rev: 7.3.0

@@ -14,7 +14,7 @@
 
 # BERT model for semantic caching and tool selection
 bert_model:
-  model_id: "sentence-transformers/all-MiniLM-L6-v2"
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.85
   use_cpu: true
 

@@ -3,7 +3,7 @@
 # for local development and debugging.
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 

@@ -1,5 +1,5 @@
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 semantic_cache:

@@ -3,7 +3,7 @@
 # for production deployment with Jaeger or other OTLP-compatible backends.
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 

@@ -13,7 +13,7 @@
 # - Jailbreak protection enabled
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.7  # Higher threshold for better precision
   use_cpu: true
 

@@ -13,7 +13,7 @@
 # - Minimal observability overhead
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.4  # Very low threshold for fast matching
   use_cpu: true
 

@@ -13,7 +13,7 @@
 # - Larger batch sizes for efficient processing
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.5  # Lower threshold for faster matching
   use_cpu: true
 

@@ -1,5 +1,5 @@
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true