Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e47255f
feat: enhance CI pipeline with improved caching and multi-arch support
Aias00 Oct 8, 2025
8a3c551
Merge branch 'main' into feat/ci_time
Aias00 Oct 8, 2025
0dd98df
feat: refactor Dockerfile to use Makefile for Rust library build
Aias00 Oct 8, 2025
c3ceee1
Merge branch 'main' into feat/ci_time
Aias00 Oct 8, 2025
f1a7601
feat: simplify Dockerfile by removing unnecessary directory creation
Aias00 Oct 8, 2025
06e5784
Merge branch 'feat/ci_time' of https://github.com/Aias00/semantic-rou…
Aias00 Oct 8, 2025
5aa2708
Merge branch 'main' into feat/ci_time
Aias00 Oct 9, 2025
d8ef7d5
feat: enhance Dockerfiles for cross-compilation and optimize CI pipeline
Aias00 Oct 9, 2025
2bd7a67
feat: enhance cross-compilation support in Dockerfile for ARM64 and A…
Aias00 Oct 9, 2025
5e03de2
feat: improve ARM64 cross-compilation process in Dockerfile
Aias00 Oct 9, 2025
a5c53d7
feat: enhance Dockerfile to include detailed build output for Rust li…
Aias00 Oct 9, 2025
76e71d9
feat: enhance Dockerfile to include detailed build output for Rust li…
Aias00 Oct 9, 2025
e0acd87
feat: optimize CI pipeline with disk cleanup steps and improved build…
Aias00 Oct 9, 2025
790bd2d
feat: update CI configuration for ARM64 builds and improve Dockerfile…
Aias00 Oct 9, 2025
ebdc8a3
feat: simplify CI configuration by standardizing runner for ARM64 builds
Aias00 Oct 9, 2025
621b07c
feat: enhance Dockerfile for ARM64 cross-compilation with OpenSSL con…
Aias00 Oct 9, 2025
75e158c
feat: enhance Dockerfile for ARM64 cross-compilation with OpenSSL con…
Aias00 Oct 9, 2025
a1b17cf
feat: enhance Dockerfile for ARM64 cross-compilation with OpenSSL con…
Aias00 Oct 9, 2025
2fc08e4
feat: enhance Dockerfile for ARM64 cross-compilation with OpenSSL con…
Aias00 Oct 9, 2025
3c0ee66
feat: update CI configuration for multi-architecture Docker image builds
Aias00 Oct 9, 2025
a7cb96c
feat: update CI manifest tagging for pull requests with architecture …
Aias00 Oct 9, 2025
5bdbb16
feat: remove architecture suffix from pull request tags in CI manifest
Aias00 Oct 9, 2025
291fbae
feat: update CI manifest tagging for pull requests with architecture …
Aias00 Oct 9, 2025
0a509b7
feat: update CI manifest tagging for pull requests with architecture …
Aias00 Oct 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 154 additions & 58 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,37 @@ on:
required: false
type: boolean
default: false
skip_multiarch:
description: "Skip multi-architecture build for faster CI"
use_cross_compilation:
description: "Use cross-compilation instead of emulation for ARM64"
required: false
type: boolean
default: false
default: true
push:
branches: [ "main" ]
pull_request:
paths:
- ".github/workflows/docker-publish.yml"
- "Dockerfile*"
- "candle-binding/**"
- "src/**"
- "e2e-tests/llm-katan/**"

jobs:
# Parallel job for building both images
build_and_push:
# Build job for multi-architecture Docker images
build_multiarch:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
matrix:
image: [extproc, llm-katan]
fail-fast: false # Continue building other images if one fails
# Multi-architecture build strategy:
# - AMD64: Native build on ubuntu-latest (fast)
# - ARM64: Cross-compilation on ubuntu-latest (faster than emulation)
# arch: ${{ github.event_name == 'pull_request' && fromJSON('["amd64"]') || fromJSON('["amd64", "arm64"]') }}
arch: ["amd64", "arm64"]
fail-fast: false

steps:
- name: Check out the repo
Expand All @@ -41,8 +53,8 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Set up QEMU (only for multi-arch builds)
if: inputs.skip_multiarch != true
- name: Set up QEMU for cross-compilation
if: matrix.arch == 'arm64'
uses: docker/setup-qemu-action@v3
with:
platforms: arm64
Expand All @@ -54,91 +66,175 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Generate date tag for nightly builds
id: date
if: inputs.is_nightly == true
run: echo "date_tag=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT

- name: Set lowercase repository owner
run: echo "REPOSITORY_OWNER_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV

# Rust build cache for extproc - only use GitHub Actions cache for non-PR builds
- name: Cache Rust dependencies (extproc only)
if: matrix.image == 'extproc' && github.event_name != 'pull_request'
# Enhanced Rust caching for extproc builds with incremental compilation
- name: Cache Rust dependencies (extproc)
if: matrix.image == 'extproc'
uses: actions/cache@v4
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
candle-binding/target/
key: ${{ runner.os }}-cargo-extproc-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }}
~/.rustup/
key: ${{ runner.os }}-cargo-${{ matrix.arch }}-${{ hashFiles('candle-binding/Cargo.toml') }}-${{ hashFiles('candle-binding/Cargo.lock') }}-${{ hashFiles('candle-binding/src/**/*.rs') }}
restore-keys: |
${{ runner.os }}-cargo-extproc-
${{ runner.os }}-cargo-${{ matrix.arch }}-${{ hashFiles('candle-binding/Cargo.toml') }}-${{ hashFiles('candle-binding/Cargo.lock') }}-
${{ runner.os }}-cargo-${{ matrix.arch }}-${{ hashFiles('candle-binding/Cargo.toml') }}-
${{ runner.os }}-cargo-${{ matrix.arch }}-
${{ runner.os }}-cargo-

# Python caching for llm-katan builds
- name: Cache Python dependencies (llm-katan)
if: matrix.image == 'llm-katan'
uses: actions/cache@v4
with:
path: |
~/.cache/pip
e2e-tests/llm-katan/.venv
key: ${{ runner.os }}-pip-${{ matrix.arch }}-${{ hashFiles('e2e-tests/llm-katan/requirements.txt', 'e2e-tests/llm-katan/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-pip-${{ matrix.arch }}-
${{ runner.os }}-pip-

- name: Generate date tag for nightly builds
id: date
if: inputs.is_nightly == true
run: echo "date_tag=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT

- name: Set lowercase repository owner
run: echo "REPOSITORY_OWNER_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV

# Set build context and dockerfile based on matrix
- name: Set build parameters
id: build-params
run: |
# Cross-compilation is effectively always on for ARM64: the "|| 'true'" fallback below
# also replaces an explicit false input, so use_cross_compilation cannot disable it.
USE_CROSS_COMPILATION="${{ inputs.use_cross_compilation || 'true' }}"

if [ "${{ matrix.image }}" = "extproc" ]; then
echo "context=." >> $GITHUB_OUTPUT
echo "dockerfile=./Dockerfile.extproc" >> $GITHUB_OUTPUT
echo "platforms=${{ inputs.skip_multiarch == true && 'linux/amd64' || 'linux/amd64,linux/arm64' }}" >> $GITHUB_OUTPUT
if [ "$USE_CROSS_COMPILATION" = "true" ] && [ "${{ matrix.arch }}" = "arm64" ]; then
echo "dockerfile=./Dockerfile.extproc.cross" >> $GITHUB_OUTPUT
echo "platform=linux/${{ matrix.arch }}" >> $GITHUB_OUTPUT
else
echo "dockerfile=./Dockerfile.extproc" >> $GITHUB_OUTPUT
echo "platform=linux/${{ matrix.arch }}" >> $GITHUB_OUTPUT
fi
elif [ "${{ matrix.image }}" = "llm-katan" ]; then
echo "context=./e2e-tests/llm-katan" >> $GITHUB_OUTPUT
echo "dockerfile=./e2e-tests/llm-katan/Dockerfile" >> $GITHUB_OUTPUT
echo "platforms=${{ inputs.skip_multiarch == true && 'linux/amd64' || 'linux/amd64,linux/arm64' }}" >> $GITHUB_OUTPUT
echo "platform=linux/${{ matrix.arch }}" >> $GITHUB_OUTPUT
fi

# Extract version for llm-katan
- name: Extract version from pyproject.toml
id: version
if: matrix.image == 'llm-katan'
run: |
VERSION=$(grep '^version = ' e2e-tests/llm-katan/pyproject.toml | sed 's/version = "\(.*\)"/\1/')
echo "version=$VERSION" >> $GITHUB_OUTPUT

# Generate tags for extproc
- name: Generate extproc tags
id: extproc-tags
if: matrix.image == 'extproc'
- name: Generate tags
id: tags
run: |
REPO_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/extproc:nightly-${{ steps.date.outputs.date_tag }}" >> $GITHUB_OUTPUT
else
if [ "${{ github.event_name }}" != "pull_request" ]; then
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/extproc:${{ github.sha }},ghcr.io/${REPO_LOWER}/semantic-router/extproc:latest" >> $GITHUB_OUTPUT
else
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/extproc:${{ github.sha }}" >> $GITHUB_OUTPUT
fi
fi
ARCH_SUFFIX="${{ matrix.arch }}"

# Generate tags for llm-katan
- name: Generate llm-katan tags
id: llm-katan-tags
if: matrix.image == 'llm-katan'
run: |
REPO_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')
if [ "${{ inputs.is_nightly }}" = "true" ]; then
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/llm-katan:nightly-${{ steps.date.outputs.date_tag }}" >> $GITHUB_OUTPUT
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:nightly-${{ steps.date.outputs.date_tag }}-${ARCH_SUFFIX}" >> $GITHUB_OUTPUT
else
if [ "${{ github.event_name }}" != "pull_request" ]; then
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/llm-katan:${{ github.sha }},ghcr.io/${REPO_LOWER}/semantic-router/llm-katan:latest,ghcr.io/${REPO_LOWER}/semantic-router/llm-katan:v${{ steps.version.outputs.version }}" >> $GITHUB_OUTPUT
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:${{ github.sha }}-${ARCH_SUFFIX}" >> $GITHUB_OUTPUT
else
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/llm-katan:${{ github.sha }}" >> $GITHUB_OUTPUT
echo "tags=ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:pr-${{ github.event.number }}-${ARCH_SUFFIX}" >> $GITHUB_OUTPUT
fi
fi

- name: Build and push ${{ matrix.image }} Docker image
id: build
uses: docker/build-push-action@v5
with:
context: ${{ steps.build-params.outputs.context }}
file: ${{ steps.build-params.outputs.dockerfile }}
platforms: ${{ steps.build-params.outputs.platforms }}
platforms: ${{ steps.build-params.outputs.platform }}
push: ${{ github.event_name != 'pull_request' }}
load: ${{ github.event_name == 'pull_request' }}
tags: ${{ matrix.image == 'extproc' && steps.extproc-tags.outputs.tags || steps.llm-katan-tags.outputs.tags }}
tags: ${{ steps.tags.outputs.tags }}
cache-from: |
type=gha
type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache,mode=max
build-args: |
BUILDKIT_INLINE_CACHE=1
CARGO_BUILD_JOBS=${{ github.event_name == 'pull_request' && '8' || '16' }}
CARGO_INCREMENTAL=1
RUSTC_WRAPPER=""
CARGO_NET_GIT_FETCH_WITH_CLI=true
BUILDKIT_PROGRESS=plain
TARGETARCH=${{ matrix.arch }}
# Optimize Rust compilation for ARM64
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
# Enable link-time optimization for release builds
CARGO_PROFILE_RELEASE_LTO=thin
CARGO_PROFILE_RELEASE_CODEGEN_UNITS=1
# Use faster linker
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS="-C link-arg=-fuse-ld=lld"
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUSTFLAGS="-C link-arg=-fuse-ld=lld"

# Create multi-arch manifest for final images
create_manifest:
needs: build_multiarch
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
if: github.event_name != 'pull_request'
strategy:
matrix:
image: [extproc, llm-katan]

steps:
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Set lowercase repository owner
run: echo "REPOSITORY_OWNER_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV

- name: Generate date tag for nightly builds
id: date
if: inputs.is_nightly == true
run: echo "date_tag=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT

- name: Create and push manifest
run: |
REPO_LOWER=$(echo $GITHUB_REPOSITORY_OWNER | tr '[:upper:]' '[:lower:]')

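# Select the base tag under which the per-architecture images were pushed.
# Note: this job is skipped for pull requests, so the pull_request branch below never runs.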
if [ "${{ inputs.is_nightly }}" = "true" ]; then
TAG="nightly-${{ steps.date.outputs.date_tag }}"
else
if [ "${{ github.event_name }}" != "pull_request" ]; then
TAG="${{ github.sha }}"
else
TAG="pr-${{ github.event.number }}"
fi
fi

# Create and push manifest by combining architecture-specific images
docker buildx imagetools create \
--tag ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:${TAG} \
ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:${TAG}-amd64 \
ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:${TAG}-arm64

# Also tag as latest for non-nightly builds
if [ "${{ inputs.is_nightly }}" != "true" ]; then
docker buildx imagetools create \
--tag ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:latest \
ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:${TAG}-amd64 \
ghcr.io/${REPO_LOWER}/semantic-router/${{ matrix.image }}:${TAG}-arm64
fi

- name: Build summary
if: always()
run: |
if [ "${{ job.status }}" = "success" ]; then
echo "::notice title=Build Success::${{ matrix.image }} multi-arch manifest created successfully"
else
echo "::error title=Build Failed::${{ matrix.image }} build failed"
fi
58 changes: 57 additions & 1 deletion .github/workflows/k8s-integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,32 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Free up disk space before build
run: |
echo "=== Initial disk usage ==="
df -h
echo ""
echo "=== Cleaning up system ==="
# Remove unnecessary packages and caches
sudo apt-get clean
sudo apt-get autoremove -y
sudo rm -rf /var/lib/apt/lists/*
sudo rm -rf /tmp/*
sudo rm -rf /var/tmp/*

# Clean Docker system
docker system prune -af --volumes

# Remove large unnecessary files/directories
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL

echo ""
echo "=== Disk usage after cleanup ==="
df -h

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

Expand Down Expand Up @@ -115,6 +141,20 @@ jobs:
kind load docker-image ghcr.io/vllm-project/semantic-router/extproc:test --name semantic-router-test
echo "Image loaded successfully!"

- name: Clean up after image build
run: |
echo "=== Cleaning up Docker build artifacts ==="
# Remove build cache and unused images
docker builder prune -af
docker image prune -af

# List the images that remain after pruning (this command removes nothing)
docker images

echo ""
echo "=== Disk usage after build cleanup ==="
df -h

- name: Verify cluster
run: |
kubectl cluster-info
Expand Down Expand Up @@ -181,7 +221,7 @@ jobs:

# Reduce resource requirements for CI testing and set imagePullPolicy
patches:
# Patch for main container
# Patch for main container - reduced resources for CI
- patch: |-
- op: replace
path: /spec/template/spec/containers/0/resources/requests/memory
Expand Down Expand Up @@ -239,6 +279,22 @@ jobs:

echo "✓ Connectivity check completed"

- name: Final disk cleanup before deployment
run: |
echo "=== Final cleanup before deployment ==="
# Clean up any remaining build artifacts
docker system prune -f

# Flush pending writes, then drop the kernel page cache, dentries and inodes
# (this frees memory, not disk space)
sudo sync
echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null || true

echo "=== Final disk usage ==="
df -h

echo "=== Available memory ==="
free -h

- name: Deploy to kind cluster
run: |
echo "Deploying semantic-router to kind cluster..."
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Rust
target/
**/*.rs.bk
Cargo.lock
# Note: Cargo.lock should be committed for applications and workspace roots
# Cargo.lock

# Python
*.pyc
Expand Down
Loading
Loading