Merge pull request #257 from codelion/feat-offline-dockerfile

codelion · web-flow · commit 6956fd1eb1d1 · 2025-09-30T11:28:02.000+08:00
Feat offline dockerfile
diff --git a/.github/workflows/publish-docker-manifest.yml b/.github/workflows/publish-docker-manifest.yml
@@ -1,11 +1,13 @@
 name: Docker Multi-arch Manifests
 on:
   workflow_run:
-    workflows: 
+    workflows:
       - "Docker Proxy AMD64"
       - "Docker Proxy ARM64"
       - "Docker Full AMD64"
       - "Docker Full ARM64"
+      - "Docker Offline AMD64"
+      - "Docker Offline ARM64"
     types: [completed]
 
 jobs:
@@ -23,9 +25,11 @@ jobs:
           script: |
             const workflows = [
               "Docker Proxy AMD64",
-              "Docker Proxy ARM64", 
+              "Docker Proxy ARM64",
               "Docker Full AMD64",
-              "Docker Full ARM64"
+              "Docker Full ARM64",
+              "Docker Offline AMD64",
+              "Docker Offline ARM64"
             ];
             
             const runId = context.payload.workflow_run.id;
@@ -107,17 +111,35 @@ jobs:
       - name: Create full multi-arch manifest
         run: |
           VERSION="${{ needs.check-builds.outputs.version }}"
-          
+
           # Create versioned full manifest
           docker manifest create ghcr.io/${{ github.repository }}:${VERSION} \
             ghcr.io/${{ github.repository }}:${VERSION}-amd64 \
             ghcr.io/${{ github.repository }}:${VERSION}-arm64
-          
+
           docker manifest push ghcr.io/${{ github.repository }}:${VERSION}
-          
+
           # Create latest full manifest
           docker manifest create ghcr.io/${{ github.repository }}:latest \
             ghcr.io/${{ github.repository }}:latest-amd64 \
             ghcr.io/${{ github.repository }}:latest-arm64
-          
-          docker manifest push ghcr.io/${{ github.repository }}:latest
+
+          docker manifest push ghcr.io/${{ github.repository }}:latest
+
+      - name: Create offline multi-arch manifest
+        run: |
+          # Combine the per-architecture offline images under shared tags
+          IMAGE="ghcr.io/${{ github.repository }}"
+          VERSION="${{ needs.check-builds.outputs.version }}"
+
+          # Versioned offline manifest: create it, then publish it
+          docker manifest create ${IMAGE}:${VERSION}-offline \
+            ${IMAGE}:${VERSION}-offline-amd64 \
+            ${IMAGE}:${VERSION}-offline-arm64
+          docker manifest push ${IMAGE}:${VERSION}-offline
+
+          # Latest offline manifest: create it, then publish it
+          docker manifest create ${IMAGE}:latest-offline \
+            ${IMAGE}:latest-offline-amd64 \
+            ${IMAGE}:latest-offline-arm64
+          docker manifest push ${IMAGE}:latest-offline
diff --git a/.github/workflows/publish-docker-offline-amd64.yml b/.github/workflows/publish-docker-offline-amd64.yml
@@ -0,0 +1,59 @@
+# Builds the offline image for linux/amd64 from Dockerfile.offline and pushes
+# it to ghcr.io as <version>-offline-amd64 and latest-offline-amd64.
+name: Docker Offline AMD64
+on:
+  release:
+    types: [created]
+
+jobs:
+  build:
+    name: Build offline Docker image for AMD64
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract version from tag
+        id: version
+        run: |
+          # Strip the refs/tags/ prefix; fall back to "latest" when the result
+          # is empty or unchanged (i.e. the ref was not a tag)
+          VERSION=${GITHUB_REF#refs/tags/}
+          if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
+            VERSION="latest"
+          fi
+          # Quote the output file path so it is never word-split or globbed
+          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"
+
+      - name: Build and push offline AMD64 image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.offline
+          push: true
+          platforms: linux/amd64
+          # NOTE(review): presumably disabled so the pushed tag is a plain
+          # single-platform image that `docker manifest create` accepts - confirm
+          provenance: false
+          tags: |
+            ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-amd64
+            ghcr.io/${{ github.repository }}:latest-offline-amd64
+          labels: |
+            org.opencontainers.image.source=https://github.com/${{ github.repository }}
+            org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (AMD64)
+            org.opencontainers.image.licenses=Apache-2.0
+            org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
+          cache-from: type=gha,scope=offline-amd64
+          cache-to: type=gha,scope=offline-amd64,mode=max
diff --git a/.github/workflows/publish-docker-offline-arm64.yml b/.github/workflows/publish-docker-offline-arm64.yml
@@ -0,0 +1,63 @@
+# Builds the offline image for linux/arm64 from Dockerfile.offline and pushes
+# it to ghcr.io as <version>-offline-arm64 and latest-offline-arm64.
+name: Docker Offline ARM64
+on:
+  release:
+    types: [created]
+
+jobs:
+  build:
+    name: Build offline Docker image for ARM64
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - uses: actions/checkout@v4
+
+      # QEMU emulation so the (presumably x86_64) runner can build linux/arm64
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract version from tag
+        id: version
+        run: |
+          # Strip the refs/tags/ prefix; fall back to "latest" when the result
+          # is empty or unchanged (i.e. the ref was not a tag)
+          VERSION=${GITHUB_REF#refs/tags/}
+          if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
+            VERSION="latest"
+          fi
+          # Quote the output file path so it is never word-split or globbed
+          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"
+
+      - name: Build and push offline ARM64 image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.offline
+          push: true
+          platforms: linux/arm64
+          # NOTE(review): presumably disabled so the pushed tag is a plain
+          # single-platform image that `docker manifest create` accepts - confirm
+          provenance: false
+          tags: |
+            ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-arm64
+            ghcr.io/${{ github.repository }}:latest-offline-arm64
+          labels: |
+            org.opencontainers.image.source=https://github.com/${{ github.repository }}
+            org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (ARM64)
+            org.opencontainers.image.licenses=Apache-2.0
+            org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
+          cache-from: type=gha,scope=offline-arm64
+          cache-to: type=gha,scope=offline-arm64,mode=max
diff --git a/Dockerfile.offline b/Dockerfile.offline
@@ -0,0 +1,67 @@
+# Build stage
+FROM python:3.12-slim-bookworm AS builder
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    python3-dev \
+    gcc \
+    g++ \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy only the requirements file first to leverage Docker cache
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download spaCy model for offline use
+RUN python -m spacy download en_core_web_lg
+
+# Final stage
+FROM python:3.12-slim-bookworm
+
+# Build argument, declared in the final stage: ARG/ENV do not carry over from builder
+ARG PORT=8000
+# Make it available as env variable at runtime
+ENV OPTILLM_PORT=$PORT
+
+# Add labels for the final image
+LABEL org.opencontainers.image.source="https://github.com/codelion/optillm"
+LABEL org.opencontainers.image.description="OptiLLM offline image with pre-downloaded models for fully offline operation"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+
+# Install curl for the healthcheck
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Copy installed dependencies from builder stage
+COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY . .
+
+# Create a non-root user and switch to it
+RUN useradd -m appuser
+USER appuser
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Use the ARG in EXPOSE
+EXPOSE ${PORT}
+
+# Run the application
+ENTRYPOINT ["python", "optillm.py"]
diff --git a/optillm/__init__.py b/optillm/__init__.py
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.3.0"
+__version__ = "0.3.1"
 
 # Import from server module
 from .server import (
diff --git a/optillm/plugins/privacy_plugin.py b/optillm/plugins/privacy_plugin.py
@@ -105,6 +105,9 @@ def get_analyzer_engine() -> AnalyzerEngine:
     global _analyzer_engine
     if _analyzer_engine is None:
         _analyzer_engine = AnalyzerEngine()
+        # Pre-warm the analyzer to load all recognizers once during initialization
+        # This prevents recognizers from being reloaded on each analyze() call
+        _analyzer_engine.analyze(text="warm up", language="en")
     return _analyzer_engine
 
 def get_anonymizer_engine() -> AnonymizerEngine:
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.3.0"
+version = "0.3.1"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/tests/test_privacy_plugin_performance.py b/tests/test_privacy_plugin_performance.py
@@ -181,6 +181,64 @@ def test_singleton_instances_are_reused():
         print(f"❌ Singleton test failed: {e}")
         raise
 
+def test_recognizers_not_reloaded():
+    """
+    Test that get_analyzer_engine() warms the analyzer up exactly once.
+
+    The first call must create the engine and run a single warm-up analyze();
+    later calls must return the cached instance without analyzing again. This
+    guards against the regression where "Fetching all recognizers for language en"
+    appears in logs on every request.
+    """
+    print("\nTesting that recognizers are not reloaded on each call...")
+
+    # Reset module state so the import below is executed under the patches
+    module_name = 'optillm.plugins.privacy_plugin'
+    sys.modules.pop(module_name, None)
+
+    try:
+        # Mock at the presidio level so analyze() calls can be counted
+        with patch('presidio_analyzer.AnalyzerEngine') as MockAnalyzerEngine, \
+             patch('spacy.util.is_package', return_value=True):
+
+            # Stand-in engine returned by every AnalyzerEngine() construction
+            mock_analyzer_instance = MagicMock()
+            mock_analyzer_instance.analyze = MagicMock(return_value=[])
+            MockAnalyzerEngine.return_value = mock_analyzer_instance
+
+            # Import module with mocks
+            import optillm.plugins.privacy_plugin as privacy_plugin
+
+            # First call to get_analyzer_engine - should create and warm up
+            analyzer1 = privacy_plugin.get_analyzer_engine()
+            initial_analyze_calls = mock_analyzer_instance.analyze.call_count
+
+            print(f"Warm-up analyze calls: {initial_analyze_calls}")
+            assert initial_analyze_calls == 1, f"Expected 1 warm-up analyze call, got {initial_analyze_calls}"
+
+            # Second call - should return cached instance without additional analyze
+            analyzer2 = privacy_plugin.get_analyzer_engine()
+            second_analyze_calls = mock_analyzer_instance.analyze.call_count
+
+            print(f"Total analyze calls after second get_analyzer_engine: {second_analyze_calls}")
+            assert second_analyze_calls == 1, f"Analyzer should not call analyze() again on cached retrieval, got {second_analyze_calls} calls"
+
+            # Verify it's the same instance
+            assert analyzer1 is analyzer2, "Should return the same cached analyzer instance"
+
+            print("✅ Recognizer reload test PASSED - Recognizers are pre-warmed and not reloaded!")
+            return True
+
+    except ImportError as e:
+        print(f"⚠️  Skipping recognizer reload test - dependencies not installed: {e}")
+        return True
+    except Exception as e:
+        print(f"❌ Recognizer reload test failed: {e}")
+        raise
+    finally:
+        # Drop the module imported under the patches so later imports are fresh
+        sys.modules.pop(module_name, None)
+
 if __name__ == "__main__":
     print("=" * 60)
     print("Privacy Plugin Performance & Caching Tests")
@@ -200,6 +258,12 @@ def test_singleton_instances_are_reused():
         all_passed = False
         print(f"❌ Singleton instance test failed: {e}")
 
+    try:
+        test_recognizers_not_reloaded()
+    except Exception as e:
+        all_passed = False
+        print(f"❌ Recognizer reload test failed: {e}")
+
     try:
         test_privacy_plugin_performance()
     except Exception as e: