Skip to content

Commit 6956fd1

Browse files
authored
Merge pull request #257 from codelion/feat-offline-dockerfile
Feat offline dockerfile
2 parents 4b0d734 + def7f1b commit 6956fd1

File tree

8 files changed

+273
-10
lines changed

8 files changed

+273
-10
lines changed

.github/workflows/publish-docker-manifest.yml

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
name: Docker Multi-arch Manifests
22
on:
33
workflow_run:
4-
workflows:
4+
workflows:
55
- "Docker Proxy AMD64"
66
- "Docker Proxy ARM64"
77
- "Docker Full AMD64"
88
- "Docker Full ARM64"
9+
- "Docker Offline AMD64"
10+
- "Docker Offline ARM64"
911
types: [completed]
1012

1113
jobs:
@@ -23,9 +25,11 @@ jobs:
2325
script: |
2426
const workflows = [
2527
"Docker Proxy AMD64",
26-
"Docker Proxy ARM64",
28+
"Docker Proxy ARM64",
2729
"Docker Full AMD64",
28-
"Docker Full ARM64"
30+
"Docker Full ARM64",
31+
"Docker Offline AMD64",
32+
"Docker Offline ARM64"
2933
];
3034
3135
const runId = context.payload.workflow_run.id;
@@ -107,17 +111,35 @@ jobs:
107111
- name: Create full multi-arch manifest
  run: |
    # Version string comes from the check-builds job output.
    VERSION="${{ needs.check-builds.outputs.version }}"
    IMAGE="ghcr.io/${{ github.repository }}"

    # Publish one manifest list per tag (versioned first, then latest);
    # each list references the matching -amd64 and -arm64 image tags.
    for TAG in "${VERSION}" latest; do
      docker manifest create "${IMAGE}:${TAG}" \
        "${IMAGE}:${TAG}-amd64" \
        "${IMAGE}:${TAG}-arm64"

      docker manifest push "${IMAGE}:${TAG}"
    done
128+
129+
- name: Create offline multi-arch manifest
  run: |
    # Version string comes from the check-builds job output.
    VERSION="${{ needs.check-builds.outputs.version }}"
    IMAGE="ghcr.io/${{ github.repository }}"

    # Publish one manifest list per offline tag (versioned first, then latest);
    # each list references the matching -amd64 and -arm64 image tags.
    for TAG in "${VERSION}-offline" latest-offline; do
      docker manifest create "${IMAGE}:${TAG}" \
        "${IMAGE}:${TAG}-amd64" \
        "${IMAGE}:${TAG}-arm64"

      docker manifest push "${IMAGE}:${TAG}"
    done
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Builds the offline image from Dockerfile.offline for linux/amd64 and pushes
# it to ghcr.io under a versioned tag and a latest tag when a release is created.
name: Docker Offline AMD64
on:
  release:
    types: [created]

jobs:
  build:
    name: Build offline Docker image for AMD64
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Needed to push to the GitHub Container Registry with GITHUB_TOKEN.
      packages: write
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract version from tag
        id: version
        run: |
          # Strip the refs/tags/ prefix; if the ref is not a tag (nothing was
          # stripped) or the result is empty, fall back to "latest".
          VERSION="${GITHUB_REF#refs/tags/}"
          if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
            VERSION="latest"
          fi
          # Quote the redirect target so the path is never word-split or globbed.
          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Build and push offline AMD64 image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile.offline
          push: true
          platforms: linux/amd64
          # NOTE(review): presumably disabled so each pushed tag is a plain
          # single-platform image that the manifest workflow can combine - confirm.
          provenance: false
          tags: |
            ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-amd64
            ghcr.io/${{ github.repository }}:latest-offline-amd64
          labels: |
            org.opencontainers.image.source=https://github.com/${{ github.repository }}
            org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (AMD64)
            org.opencontainers.image.licenses=Apache-2.0
            org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
          # Separate cache scope per variant/architecture.
          cache-from: type=gha,scope=offline-amd64
          cache-to: type=gha,scope=offline-amd64,mode=max
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Builds the offline image from Dockerfile.offline for linux/arm64 and pushes
# it to ghcr.io under a versioned tag and a latest tag when a release is created.
name: Docker Offline ARM64
on:
  release:
    types: [created]

jobs:
  build:
    name: Build offline Docker image for ARM64
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Needed to push to the GitHub Container Registry with GITHUB_TOKEN.
      packages: write
    steps:
      - uses: actions/checkout@v4

      # QEMU emulation is set up before Buildx because the job runs on
      # ubuntu-latest while targeting linux/arm64.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract version from tag
        id: version
        run: |
          # Strip the refs/tags/ prefix; if the ref is not a tag (nothing was
          # stripped) or the result is empty, fall back to "latest".
          VERSION="${GITHUB_REF#refs/tags/}"
          if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then
            VERSION="latest"
          fi
          # Quote the redirect target so the path is never word-split or globbed.
          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Build and push offline ARM64 image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile.offline
          push: true
          platforms: linux/arm64
          # NOTE(review): presumably disabled so each pushed tag is a plain
          # single-platform image that the manifest workflow can combine - confirm.
          provenance: false
          tags: |
            ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-arm64
            ghcr.io/${{ github.repository }}:latest-offline-arm64
          labels: |
            org.opencontainers.image.source=https://github.com/${{ github.repository }}
            org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (ARM64)
            org.opencontainers.image.licenses=Apache-2.0
            org.opencontainers.image.version=${{ steps.version.outputs.VERSION }}
          # Separate cache scope per variant/architecture.
          cache-from: type=gha,scope=offline-arm64
          cache-to: type=gha,scope=offline-arm64,mode=max

Dockerfile.offline

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Build stage: installs the compiler toolchain, the Python dependencies and the
# spaCy model so the final stage does not need build tools.
FROM python:3.12-slim-bookworm AS builder

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    python3-dev \
    gcc \
    g++ \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements file first to leverage Docker cache
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Download spaCy model for offline use
RUN python -m spacy download en_core_web_lg

# Final stage
FROM python:3.12-slim-bookworm

# Add labels for the final image
LABEL org.opencontainers.image.source="https://github.com/codelion/optillm"
LABEL org.opencontainers.image.description="OptiLLM offline image with pre-downloaded models for fully offline operation"
LABEL org.opencontainers.image.licenses="Apache-2.0"

# Install curl for the healthcheck
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    curl \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy installed dependencies from builder stage
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application code
COPY . .

# Create a non-root user and switch to it
RUN useradd -m appuser
USER appuser

# Set environment variables
ENV PYTHONUNBUFFERED=1

# Build argument with default value. It is declared in the final stage because
# an ARG goes out of scope at the end of the stage that defines it; declaring it
# only in the builder stage left ${PORT} undefined for EXPOSE below and kept
# OPTILLM_PORT out of the runtime image.
ARG PORT=8000
# Make it available as env variable at runtime
ENV OPTILLM_PORT=$PORT

# Use the ARG in EXPOSE
EXPOSE ${PORT}

# Run the application
ENTRYPOINT ["python", "optillm.py"]

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Version information
2-
__version__ = "0.3.0"
2+
__version__ = "0.3.1"
33

44
# Import from server module
55
from .server import (

optillm/plugins/privacy_plugin.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ def get_analyzer_engine() -> AnalyzerEngine:
105105
global _analyzer_engine
106106
if _analyzer_engine is None:
107107
_analyzer_engine = AnalyzerEngine()
108+
# Pre-warm the analyzer to load all recognizers once during initialization
109+
# This prevents recognizers from being reloaded on each analyze() call
110+
_analyzer_engine.analyze(text="warm up", language="en")
108111
return _analyzer_engine
109112

110113
def get_anonymizer_engine() -> AnonymizerEngine:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "optillm"
7-
version = "0.3.0"
7+
version = "0.3.1"
88
description = "An optimizing inference proxy for LLMs."
99
readme = "README.md"
1010
license = "Apache-2.0"

tests/test_privacy_plugin_performance.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,64 @@ def test_singleton_instances_are_reused():
181181
print(f"❌ Singleton test failed: {e}")
182182
raise
183183

184+
def test_recognizers_not_reloaded():
    """
    Check that get_analyzer_engine() warms the analyzer up exactly once.

    With presidio's AnalyzerEngine replaced by a mock, the first call to
    get_analyzer_engine() must trigger a single analyze() call, and a second
    call must return the same cached object without calling analyze() again.

    Returns True on success, and also when the required packages cannot be
    imported (the test is then reported as skipped). Any other failure is
    printed and re-raised.
    """
    print("\nTesting that recognizers are not reloaded on each call...")

    # Drop any previously imported copy so the module is imported fresh
    # while the mocks below are active.
    sys.modules.pop('optillm.plugins.privacy_plugin', None)

    try:
        # Mock at the presidio level; spaCy is told the model package exists.
        with patch('presidio_analyzer.AnalyzerEngine') as engine_cls:
            with patch('spacy.util.is_package', return_value=True):
                # Stand-in analyzer with a registry and an analyze() that both return nothing.
                engine_stub = MagicMock()
                engine_stub.registry = MagicMock()
                engine_stub.registry.get_recognizers = MagicMock(return_value=[])
                engine_stub.analyze = MagicMock(return_value=[])
                engine_cls.return_value = engine_stub

                # Import module with mocks
                import optillm.plugins.privacy_plugin as privacy_plugin

                # First retrieval: engine is created and warmed up.
                first_engine = privacy_plugin.get_analyzer_engine()
                calls_after_first = engine_stub.analyze.call_count

                print(f"Warm-up analyze calls: {calls_after_first}")
                assert calls_after_first == 1, f"Expected 1 warm-up analyze call, got {calls_after_first}"

                # Second retrieval: cached engine, no further analyze() call.
                second_engine = privacy_plugin.get_analyzer_engine()
                calls_after_second = engine_stub.analyze.call_count

                print(f"Total analyze calls after second get_analyzer_engine: {calls_after_second}")
                assert calls_after_second == 1, f"Analyzer should not call analyze() again on cached retrieval, got {calls_after_second} calls"

                # Both retrievals must hand back the very same object.
                assert first_engine is second_engine, "Should return the same cached analyzer instance"

                print("✅ Recognizer reload test PASSED - Recognizers are pre-warmed and not reloaded!")
                return True

    except ImportError as err:
        print(f"⚠️  Skipping recognizer reload test - dependencies not installed: {err}")
        return True
    except Exception as err:
        print(f"❌ Recognizer reload test failed: {err}")
        raise
241+
184242
if __name__ == "__main__":
185243
print("=" * 60)
186244
print("Privacy Plugin Performance & Caching Tests")
@@ -200,6 +258,12 @@ def test_singleton_instances_are_reused():
200258
all_passed = False
201259
print(f"❌ Singleton instance test failed: {e}")
202260

261+
try:
262+
test_recognizers_not_reloaded()
263+
except Exception as e:
264+
all_passed = False
265+
print(f"❌ Recognizer reload test failed: {e}")
266+
203267
try:
204268
test_privacy_plugin_performance()
205269
except Exception as e:

0 commit comments

Comments
 (0)