llama-farm · rachmlenig · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 16, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -165,13 +165,12 @@ jobs:
           echo "✓ Datamodel generation complete"
 
       - name: Verify datamodel was generated
+        working-directory: config
         run: |
           echo "Checking for datamodel.py..."
-          ls -lh config/datamodel.py || echo "❌ datamodel.py not found!"
+          ls -lh datamodel.py || { echo "❌ datamodel.py not found!"; exit 1; }
           echo "Attempting import test..."
-          cd config && python -c "from datamodel import LlamaFarmConfig; print('✓ Direct import successful')" || echo "❌ Direct import failed!"
-          echo "Attempting module import test..."
-          python -c "from config.datamodel import LlamaFarmConfig; print('✓ Module import successful')" || echo "❌ Module import failed!"
+          uv run python -c "from datamodel import LlamaFarmConfig; print('✓ Import successful')"
 
       - name: Check if component has tests
         id: check-tests
@@ -225,9 +224,13 @@ jobs:
         continue-on-error: false
 
       - name: Set up Ollama
-        uses: pydantic/ollama-action@v3
-        with:
-          model: nomic-embed-text
+        run: |
+          curl -fsSL https://ollama.com/install.sh | sh
+          ollama serve &
+          # Poll until the server answers instead of relying on a fixed sleep,
+          # which races with server start-up on slow runners (fails after 60s).
+          timeout 60 bash -c 'until ollama list >/dev/null 2>&1; do sleep 1; done'
+          ollama pull nomic-embed-text
 
       - name: Run tests
         if: steps.check-tests.outputs.has_tests == 'true'

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -23,7 +23,7 @@ jobs:
       packages: write
     strategy:
       matrix:
-        service: [designer, server, rag, runtime]
+        service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]
         include:
           - service: designer
             context: ./designer
@@ -41,6 +41,14 @@ jobs:
             context: ./
             dockerfile: ./runtimes/universal/Dockerfile
             description: "LlamaFarm Universal Runtime - Universal Runtime for all models"
+          - service: edge-runtime
+            context: ./
+            dockerfile: ./runtimes/edge/Dockerfile
+            description: "LlamaFarm Edge Runtime - Lightweight runtime for edge/drone deployment"
+          - service: edge-runtime-lite
+            context: ./
+            dockerfile: ./runtimes/edge/Dockerfile
+            description: "LlamaFarm Edge Runtime (Lite) - Language-only runtime without vision deps"
 
     steps:
       - name: Checkout repository
@@ -109,6 +117,7 @@ jobs:
           build-args: |
             GIT_SHA=${{ github.sha }}
             PYTORCH_VARIANT=${{ matrix.service == 'runtime' && github.event_name == 'pull_request' && 'cpu' || '' }}
+            ENABLE_VISION=${{ matrix.service == 'edge-runtime-lite' && 'false' || '' }}
 
       - name: Upload AMD64 image artifact (PR only)
         if: github.event_name == 'pull_request'
@@ -127,7 +136,7 @@ jobs:
       packages: write
     strategy:
       matrix:
-        service: [designer, server, rag, runtime]
+        service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]
         include:
           - service: designer
             context: ./designer
@@ -145,6 +154,14 @@ jobs:
             context: ./
             dockerfile: ./runtimes/universal/Dockerfile
             description: "LlamaFarm Universal Runtime - model serving for GGUF and Transformers"
+          - service: edge-runtime
+            context: ./
+            dockerfile: ./runtimes/edge/Dockerfile
+            description: "LlamaFarm Edge Runtime - Lightweight runtime for edge/drone deployment"
+          - service: edge-runtime-lite
+            context: ./
+            dockerfile: ./runtimes/edge/Dockerfile
+            description: "LlamaFarm Edge Runtime (Lite) - Language-only runtime without vision deps"
 
     steps:
       - name: Checkout repository
@@ -195,6 +212,7 @@ jobs:
           outputs: ${{ github.event_name == 'pull_request' && format('type=docker,dest={0}/{1}-arm64.tar', runner.temp, matrix.service) || '' }}
           build-args: |
             GIT_SHA=${{ github.sha }}
+            ENABLE_VISION=${{ matrix.service == 'edge-runtime-lite' && 'false' || '' }}
 
       - name: Upload ARM64 image artifact (PR only)
         if: github.event_name == 'pull_request'
@@ -214,7 +232,7 @@ jobs:
       packages: write
     strategy:
       matrix:
-        service: [designer, server, rag]
+        service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]
 
     steps:
       - name: Checkout repository
@@ -368,11 +386,11 @@ jobs:
           IMAGE_TAG: pr-${{ github.event.number }}
         run: |
           # Tag the loaded images with the expected format for docker-compose
-          SERVICES=(designer server rag runtime)
+          SERVICES=(designer server rag runtime edge-runtime edge-runtime-lite)
 
           for SERVICE in "${SERVICES[@]}"; do
             # Find the loaded images for this service
-            AMD64_IMAGE=$(docker images --format "table {{.Repository}}:{{.Tag}}" | grep "$SERVICE" | grep "amd64" | head -1 | tr -d ' ')
+            AMD64_IMAGE=$(docker images --format "table {{.Repository}}:{{.Tag}}" | grep "/${SERVICE}:" | grep "amd64" | head -1 | tr -d ' ')
 
             if [ -n "$AMD64_IMAGE" ]; then
               # Tag for docker-compose (use AMD64 for testing)
@@ -465,6 +483,8 @@ jobs:
           docker compose -f docker-compose.yml logs --tail=50 designer
           docker compose -f docker-compose.yml logs --tail=50 rag
           docker compose -f docker-compose.yml logs --tail=50 runtime
+          docker compose -f docker-compose.yml logs --tail=50 edge-runtime
+          docker compose -f docker-compose.yml logs --tail=50 edge-runtime-lite
 
       - name: Show logs on failure
         if: failure()
@@ -479,7 +499,7 @@ jobs:
           docker compose -f docker-compose.yml logs --tail=200 || true
           echo ""
           echo "=== Individual service logs ==="
-          for service in server designer rag runtime; do
+          for service in server designer rag runtime edge-runtime edge-runtime-lite; do
             echo "--- Logs for $service ---"
             docker compose -f docker-compose.yml logs --tail=100 "$service" || true
             echo ""
@@ -504,14 +524,22 @@ jobs:
       security-events: write
     strategy:
       matrix:
-        service: [designer, server, rag, runtime]
+        service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+
+      - name: Determine image tag
+        id: tag
+        run: |
+          # Use branch name as tag (matches create-manifest metadata); "/" is not valid in a tag, so map it to "-"
+          TAG="${GITHUB_REF_NAME//\//-}"
+          echo "image=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/${{ matrix.service }}:${TAG}" >> "$GITHUB_OUTPUT"
+
       - name: Run Trivy vulnerability scanner
         uses: aquasecurity/trivy-action@master
         with:
-          image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/${{ matrix.service }}:latest
+          image-ref: ${{ steps.tag.outputs.image }}
           format: "sarif"
           output: "trivy-results-${{ matrix.service }}.sarif"
 

diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml
@@ -57,7 +57,7 @@ jobs:
           echo "${{ secrets.GITHUB_TOKEN }}" | docker login "${REGISTRY}" -u "${{ github.actor }}" --password-stdin
 
           # Services to retag
-          SERVICES=(designer server rag runtime)
+          SERVICES=(designer server rag runtime edge-runtime edge-runtime-lite)
 
           # Wait for all Docker images to be available
           echo "Waiting for Docker images to be built and pushed..."

diff --git a/cli/cmd/orchestrator/python_env.go b/cli/cmd/orchestrator/python_env.go
@@ -142,24 +142,30 @@ func (m *PythonEnvManager) getEnv() []string {
 	// Start with the current environment
 	env := os.Environ()
 
-	// Filter out Python-related environment variables that could interfere
-	// with UV's managed Python environment
+	// Filter out environment variables that could interfere with UV's managed
+	// Python environment or cause incorrect package resolution.
+	// UV index vars are stripped here so that only services that explicitly
+	// declare them in their Env map (e.g. universal-runtime) will have them.
+	// This prevents the PyTorch CPU index from leaking into server/rag, where
+	// it can cause install failures (e.g. markupsafe with only cp314 wheels).
 	filteredEnv := make([]string, 0, len(env))
 	pythonEnvVars := map[string]bool{
-		"VIRTUAL_ENV":       true,
-		"PYTHONHOME":        true,
-		"PYTHONPATH":        true,
-		"PYTHONSTARTUP":     true,
-		"PYTHONEXECUTABLE":  true,
-		"PYTHONUSERBASE":    true,
-		"CONDA_DEFAULT_ENV": true,
-		"CONDA_PREFIX":      true,
+		"VIRTUAL_ENV":        true,
+		"PYTHONHOME":         true,
+		"PYTHONPATH":         true,
+		"PYTHONSTARTUP":      true,
+		"PYTHONEXECUTABLE":   true,
+		"PYTHONUSERBASE":     true,
+		"CONDA_DEFAULT_ENV":  true,
+		"CONDA_PREFIX":       true,
 		"CONDA_PYTHON_EXE":  true,
-		"PYENV_VERSION":     true,
-		"PYENV_VIRTUAL_ENV": true,
-		"PIPENV_ACTIVE":     true,
-		"POETRY_ACTIVE":     true,
-		"PDM_PYTHON":        true,
+		"PYENV_VERSION":      true,
+		"PYENV_VIRTUAL_ENV":  true,
+		"PIPENV_ACTIVE":      true,
+		"POETRY_ACTIVE":      true,
+		"PDM_PYTHON":         true,
+		"UV_EXTRA_INDEX_URL": true,
+		"UV_INDEX_STRATEGY":  true,
 	}
 
 	for _, e := range env {

diff --git a/cli/cmd/orchestrator/services.go b/cli/cmd/orchestrator/services.go
@@ -150,8 +150,8 @@ var ServiceGraph = map[string]*ServiceDefinition{
 			"LLAMAFARM_GGUF_FORCE_CPU": "", // Set to "1" to force CPU for GGUF inference (avoids Metal SIGSEGV in CI)
 			"HF_TOKEN":                 "",
 			// In CI environments, use CPU-only PyTorch to avoid downloading 3GB+ of CUDA packages
-			"UV_EXTRA_INDEX_URL":  "${UV_EXTRA_INDEX_URL}",
-			"UV_INDEX_STRATEGY":   "", // Inherit from parent env (e.g. unsafe-best-match in CI)
+			"UV_EXTRA_INDEX_URL": "${UV_EXTRA_INDEX_URL}",
+			"UV_INDEX_STRATEGY":  "${UV_INDEX_STRATEGY}",
 		},
 		HealthComponent: "universal-runtime",
 		HardwarePackages: []HardwarePackageSpec{

diff --git a/common/llamafarm_common/__init__.py b/common/llamafarm_common/__init__.py
@@ -17,6 +17,14 @@
     select_gguf_file_with_logging,
 )
 
+# Submodules are also importable directly as llamafarm_common.safe_home, etc.
+# They are kept as submodule imports to avoid adding their deps to the top-level namespace.
+# Usage:
+#   from llamafarm_common.safe_home import safe_home, get_data_dir
+#   from llamafarm_common.device import get_optimal_device, get_device_info
+#   from llamafarm_common.model_cache import ModelCache
+#   from llamafarm_common.model_format import detect_model_format
+
 __all__ = [
     "GGUF_QUANTIZATION_PREFERENCE_ORDER",
     "get_gguf_file_path",