Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
85fc182
feat(runtime): add standalone edge runtime for Pi/Jetson deployment
rachmlenig Mar 12, 2026
6a291a9
refactor(common): extract shared utils into llamafarm_common
rachmlenig Mar 12, 2026
f922cd4
chore(runtime): add edge runtime OpenAPI spec
rachmlenig Mar 12, 2026
e274e01
feat(runtime): add Hailo-10H backend for edge vision detection
rachmlenig Mar 16, 2026
270af52
fix(runtime): fix Dockerfile build context and uv source paths
rachmlenig Mar 16, 2026
e18c3a3
fix(llamafarm-llama): fix ARM64 binary download URL for linux-arm64
rachmlenig Mar 16, 2026
6c3db02
fix(runtime): switch Docker base to Ubuntu 24.04 for GLIBC 2.39
rachmlenig Mar 16, 2026
2119f7d
fix(runtime): remove import * from re-export shims, drop unused import
rachmlenig Mar 16, 2026
1a2f1f7
fix(runtime): fix Docker vision deps and harden image decoding
rachmlenig Mar 16, 2026
bfd5d00
fix(ci): re-export HfApi for test mocking, add Nx project config for …
rachmlenig Mar 16, 2026
199816d
fix(ci): fix test mocks and prevent PyTorch index leaking into server…
rachmlenig Mar 17, 2026
5fda695
fix(runtime): fix Hailo PCIe auto-detection in edge server
rachmlenig Mar 18, 2026
3e22dee
fix(runtime): use InferModel for output shape in Hailo inference
rachmlenig Mar 18, 2026
4d94975
fix(runtime): remove unsupported timeout_ms arg from Hailo run()
rachmlenig Mar 18, 2026
ffef60a
fix(runtime): pass timeout as positional arg to Hailo run()
rachmlenig Mar 18, 2026
2c39c0a
fix(runtime): address PR review findings from CodeQL and code-quality…
rachmlenig Mar 18, 2026
d218e89
fix(runtime): fix normalized coordinate parsing in Hailo NMS output
rachmlenig Mar 19, 2026
d09495a
fix(runtime): add debug logging to Hailo NMS coordinate parsing
rachmlenig Mar 19, 2026
6ac481a
fix(runtime): rewrite Hailo NMS parser for per-class output format
rachmlenig Mar 19, 2026
7dd0142
fix(runtime): parse flat per-class Hailo NMS buffer correctly
rachmlenig Mar 19, 2026
ff5f045
fix(runtime): address cubic-dev-ai review feedback
rachmlenig Mar 19, 2026
4ec1f4a
feat(runtime): add Zenoh IPC interface for on-drone inference
rachmlenig Mar 19, 2026
4e8a5ee
fix(runtime): remove await from synchronous zenoh.open() call
rachmlenig Mar 20, 2026
d39fdf1
fix(runtime): use synchronous Zenoh Python API throughout IPC service
rachmlenig Mar 20, 2026
9b3bbb2
fix(common): cache device detection to suppress repeated CPU log spam
rachmlenig Mar 20, 2026
a6d2de7
ci(docker): add edge-runtime to Docker CI and release workflows
rachmlenig Mar 20, 2026
64a102b
chore(rag): update uv.lock to fix Docker build
rachmlenig Mar 20, 2026
f05e41b
feat(edge): add /v1/completions endpoint for raw text completions
mhamann Mar 20, 2026
d77518e
chore(common): update uv.lock to add cachetools dependency
rachmlenig Mar 23, 2026
53d1395
chore(server): update uv.lock to fix Docker build
rachmlenig Mar 23, 2026
1985cca
chore: use multistage docker build with vision toggle
BobbyRadford Mar 26, 2026
fb5c1b1
fix(edge): address PR review comments for path safety and correctness
rachmlenig Mar 26, 2026
310c808
fix(ci): use branch-derived tag for Trivy security scan
rachmlenig Mar 26, 2026
4ccb64b
ci(docker): publish edge-runtime-lite image without vision deps
rachmlenig Mar 26, 2026
22256a0
fix(ci): restore runtime to create-manifest for multi-arch support
rachmlenig Mar 26, 2026
80bc45c
fix(ci): use uv run for datamodel import verification
rachmlenig Mar 27, 2026
8be0ec9
fix(ci): replace broken ollama-action with official install script
rachmlenig Mar 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,12 @@ jobs:
echo "✓ Datamodel generation complete"

- name: Verify datamodel was generated
working-directory: config
run: |
echo "Checking for datamodel.py..."
ls -lh config/datamodel.py || echo "❌ datamodel.py not found!"
ls -lh datamodel.py || { echo "❌ datamodel.py not found!"; exit 1; }
echo "Attempting import test..."
cd config && python -c "from datamodel import LlamaFarmConfig; print('✓ Direct import successful')" || echo "❌ Direct import failed!"
echo "Attempting module import test..."
python -c "from config.datamodel import LlamaFarmConfig; print('✓ Module import successful')" || echo "❌ Module import failed!"
uv run python -c "from datamodel import LlamaFarmConfig; print('✓ Import successful')"

- name: Check if component has tests
id: check-tests
Expand Down Expand Up @@ -225,9 +224,11 @@ jobs:
continue-on-error: false

- name: Set up Ollama
uses: pydantic/ollama-action@v3
with:
model: nomic-embed-text
run: |
curl -fsSL https://ollama.com/install.sh | sh
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai bot Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Avoid executing an unpinned remote install script in CI; use a pinned action/versioned artifact (or checksum-verified download) to reduce supply-chain and reproducibility risk.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At .github/workflows/ci.yml, line 228:

<comment>Avoid executing an unpinned remote install script in CI; use a pinned action/versioned artifact (or checksum-verified download) to reduce supply-chain and reproducibility risk.</comment>

<file context>
@@ -224,9 +224,11 @@ jobs:
-        with:
-          model: nomic-embed-text
+        run: |
+          curl -fsSL https://ollama.com/install.sh | sh
+          ollama serve &
+          sleep 3
</file context>
Fix with Cubic

ollama serve &
sleep 3
ollama pull nomic-embed-text

- name: Run tests
if: steps.check-tests.outputs.has_tests == 'true'
Expand Down
44 changes: 36 additions & 8 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
packages: write
strategy:
matrix:
service: [designer, server, rag, runtime]
service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]
include:
- service: designer
context: ./designer
Expand All @@ -41,6 +41,14 @@ jobs:
context: ./
dockerfile: ./runtimes/universal/Dockerfile
description: "LlamaFarm Universal Runtime - Universal Runtime for all models"
- service: edge-runtime
context: ./
dockerfile: ./runtimes/edge/Dockerfile
description: "LlamaFarm Edge Runtime - Lightweight runtime for edge/drone deployment"
- service: edge-runtime-lite
context: ./
dockerfile: ./runtimes/edge/Dockerfile
description: "LlamaFarm Edge Runtime (Lite) - Language-only runtime without vision deps"

steps:
- name: Checkout repository
Expand Down Expand Up @@ -109,6 +117,7 @@ jobs:
build-args: |
GIT_SHA=${{ github.sha }}
PYTORCH_VARIANT=${{ matrix.service == 'runtime' && github.event_name == 'pull_request' && 'cpu' || '' }}
ENABLE_VISION=${{ matrix.service == 'edge-runtime-lite' && 'false' || '' }}

- name: Upload AMD64 image artifact (PR only)
if: github.event_name == 'pull_request'
Expand All @@ -127,7 +136,7 @@ jobs:
packages: write
strategy:
matrix:
service: [designer, server, rag, runtime]
service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]
include:
- service: designer
context: ./designer
Expand All @@ -145,6 +154,14 @@ jobs:
context: ./
dockerfile: ./runtimes/universal/Dockerfile
description: "LlamaFarm Universal Runtime - model serving for GGUF and Transformers"
- service: edge-runtime
context: ./
dockerfile: ./runtimes/edge/Dockerfile
description: "LlamaFarm Edge Runtime - Lightweight runtime for edge/drone deployment"
- service: edge-runtime-lite
context: ./
dockerfile: ./runtimes/edge/Dockerfile
description: "LlamaFarm Edge Runtime (Lite) - Language-only runtime without vision deps"

steps:
- name: Checkout repository
Expand Down Expand Up @@ -195,6 +212,7 @@ jobs:
outputs: ${{ github.event_name == 'pull_request' && format('type=docker,dest={0}/{1}-arm64.tar', runner.temp, matrix.service) || '' }}
build-args: |
GIT_SHA=${{ github.sha }}
ENABLE_VISION=${{ matrix.service == 'edge-runtime-lite' && 'false' || '' }}

- name: Upload ARM64 image artifact (PR only)
if: github.event_name == 'pull_request'
Expand All @@ -214,7 +232,7 @@ jobs:
packages: write
strategy:
matrix:
service: [designer, server, rag]
service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]

steps:
- name: Checkout repository
Expand Down Expand Up @@ -368,11 +386,11 @@ jobs:
IMAGE_TAG: pr-${{ github.event.number }}
run: |
# Tag the loaded images with the expected format for docker-compose
SERVICES=(designer server rag runtime)
SERVICES=(designer server rag runtime edge-runtime edge-runtime-lite)

for SERVICE in "${SERVICES[@]}"; do
# Find the loaded images for this service
AMD64_IMAGE=$(docker images --format "table {{.Repository}}:{{.Tag}}" | grep "$SERVICE" | grep "amd64" | head -1 | tr -d ' ')
AMD64_IMAGE=$(docker images --format "table {{.Repository}}:{{.Tag}}" | grep "/${SERVICE}:" | grep "amd64" | head -1 | tr -d ' ')

if [ -n "$AMD64_IMAGE" ]; then
# Tag for docker-compose (use AMD64 for testing)
Expand Down Expand Up @@ -465,6 +483,8 @@ jobs:
docker compose -f docker-compose.yml logs --tail=50 designer
docker compose -f docker-compose.yml logs --tail=50 rag
docker compose -f docker-compose.yml logs --tail=50 runtime
docker compose -f docker-compose.yml logs --tail=50 edge-runtime
docker compose -f docker-compose.yml logs --tail=50 edge-runtime-lite

- name: Show logs on failure
if: failure()
Expand All @@ -479,7 +499,7 @@ jobs:
docker compose -f docker-compose.yml logs --tail=200 || true
echo ""
echo "=== Individual service logs ==="
for service in server designer rag runtime; do
for service in server designer rag runtime edge-runtime edge-runtime-lite; do
echo "--- Logs for $service ---"
docker compose -f docker-compose.yml logs --tail=100 "$service" || true
echo ""
Expand All @@ -504,14 +524,22 @@ jobs:
security-events: write
strategy:
matrix:
service: [designer, server, rag, runtime]
service: [designer, server, rag, runtime, edge-runtime, edge-runtime-lite]
steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Determine image tag
id: tag
run: |
# Use branch name as tag (matches create-manifest metadata)
TAG="${GITHUB_REF_NAME}"
echo "image=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/${{ matrix.service }}:${TAG}" >> "$GITHUB_OUTPUT"

- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/${{ matrix.service }}:latest
image-ref: ${{ steps.tag.outputs.image }}
format: "sarif"
output: "trivy-results-${{ matrix.service }}.sarif"

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
echo "${{ secrets.GITHUB_TOKEN }}" | docker login "${REGISTRY}" -u "${{ github.actor }}" --password-stdin

# Services to retag
SERVICES=(designer server rag runtime)
SERVICES=(designer server rag runtime edge-runtime)

# Wait for all Docker images to be available
echo "Waiting for Docker images to be built and pushed..."
Expand Down
36 changes: 21 additions & 15 deletions cli/cmd/orchestrator/python_env.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,24 +142,30 @@ func (m *PythonEnvManager) getEnv() []string {
// Start with the current environment
env := os.Environ()

// Filter out Python-related environment variables that could interfere
// with UV's managed Python environment
// Filter out environment variables that could interfere with UV's managed
// Python environment or cause incorrect package resolution.
// UV index vars are stripped here so that only services that explicitly
// declare them in their Env map (e.g. universal-runtime) will have them.
// This prevents the PyTorch CPU index from leaking into server/rag, where
// it can cause install failures (e.g. markupsafe with only cp314 wheels).
filteredEnv := make([]string, 0, len(env))
pythonEnvVars := map[string]bool{
"VIRTUAL_ENV": true,
"PYTHONHOME": true,
"PYTHONPATH": true,
"PYTHONSTARTUP": true,
"PYTHONEXECUTABLE": true,
"PYTHONUSERBASE": true,
"CONDA_DEFAULT_ENV": true,
"CONDA_PREFIX": true,
"VIRTUAL_ENV": true,
"PYTHONHOME": true,
"PYTHONPATH": true,
"PYTHONSTARTUP": true,
"PYTHONEXECUTABLE": true,
"PYTHONUSERBASE": true,
"CONDA_DEFAULT_ENV": true,
"CONDA_PREFIX": true,
"CONDA_PYTHON_EXE": true,
"PYENV_VERSION": true,
"PYENV_VIRTUAL_ENV": true,
"PIPENV_ACTIVE": true,
"POETRY_ACTIVE": true,
"PDM_PYTHON": true,
"PYENV_VERSION": true,
"PYENV_VIRTUAL_ENV": true,
"PIPENV_ACTIVE": true,
"POETRY_ACTIVE": true,
"PDM_PYTHON": true,
"UV_EXTRA_INDEX_URL": true,
"UV_INDEX_STRATEGY": true,
}

for _, e := range env {
Expand Down
4 changes: 2 additions & 2 deletions cli/cmd/orchestrator/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ var ServiceGraph = map[string]*ServiceDefinition{
"LLAMAFARM_GGUF_FORCE_CPU": "", // Set to "1" to force CPU for GGUF inference (avoids Metal SIGSEGV in CI)
"HF_TOKEN": "",
// In CI environments, use CPU-only PyTorch to avoid downloading 3GB+ of CUDA packages
"UV_EXTRA_INDEX_URL": "${UV_EXTRA_INDEX_URL}",
"UV_INDEX_STRATEGY": "", // Inherit from parent env (e.g. unsafe-best-match in CI)
"UV_EXTRA_INDEX_URL": "${UV_EXTRA_INDEX_URL}",
"UV_INDEX_STRATEGY": "${UV_INDEX_STRATEGY}",
},
HealthComponent: "universal-runtime",
HardwarePackages: []HardwarePackageSpec{
Expand Down
8 changes: 8 additions & 0 deletions common/llamafarm_common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
select_gguf_file_with_logging,
)

# These submodules are also importable directly (llamafarm_common.safe_home, etc.).
# They are deliberately left as submodule imports, rather than re-exported here,
# to avoid adding their deps to the top-level namespace.
# Usage:
# from llamafarm_common.safe_home import safe_home, get_data_dir
# from llamafarm_common.device import get_optimal_device, get_device_info
# from llamafarm_common.model_cache import ModelCache
# from llamafarm_common.model_format import detect_model_format

__all__ = [
"GGUF_QUANTIZATION_PREFERENCE_ORDER",
"get_gguf_file_path",
Expand Down
Loading
Loading