waggle-sensor · iperezx · Feb 6, 2026 · Jan 28, 2026 · Jan 29, 2026 · Jan 29, 2026
diff --git a/.env.example b/.env.example
@@ -0,0 +1,53 @@
+#Secrets
+SAGE_USER=
+SAGE_PASS=
+HF_TOKEN=
+
+#weaviate
+#https://weaviate.io/developers/weaviate/config-refs/env-vars
+BIND_INFERENCE_API=http://multi2vec-bind:8080
+RERANKER_INFERENCE_API=http://reranker-transformers:8080
+QUERY_DEFAULTS_LIMIT=25
+AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true'
+PERSISTENCE_DATA_PATH='/var/lib/weaviate'
+DEFAULT_VECTORIZER_MODULE='multi2vec-bind'
+ENABLE_MODULES='multi2vec-bind,reranker-transformers,backup-filesystem'
+BACKUP_FILESYSTEM_PATH='/tmp/backups'
+CLUSTER_HOSTNAME=node1
+#https://weaviate.io/developers/weaviate/concepts/vector-index#asynchronous-indexing
+ASYNC_INDEXING=true
+#https://weaviate.io/blog/weaviate-1-18-release#improvements-to-bm25-and-hybrid-search
+USE_BLOCKMAX_WAND=true
+USE_INVERTED_SEARCHABLE=true
+LIMIT_RESOURCES=true
+#Weaviate log level (default is info); note that LOG_LEVEL is also set in the weavloader section below
+# LOG_LEVEL='debug'
+#https://weaviate.io/developers/weaviate/configuration/monitoring
+# PROMETHEUS_MONITORING_ENABLED=true
+# PROMETHEUS_MONITORING_PORT=2112
+
+#triton
+# Ensure this path matches the model repository directory 
+MODEL_REPOSITORY=/app/models
+CLIP_MODEL_PATH=/models/DFN5B-CLIP-ViT-H-14-378
+CLIP_MODEL_VERSION=419d1f8f6a96aabaf5913c526d059facda50c24b
+GEMMA_MODEL_PATH=/models/gemma-3-4b-it
+GEMMA_MODEL_VERSION=093f9f388b31de276ce2de164bdc2081324b9767
+
+#weavloader
+TRITON_HOST=triton
+TRITON_PORT=8001
+WEAVIATE_HOST=weaviate
+WEAVIATE_PORT=8080
+CELERY_BROKER_URL=redis://localhost:6379/0
+CELERY_RESULT_BACKEND=redis://localhost:6379/0
+UNALLOWED_NODES="W042,N001,V012,W015,W01C,W01E,W024,W026,W02C,W02D,W02E,W02F,W031,W040,W046,W047,W048,W049,W04A,W051,W055,W059,W05A,W05B,W05C,W05D,W05E,W05F,W060,W061,W062,W063,W064,W065,W066,W06E,W072,W073,W074,W075,W076,W077,W078,W079,W07A,W07B,W07D,W07E,W07F,W080,W081,W086,W088,W089,W08A,W08B,W08D,W08E,W08F,W090,W091,W092,W094,W096,W099,W09B,W09E,W0A0,W0A1,W0BB,W0BC"
+LOG_LEVEL='INFO'
+MONITOR_DATA_STREAM_INTERVAL=60
+MONITOR_DATA_STREAM_QUERY_DELAY_MINUTE=5
+
+#gradio-ui
+# WEAVIATE_HOST and WEAVIATE_PORT are already defined in the weavloader section above;
+# they are not repeated here so the two services cannot drift apart.
+WEAVIATE_GRPC_PORT=50051
+CLUSTER_FLAG=True
diff --git a/Readme.md b/Readme.md
@@ -34,6 +34,51 @@ This repository includes a GitHub Action that builds and pushes Docker images fo
 
 ---
 
+## Docker compose
+envs:
+```
+cp .env.example .env
+```
+Make sure to fill in the secrets (the first three variables: `SAGE_USER`, `SAGE_PASS` and `HF_TOKEN`).
+
+Run:
+```
+docker compose up -d --build
+```
+
+Clean up:
+```
+docker compose down
+```
+
+All together:
+```
+docker compose down && docker compose up -d --build
+```
+
+Clean up (volumes):
+```
+docker compose down --volumes
+```
+
+Notes:
+- Triton might not be able to load one or both of the models (CLIP and Gemma 3), or it may raise `OSError`s while loading the model weights. As a workaround, download the models to a local directory and then copy them into the container:
+   ```
+   set -a; source .env; set +a  # export the .env values so huggingface-cli can see HF_TOKEN
+   cd triton
+   python3 -m venv env
+   source env/bin/activate
+   pip install -r requirements.txt
+   huggingface-cli download --local-dir DFN5B-CLIP-ViT-H-14-378  --revision "$CLIP_MODEL_VERSION" apple/DFN5B-CLIP-ViT-H-14-378
+
+   huggingface-cli download --local-dir gemma-3-4b-it --revision "$GEMMA_MODEL_VERSION" google/gemma-3-4b-it
+
+   docker cp DFN5B-CLIP-ViT-H-14-378 sage-nrp-image-search-triton-1:/models/
+   docker cp gemma-3-4b-it sage-nrp-image-search-triton-1:/models/
+   ```
+
+---
+
 ## Kubernetes
 Developed and test with these versions for k8s and kustomize:
 ```

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,118 @@
+version: '3.4'  # NOTE(review): Compose V2 treats the top-level version key as obsolete — confirm it is still needed
+services:
+  weaviate:  # vector database: REST on 8080, gRPC on 50051
+    command:
+      - --host
+      - 0.0.0.0
+      - --port
+      - '8080'
+      - --scheme
+      - http
+    image: semitechnologies/weaviate:1.32.0  # https://weaviate.io/developers/weaviate/release-notes#weaviate-core-and-client-releases
+    ports:
+      - '8080:8080'
+      - '50051:50051'
+    restart: on-failure
+    environment:  # bare names: values are taken from .env / the calling shell
+      - BIND_INFERENCE_API
+      - RERANKER_INFERENCE_API
+      - QUERY_DEFAULTS_LIMIT
+      - AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED
+      - PERSISTENCE_DATA_PATH
+      - DEFAULT_VECTORIZER_MODULE
+      - ENABLE_MODULES
+      - BACKUP_FILESYSTEM_PATH
+      - CLUSTER_HOSTNAME
+      - ASYNC_INDEXING
+      - USE_BLOCKMAX_WAND
+      - USE_INVERTED_SEARCHABLE
+      - LIMIT_RESOURCES
+      # - LOG_LEVEL
+      # - PROMETHEUS_MONITORING_ENABLED
+      # - PROMETHEUS_MONITORING_PORT
+    healthcheck:  # polls Weaviate's readiness endpoint
+      test: ["CMD", "wget", "-qO-", "http://localhost:8080/v1/.well-known/ready"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    volumes:
+      - weaviate:/var/lib/weaviate
+
+  multi2vec-bind:  # inference container behind BIND_INFERENCE_API (http://multi2vec-bind:8080 in .env.example)
+    image: semitechnologies/multi2vec-bind:imagebind
+
+  reranker-transformers:  # inference container behind RERANKER_INFERENCE_API (http://reranker-transformers:8080 in .env.example)
+    image: semitechnologies/reranker-transformers:cross-encoder-ms-marco-MiniLM-L-6-v2
+
+  triton:  # inference server for the CLIP and Gemma models configured below
+    build:
+      context: ./triton
+    platform: "linux/amd64"
+    ports:
+      - '8000:8000'
+      - '8001:8001'  # port weavloader connects to (TRITON_PORT)
+      - '8002:8002'
+    shm_size: '500MB'  # shared memory size
+    restart: on-failure
+    environment:  # bare names: values are taken from .env / the calling shell
+      - MODEL_REPOSITORY
+      - CLIP_MODEL_PATH
+      - CLIP_MODEL_VERSION
+      - GEMMA_MODEL_PATH
+      - GEMMA_MODEL_VERSION
+      - HF_TOKEN
+    volumes:
+      - triton:/models
+
+  weavmanage:
+    build:
+      context: ./weavmanage
+    environment:  # bare names: values are taken from .env / the calling shell
+      - WEAVIATE_HOST
+      - WEAVIATE_PORT
+      - WEAVIATE_GRPC_PORT
+    depends_on:  # NOTE(review): short form waits for container start only, not for weaviate's healthcheck
+      - weaviate
+
+  weavloader:
+    build:
+      context: ./weavloader
+    ports:
+      - '8081:8080'  # metrics endpoint, published on host port 8081 (weaviate already uses 8080)
+      - '5555:5555'  # Flower endpoint
+    restart: on-failure
+    environment:  # bare names: values are taken from .env / the calling shell
+      - TRITON_HOST
+      - TRITON_PORT
+      - WEAVIATE_HOST
+      - WEAVIATE_PORT
+      - SAGE_USER
+      - SAGE_PASS
+      - CELERY_BROKER_URL
+      - CELERY_RESULT_BACKEND
+      - UNALLOWED_NODES
+      - LOG_LEVEL
+      - MONITOR_DATA_STREAM_INTERVAL
+      - MONITOR_DATA_STREAM_QUERY_DELAY_MINUTE
+    depends_on:  # start order only; does not wait for the services to be ready
+      - weaviate
+      - weavmanage
+      - triton
+
+  gradio-ui:  # search UI, served on port 7860
+    build:
+      context: ./app
+    ports:
+      - '7860:7860'
+    restart: on-failure
+    environment:  # bare names: values are taken from .env / the calling shell
+      - WEAVIATE_HOST
+      - WEAVIATE_PORT
+      - WEAVIATE_GRPC_PORT
+      - CLUSTER_FLAG
+      - UNALLOWED_NODES
+
+volumes:
+  triton:  # mounted at /models in the triton service
+  weaviate:  # mounted at /var/lib/weaviate in the weaviate service
diff --git a/kubernetes/README.md b/kubernetes/README.md
@@ -33,6 +33,11 @@ Copy the template and fill in your HuggingFace token (base64-encoded):
 cp base/huggingface-secret.template.yaml base/._huggingface-secret.yaml
 ```
 
+To generate the base64-encoded Hugging Face token:
+```
+echo -n "your_hf_token_here" | base64
+```
+
 ### 2. Sage User Secret
 
 Copy the Sage user secret template and add your Sage account name and password:
@@ -41,7 +46,12 @@ Copy the Sage user secret template and add your Sage account name and password:
 cp base/sage-user-secret.template.yaml base/._sage-user-secret.yaml
 ```
 
-- Encode username and password values as above.
+To generate the base64-encoded `SAGE_USER` and `SAGE_PASS` values:
+```
+echo -n "your_username_here" | base64
+echo -n "your_password_here" | base64
+```
+
 - Update the `SAGE_USER` and `SAGE_PASS` fields.
 
 > **Important:** 
@@ -66,6 +76,59 @@ Or, using kubectl (if it supports native kustomize):
 kubectl apply -k base/
 ```
 
+Deploy all services:
+```
+kubectl kustomize nrp-dev | kubectl apply -f -
+kubectl kustomize nrp-prod | kubectl apply -f -
+```
+
+Delete all services:
+```
+kubectl kustomize nrp-dev | kubectl delete -f -
+kubectl kustomize nrp-prod | kubectl delete -f -
+```
+
+Debugging - output to yaml:
+```
+kubectl kustomize nrp-dev -o hybrid-search-dev.yaml
+kubectl kustomize nrp-prod -o hybrid-search-prod.yaml
+```
+
+## Testing a Pull Request
+For testing a Pull Request (PR), the overlay [prs](/kubernetes/prs/) is provided. GitHub Actions is set up to build an image for each PR so that an instance of the image search deployed on k8s can be tested manually (and, in the future, automatically).
+
+The following manual steps are required for now:
+- [kubernetes/prs/kustomization.yaml](/kubernetes/prs/kustomization.yaml)
+    - change the `namePrefix` to the name of the PR
+    - change `commonLabels.env` to the name of the PR
+    - change the `newTag` to the name of the PR for each service that needs it
+- port-forwarding for any of the services to test out (update `pr`):
+    - `kubectl port-forward svc/pr-triton 8001:8001`: triton endpoint to call the LLM models locally
+    - `kubectl port-forward svc/pr-gradio-ui 7860:7860`: Search UI
+    - `kubectl port-forward svc/pr-weaviate 8080:8080`: Weaviate REST endpoint
+    - `kubectl port-forward svc/pr-weaviate 50051:50051`: Weaviate GRPC endpoint
+    - `kubectl port-forward svc/pr-weavloader-metrics 5555:5555`: Weavloader Flower endpoint
+    - `kubectl port-forward svc/pr-weavloader-metrics 8081:8080`: Weavloader Prometheus endpoint
+
+Deploy:
+```
+kubectl kustomize prs | kubectl apply -f -
+```
+
+Delete all services:
+```
+kubectl kustomize prs | kubectl delete -f -
+```
+
+Debugging - output to yaml:
+```
+kubectl kustomize prs -o hybrid-search-pr.yaml
+```
+
+Notes:
+- Make sure that your PR is up to date with `main` so that the services that were not modified are reflected in the `latest` tag. This can also be checked with the [docker-compose](/docker-compose.yml) local deployment (after the PR is up to date with `main`) to see whether the changes in the PR work with the rest of the services that were not modified.
+- Users can combine this overlay with their local Docker Compose instance to use a Triton instance that has an NVIDIA GPU. This involves commenting out the triton ports in the Docker Compose manifest and doing the kubectl port-forwarding described above.
+
 ## Managing and Customizing
 
 You can extend or patch this `base/` deployment using kustomize overlays for different environments, resource limits, or development setups. See included overlays (such as those in benchmark subfolders) for example usage.

diff --git a/kubernetes/base/triton.yaml b/kubernetes/base/triton.yaml
@@ -22,6 +22,16 @@ spec:
               value: "info"
             - name: TORCHDYNAMO_DISABLE
               value: "1"
+            - name: MODEL_REPOSITORY
+              value: "/app/models"
+            - name: CLIP_MODEL_PATH
+              value: "/models/DFN5B-CLIP-ViT-H-14-378"
+            - name: CLIP_MODEL_VERSION
+              value: "419d1f8f6a96aabaf5913c526d059facda50c24b"
+            - name: GEMMA_MODEL_PATH
+              value: "/models/gemma-3-4b-it"
+            - name: GEMMA_MODEL_VERSION
+              value: "093f9f388b31de276ce2de164bdc2081324b9767"
             - name: HF_TOKEN
               valueFrom:
                 secretKeyRef:
@@ -32,10 +42,12 @@ spec:
               cpu: 1
               memory: 8Gi
               nvidia.com/gpu: 1
+              ephemeral-storage: 50Gi
             requests:
               cpu: 1
               memory: 8Gi
               nvidia.com/gpu: 1
+              ephemeral-storage: 50Gi
           ports:
           - name: http
             containerPort: 8000
@@ -46,10 +58,10 @@ spec:
           volumeMounts:
           - mountPath: /dev/shm
             name: dshm
-          # - mountPath: /app/models/gemma-3-4b-it
-          #   name: models
-          # - mountPath: /app/models/DFN5B-CLIP-ViT-H-14-378
-          #   name: models
+          # Mount the models volume once; CLIP_MODEL_PATH and GEMMA_MODEL_PATH are
+          # subdirectories of /models, so two mounts of the same volume would alias each other.
+          - mountPath: /models
+            name: models
       volumes:
       - name: dshm
         emptyDir:

diff --git a/kubernetes/base/weavloader.yaml b/kubernetes/base/weavloader.yaml
@@ -101,4 +101,8 @@ spec:
     - name: metrics
       port: 8080
       targetPort: 8080
+      protocol: TCP
+    - name: flower
+      port: 5555
+      targetPort: 5555
       protocol: TCP
diff --git a/kubernetes/nrp-dev/gpus.yaml b/kubernetes/nrp-dev/gpus.yaml
@@ -14,10 +14,6 @@ spec:
                 operator: In
                 values:
                 - NVIDIA-A10
-              - key: nautilus.io/reservation
-                operator: In
-                values:
-                - sage
       tolerations:
       - key: nautilus.io/reservation
         operator: Equal

diff --git a/kubernetes/nrp-prod/gpus.yaml b/kubernetes/nrp-prod/gpus.yaml
@@ -14,10 +14,6 @@ spec:
                 operator: In
                 values:
                 - NVIDIA-A10
-              - key: nautilus.io/reservation
-                operator: In
-                values:
-                - sage
       tolerations:
       - key: nautilus.io/reservation
         operator: Equal