Skip to content

Commit db79a72

Browse files
authored
Merge pull request #2 from waggle-sensor/fix-local-builds
Fix building locally and run the services
2 parents 069eb6e + ed210ba commit db79a72

File tree

21 files changed

+451
-138
lines changed

21 files changed

+451
-138
lines changed

.env.example

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#Secrets
2+
SAGE_USER=
3+
SAGE_PASS=
4+
HF_TOKEN=
5+
6+
#weaviate
7+
#https://weaviate.io/developers/weaviate/config-refs/env-vars
8+
BIND_INFERENCE_API=http://multi2vec-bind:8080
9+
RERANKER_INFERENCE_API=http://reranker-transformers:8080
10+
QUERY_DEFAULTS_LIMIT=25
11+
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true'
12+
PERSISTENCE_DATA_PATH='/var/lib/weaviate'
13+
DEFAULT_VECTORIZER_MODULE='multi2vec-bind'
14+
ENABLE_MODULES='multi2vec-bind,reranker-transformers,backup-filesystem'
15+
BACKUP_FILESYSTEM_PATH='/tmp/backups'
16+
CLUSTER_HOSTNAME=node1
17+
#https://weaviate.io/developers/weaviate/concepts/vector-index#asynchronous-indexing
18+
ASYNC_INDEXING=true
19+
#https://weaviate.io/blog/weaviate-1-18-release#improvements-to-bm25-and-hybrid-search
20+
USE_BLOCKMAX_WAND=true
21+
USE_INVERTED_SEARCHABLE=true
22+
LIMIT_RESOURCES=true
23+
#default is info
24+
# LOG_LEVEL: 'debug'
25+
#https://weaviate.io/developers/weaviate/configuration/monitoring
26+
# PROMETHEUS_MONITORING_ENABLED: true
27+
# PROMETHEUS_MONITORING_PORT: 2112
28+
29+
#triton
30+
# Ensure this path matches the model repository directory
31+
MODEL_REPOSITORY=/app/models
32+
CLIP_MODEL_PATH=/models/DFN5B-CLIP-ViT-H-14-378
33+
CLIP_MODEL_VERSION=419d1f8f6a96aabaf5913c526d059facda50c24b
34+
GEMMA_MODEL_PATH=/models/gemma-3-4b-it
35+
GEMMA_MODEL_VERSION=093f9f388b31de276ce2de164bdc2081324b9767
36+
37+
#weavloader
38+
TRITON_HOST=triton
39+
TRITON_PORT=8001
40+
WEAVIATE_HOST=weaviate
41+
WEAVIATE_PORT=8080
42+
CELERY_BROKER_URL=redis://localhost:6379/0
43+
CELERY_RESULT_BACKEND=redis://localhost:6379/0
44+
UNALLOWED_NODES="W042,N001,V012,W015,W01C,W01E,W024,W026,W02C,W02D,W02E,W02F,W031,W040,W046,W047,W048,W049,W04A,W051,W055,W059,W05A,W05B,W05C,W05D,W05E,W05F,W060,W061,W062,W063,W064,W065,W066,W06E,W072,W073,W074,W075,W076,W077,W078,W079,W07A,W07B,W07D,W07E,W07F,W080,W081,W086,W088,W089,W08A,W08B,W08D,W08E,W08F,W090,W091,W092,W094,W096,W099,W09B,W09E,W0A0,W0A1,W0BB,W0BC"
45+
LOG_LEVEL='INFO'
46+
MONITOR_DATA_STREAM_INTERVAL=60
47+
MONITOR_DATA_STREAM_QUERY_DELAY_MINUTE=5
48+
49+
#gradio-ui
50+
WEAVIATE_HOST=weaviate
51+
WEAVIATE_PORT=8080
52+
WEAVIATE_GRPC_PORT=50051
53+
CLUSTER_FLAG=True

Readme.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,51 @@ This repository includes a GitHub Action that builds and pushes Docker images fo
3434

3535
---
3636

37+
## Docker compose
38+
envs:
39+
```
40+
cp .env.example .env
41+
```
42+
Make sure to fill in the secrets (top three env vars)
43+
44+
Run:
45+
```
46+
docker compose up -d --build
47+
```
48+
49+
Clean up:
50+
```
51+
docker compose down
52+
```
53+
54+
All together:
55+
```
56+
docker compose down && docker compose up -d --build
57+
```
58+
59+
Clean up (volumes):
60+
```
61+
docker compose down --volumes
62+
```
63+
64+
Notes:
65+
- Triton might not be able to load one of the models (CLIP and gemma3), or may raise OSErrors while loading the model weights. As a workaround, download the models to your local directory and then copy them into the container:
66+
```
67+
source .env #assumes that HF_TOKEN is set
68+
cd triton
69+
python3 -m venv env
70+
source env/bin/activate
71+
pip install -r requirements.txt
72+
huggingface-cli download --local-dir DFN5B-CLIP-ViT-H-14-378 --revision "$CLIP_MODEL_VERSION" apple/DFN5B-CLIP-ViT-H-14-378
73+
74+
huggingface-cli download --local-dir gemma-3-4b-it --revision "$GEMMA_MODEL_VERSION" google/gemma-3-4b-it
75+
76+
docker cp DFN5B-CLIP-ViT-H-14-378 sage-nrp-image-search-triton-1:/models/
77+
docker cp gemma-3-4b-it sage-nrp-image-search-triton-1:/models/
78+
```
79+
80+
---
81+
3782
## Kubernetes
3883
Developed and tested with these versions for k8s and kustomize:
3984
```

docker-compose.yml

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
version: '3.4'
2+
services:
3+
weaviate:
4+
command:
5+
- --host
6+
- 0.0.0.0
7+
- --port
8+
- '8080'
9+
- --scheme
10+
- http
11+
image: semitechnologies/weaviate:1.32.0 #https://weaviate.io/developers/weaviate/release-notes#weaviate-core-and-client-releases
12+
ports:
13+
- 8080:8080
14+
- 50051:50051
15+
restart: on-failure
16+
environment:
17+
- BIND_INFERENCE_API
18+
- RERANKER_INFERENCE_API
19+
- QUERY_DEFAULTS_LIMIT
20+
- AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED
21+
- PERSISTENCE_DATA_PATH
22+
- DEFAULT_VECTORIZER_MODULE
23+
- ENABLE_MODULES
24+
- BACKUP_FILESYSTEM_PATH
25+
- CLUSTER_HOSTNAME
26+
- ASYNC_INDEXING
27+
- USE_BLOCKMAX_WAND
28+
- USE_INVERTED_SEARCHABLE
29+
- LIMIT_RESOURCES
30+
# - LOG_LEVEL
31+
# - PROMETHEUS_MONITORING_ENABLED
32+
# - PROMETHEUS_MONITORING_PORT
33+
healthcheck:
34+
test: ["CMD", "wget", "-qO-", "http://localhost:8080/v1/.well-known/ready"]
35+
interval: 10s
36+
timeout: 5s
37+
retries: 5
38+
start_period: 30s
39+
volumes:
40+
- weaviate:/var/lib/weaviate
41+
42+
multi2vec-bind:
43+
image: semitechnologies/multi2vec-bind:imagebind
44+
45+
reranker-transformers:
46+
image: semitechnologies/reranker-transformers:cross-encoder-ms-marco-MiniLM-L-6-v2
47+
48+
triton:
49+
build:
50+
context: ./triton
51+
platform: "linux/amd64"
52+
ports:
53+
- 8000:8000
54+
- 8001:8001
55+
- 8002:8002
56+
shm_size: '500MB' #shared memory size
57+
restart: on-failure
58+
environment:
59+
- MODEL_REPOSITORY
60+
- CLIP_MODEL_PATH
61+
- CLIP_MODEL_VERSION
62+
- GEMMA_MODEL_PATH
63+
- GEMMA_MODEL_VERSION
64+
- HF_TOKEN
65+
volumes:
66+
- triton:/models
67+
68+
weavmanage:
69+
build:
70+
context: ./weavmanage
71+
environment:
72+
- WEAVIATE_HOST
73+
- WEAVIATE_PORT
74+
- WEAVIATE_GRPC_PORT
75+
depends_on:
76+
- weaviate
77+
78+
weavloader:
79+
build:
80+
context: ./weavloader
81+
ports:
82+
- 8081:8080
83+
- 5555:5555
84+
restart: on-failure
85+
environment:
86+
- TRITON_HOST
87+
- TRITON_PORT
88+
- WEAVIATE_HOST
89+
- WEAVIATE_PORT
90+
- SAGE_USER
91+
- SAGE_PASS
92+
- CELERY_BROKER_URL
93+
- CELERY_RESULT_BACKEND
94+
- UNALLOWED_NODES
95+
- LOG_LEVEL
96+
- MONITOR_DATA_STREAM_INTERVAL
97+
- MONITOR_DATA_STREAM_QUERY_DELAY_MINUTE
98+
depends_on:
99+
- weaviate
100+
- weavmanage
101+
- triton
102+
103+
gradio-ui:
104+
build:
105+
context: ./app
106+
ports:
107+
- 7860:7860
108+
restart: on-failure
109+
environment:
110+
- WEAVIATE_HOST
111+
- WEAVIATE_PORT
112+
- WEAVIATE_GRPC_PORT
113+
- CLUSTER_FLAG
114+
- UNALLOWED_NODES
115+
116+
volumes:
117+
triton:
118+
weaviate:

kubernetes/README.md

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ Copy the template and fill in your HuggingFace token (base64-encoded):
3333
cp base/huggingface-secret.template.yaml base/._huggingface-secret.yaml
3434
```
3535

36+
To generate a base64-encoded Hugging Face token:
37+
```
38+
echo -n "your_hf_token_here" | base64
39+
```
40+
3641
### 2. Sage User Secret
3742

3843
Copy the Sage user secret template and add your Sage account name and password:
@@ -41,7 +46,12 @@ Copy the Sage user secret template and add your Sage account name and password:
4146
cp base/sage-user-secret.template.yaml base/._sage-user-secret.yaml
4247
```
4348

44-
- Encode username and password values as above.
49+
To generate base64-encoded values for SAGE_USER and SAGE_PASS:
50+
```
51+
echo -n "your_username_here" | base64
52+
echo -n "your_password_here" | base64
53+
```
54+
4555
- Update the `SAGE_USER` and `SAGE_PASS` fields.
4656
4757
> **Important:**
@@ -66,6 +76,59 @@ Or, using kubectl (if it supports native kustomize):
6676
kubectl apply -k base/
6777
```
6878

79+
Deploy all services:
80+
```
81+
kubectl kustomize nrp-dev | kubectl apply -f -
82+
kubectl kustomize nrp-prod | kubectl apply -f -
83+
```
84+
85+
Delete all services:
86+
```
87+
kubectl kustomize nrp-dev | kubectl delete -f -
88+
kubectl kustomize nrp-prod | kubectl delete -f -
89+
```
90+
91+
Debugging - output to yaml:
92+
```
93+
kubectl kustomize nrp-dev -o hybrid-search-dev.yaml
94+
kubectl kustomize nrp-prod -o hybrid-search-prod.yaml
95+
```
96+
97+
## Testing a Pull Request
98+
For testing a Pull Request (PR), the overlay [prs](/kubernetes/prs/) is provided. GitHub Actions is set up to create an image for each PR so that we can manually test, or in the future automatically test, an instance of the image search deployed on k8s.
99+
100+
The following manual steps are required for now:
101+
- [kubernetes/prs/kustomization.yaml](/kubernetes/prs/kustomization.yaml)
102+
- change the `namePrefix` to the name of the PR
103+
- change `commonLabels.env` to the name of the PR
104+
- change the `newTag` to the name of the PR for each service that needs it
105+
- port-forwarding for any of the services to test out (update `pr`):
106+
- `kubectl port-forward svc/pr-triton 8001:8001`: triton endpoint to call the LLM models locally
107+
- `kubectl port-forward svc/pr-gradio-ui 7860:7860`: Search UI
108+
- `kubectl port-forward svc/pr-weaviate 8080:8080`: Weaviate REST endpoint
109+
- `kubectl port-forward svc/pr-weaviate 50051:50051`: Weaviate GRPC endpoint
110+
- `kubectl port-forward svc/pr-weavloader-metrics 5555:5555`: Weavloader Flower endpoint
111+
- `kubectl port-forward svc/pr-weavloader-metrics 8081:8080`: Weavloader Prometheus endpoint
112+
113+
Deploy:
114+
```
115+
kubectl kustomize prs | kubectl apply -f -
116+
```
117+
118+
Delete all services:
119+
```
120+
kubectl kustomize prs | kubectl delete -f -
121+
```
122+
123+
Debugging - output to yaml:
124+
```
125+
kubectl kustomize prs -o hybrid-search-pr.yaml
126+
```
127+
128+
Notes:
129+
- Make sure that your PR is up-to-date with `main` so that the services that were not modified are reflected for the `latest` tag. This can also be checked with the [docker-compose](/docker-compose.yml) local deployment (after the PR is up-to-date with `main`) to see if the changes in the PR are working with the rest of the services that were not modified.
130+
- Users can utilize this overlay in combination with their local docker compose instance to use a triton instance that has an NVIDIA GPU. This involves commenting out the ports from the docker compose manifest file for triton and doing the kubectl port-forwarding described above.
131+
69132
## Managing and Customizing
70133

71134
You can extend or patch this `base/` deployment using kustomize overlays for different environments, resource limits, or development setups. See included overlays (such as those in benchmark subfolders) for example usage.

kubernetes/base/triton.yaml

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,16 @@ spec:
2222
value: "info"
2323
- name: TORCHDYNAMO_DISABLE
2424
value: "1"
25+
- name: MODEL_REPOSITORY
26+
value: "/app/models"
27+
- name: CLIP_MODEL_PATH
28+
value: "/models/DFN5B-CLIP-ViT-H-14-378"
29+
- name: CLIP_MODEL_VERSION
30+
value: "419d1f8f6a96aabaf5913c526d059facda50c24b"
31+
- name: GEMMA_MODEL_PATH
32+
value: "/models/gemma-3-4b-it"
33+
- name: GEMMA_MODEL_VERSION
34+
value: "093f9f388b31de276ce2de164bdc2081324b9767"
2535
- name: HF_TOKEN
2636
valueFrom:
2737
secretKeyRef:
@@ -32,10 +42,12 @@ spec:
3242
cpu: 1
3343
memory: 8Gi
3444
nvidia.com/gpu: 1
45+
ephemeral-storage: 50Gi
3546
requests:
3647
cpu: 1
3748
memory: 8Gi
3849
nvidia.com/gpu: 1
50+
ephemeral-storage: 50Gi
3951
ports:
4052
- name: http
4153
containerPort: 8000
@@ -46,10 +58,10 @@ spec:
4658
volumeMounts:
4759
- mountPath: /dev/shm
4860
name: dshm
49-
# - mountPath: /app/models/gemma-3-4b-it
50-
# name: models
51-
# - mountPath: /app/models/DFN5B-CLIP-ViT-H-14-378
52-
# name: models
61+
- mountPath: /models/gemma-3-4b-it
62+
name: models
63+
- mountPath: /models/DFN5B-CLIP-ViT-H-14-378
64+
name: models
5365
volumes:
5466
- name: dshm
5567
emptyDir:

kubernetes/base/weavloader.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,8 @@ spec:
101101
- name: metrics
102102
port: 8080
103103
targetPort: 8080
104+
protocol: TCP
105+
- name: flower
106+
port: 5555
107+
targetPort: 5555
104108
protocol: TCP

kubernetes/nrp-dev/gpus.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@ spec:
1414
operator: In
1515
values:
1616
- NVIDIA-A10
17-
- key: nautilus.io/reservation
18-
operator: In
19-
values:
20-
- sage
2117
tolerations:
2218
- key: nautilus.io/reservation
2319
operator: Equal

kubernetes/nrp-prod/gpus.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@ spec:
1414
operator: In
1515
values:
1616
- NVIDIA-A10
17-
- key: nautilus.io/reservation
18-
operator: In
19-
values:
20-
- sage
2117
tolerations:
2218
- key: nautilus.io/reservation
2319
operator: Equal

0 commit comments

Comments
 (0)