---
# Deployment for the agentic-rag demo: a single python:3.10-slim pod that
# bootstraps NVIDIA tooling + Ollama at startup, clones the app repo, and
# serves a Gradio UI on 7860 (Ollama API on 11434).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agentic-rag
  template:
    metadata:
      labels:
        app: agentic-rag
    spec:
      # Allow scheduling onto GPU-tainted nodes.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Equal"
          value: "present"
          effect: "NoSchedule"
      # Extract the DB wallet zip (shipped via ConfigMap) into an emptyDir
      # shared with the main container. Mount paths must match the unzip
      # command's arguments.
      initContainers:
        - name: unzip
          image: busybox
          command: ["unzip", "/app/walletzip/wallet.zip", "-d", "/app/wallet"]
          volumeMounts:
            - name: wallet-config
              mountPath: /app/walletzip
            - name: wallet-volume
              mountPath: /app/wallet
      containers:
        - name: agentic-rag
          image: python:3.10-slim
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
              # Large ephemeral storage: apt packages, pip deps, git clone.
              ephemeral-storage: "50Gi"
            limits:
              memory: "16Gi"
              cpu: "4"
              ephemeral-storage: "100Gi"
              # NOTE(review): no nvidia.com/gpu resource is requested, so the
              # device plugin will not expose a GPU to this pod even though the
              # toleration and NVIDIA_* env vars suggest one is expected.
              # Consider adding:
              #   nvidia.com/gpu: 1
          ports:
            - containerPort: 7860
              name: gradio
            - containerPort: 11434
              name: ollama-api
          volumeMounts:
            # Mount only config.yaml from the ConfigMap, not a whole directory.
            - name: config-volume
              mountPath: /app/config.yaml
              subPath: config.yaml
            - name: wallet-config
              mountPath: /app/walletzip
            - name: wallet-volume
              mountPath: /app/wallet
            - name: data-volume
              mountPath: /app/embeddings
            - name: chroma-volume
              mountPath: /app/chroma_db
            # Persist downloaded Ollama models across restarts.
            - name: ollama-models
              mountPath: /root/.ollama
          command: ["/bin/bash", "-c"]
          args:
            - |
              apt-get update && apt-get install -y git curl gnupg

              # Install NVIDIA drivers and CUDA
              echo "Installing NVIDIA drivers and CUDA..."
              curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
              curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
                sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
                tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
              apt-get update && apt-get install -y nvidia-container-toolkit

              # Verify GPU is available
              echo "Verifying GPU availability..."
              nvidia-smi || echo "WARNING: nvidia-smi command failed. GPU might not be properly configured."

              # Install Ollama
              echo "Installing Ollama..."
              curl -fsSL https://ollama.com/install.sh | sh

              # Configure Ollama to use GPU
              echo "Configuring Ollama for GPU usage..."
              mkdir -p /root/.ollama
              echo '{"gpu": {"enable": true}}' > /root/.ollama/config.json

              # Start Ollama in the background with GPU support
              echo "Starting Ollama service with GPU support..."
              ollama serve &

              # Wait for Ollama to be ready
              echo "Waiting for Ollama to be ready..."
              until curl -s http://localhost:11434/api/tags >/dev/null; do
                sleep 5
              done

              # Verify models are using GPU
              echo "Verifying models are using GPU..."
              curl -s http://localhost:11434/api/tags | grep -q "llama3" && echo "llama3 model is available"

              # Clone and set up the application
              cd /app
              git clone https://github.com/vmleon/devrel-labs.git
              cd devrel-labs/agentic_rag
              pip install -r requirements.txt

              # Start the Gradio app
              echo "Starting Gradio application..."
              python gradio_app.py
          env:
            - name: PYTHONUNBUFFERED
              value: "1"
            - name: OLLAMA_HOST
              value: "http://localhost:11434"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
            - name: TORCH_CUDA_ARCH_LIST
              value: "7.0;7.5;8.0;8.6"
      volumes:
        - name: config-volume
          configMap:
            name: agentic-rag-config
        - name: wallet-config
          configMap:
            name: wallet-zip
        - name: wallet-volume
          emptyDir:
            sizeLimit: 50Mi
        - name: data-volume
          persistentVolumeClaim:
            claimName: agentic-rag-data-pvc
        - name: chroma-volume
          persistentVolumeClaim:
            claimName: agentic-rag-chroma-pvc
        - name: ollama-models
          persistentVolumeClaim:
            claimName: ollama-models-pvc