---
# Deployment for the agentic-rag application.
#
# Runs a single replica whose main container bootstraps itself at start-up:
# it installs Ollama, starts the Ollama server in the background, clones the
# application repository, installs its Python requirements and finally
# launches the Gradio app. An init container unpacks a wallet archive into a
# scratch volume shared with the main container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agentic-rag
  template:
    metadata:
      labels:
        app: agentic-rag
    spec:
      # Permit scheduling onto nodes tainted nvidia.com/gpu=present:NoSchedule.
      # NOTE(review): the container does not request an nvidia.com/gpu
      # resource; GPU visibility presumably relies on the node's container
      # runtime honouring NVIDIA_VISIBLE_DEVICES - confirm for the target
      # cluster.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Equal"
          value: "present"
          effect: "NoSchedule"
      initContainers:
        # Extracts wallet.zip (from the wallet-zip ConfigMap) into the
        # wallet-volume emptyDir, which the main container mounts at
        # /app/wallet.
        # NOTE(review): the busybox image is untagged; consider pinning it.
        - name: unzip
          image: busybox
          # "-o" overwrites existing files without prompting. The emptyDir
          # outlives container restarts, so a re-run of this init container
          # would otherwise stop on the files left by the previous run.
          command:
            - "unzip"
            - "-o"
            - "/app/walletzip/wallet.zip"
            - "-d"
            - "/app/wallet"
          volumeMounts:
            - name: wallet-config
              mountPath: /app/walletzip
            - name: wallet-volume
              mountPath: /app/wallet
      containers:
        - name: agentic-rag
          image: python:3.10-slim
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
              # Node-local scratch space reserved for the container; the
              # start-up script installs packages and clones the repository
              # into the container filesystem.
              ephemeral-storage: "50Gi"
            limits:
              memory: "16Gi"
              cpu: "4"
              # Upper bound on node-local scratch space for the container.
              ephemeral-storage: "100Gi"
          ports:
            - containerPort: 7860
              name: gradio
            - containerPort: 11434
              name: ollama-api
          volumeMounts:
            # Single file projected from the agentic-rag-config ConfigMap; the
            # start-up script copies it next to the application code.
            - name: config-volume
              mountPath: /app/config.yaml
              subPath: config.yaml
            - name: wallet-config
              mountPath: /app/walletzip
            - name: wallet-volume
              mountPath: /app/wallet
            - name: data-volume
              mountPath: /app/embeddings
            - name: chroma-volume
              mountPath: /app/chroma_db
            # PVC mounted over Ollama's home directory (/root/.ollama).
            - name: ollama-models
              mountPath: /root/.ollama
          # The whole bootstrap runs as one bash script passed through "-c".
          # Steps, in order: install OS packages, add the NVIDIA container
          # toolkit apt repository and install the toolkit, probe nvidia-smi
          # (a failure only prints a warning), install and start Ollama, poll
          # its /api/tags endpoint until it answers, clone the application
          # repository, install requirements, copy config.yaml into place and
          # run gradio_app.py in the foreground.
          # NOTE(review): the step announced as "Verifying models are using
          # GPU" only greps the tag list for "llama3"; it does not inspect GPU
          # usage.
          command: ["/bin/bash", "-c"]
          args:
            - |
              apt-get update && apt-get install -y git curl gnupg

              # Install NVIDIA drivers and CUDA
              echo "Installing NVIDIA drivers and CUDA..."
              curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
              curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
                sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
                tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
              apt-get update && apt-get install -y nvidia-container-toolkit

              # Verify GPU is available
              echo "Verifying GPU availability..."
              nvidia-smi || echo "WARNING: nvidia-smi command failed. GPU might not be properly configured."

              # Install Ollama
              echo "Installing Ollama..."
              curl -fsSL https://ollama.com/install.sh | sh

              # Configure Ollama to use GPU
              echo "Configuring Ollama for GPU usage..."
              mkdir -p /root/.ollama
              echo '{"gpu": {"enable": true}}' > /root/.ollama/config.json

              # Start Ollama in the background with GPU support
              echo "Starting Ollama service with GPU support..."
              ollama serve &

              # Wait for Ollama to be ready
              echo "Waiting for Ollama to be ready..."
              until curl -s http://localhost:11434/api/tags >/dev/null; do
                sleep 5
              done

              # Verify models are using GPU
              echo "Verifying models are using GPU..."
              curl -s http://localhost:11434/api/tags | grep -q "llama3" && echo "llama3 model is available"

              # Clone and set up the application
              cd /app
              git clone -b agentic_rag_automation https://github.com/vmleon/devrel-labs.git
              cd devrel-labs/agentic_rag
              pip install -r requirements.txt

              # Move config.yaml file to agentic-rag folder
              echo "Copying config.yaml to /app/devrel-labs/agentic_rag/config.yaml"
              cp /app/config.yaml /app/devrel-labs/agentic_rag/config.yaml

              # Start the Gradio app
              echo "Starting Gradio application..."
              python gradio_app.py
          env:
            # Values are quoted so they stay strings, as env values must be.
            - name: PYTHONUNBUFFERED
              value: "1"
            - name: OLLAMA_HOST
              value: "http://localhost:11434"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
            - name: TORCH_CUDA_ARCH_LIST
              value: "7.0;7.5;8.0;8.6"
      volumes:
        - name: config-volume
          configMap:
            name: agentic-rag-config
        # ConfigMap that carries the wallet archive read by the init container.
        - name: wallet-config
          configMap:
            name: wallet-zip
        # Scratch space that receives the extracted wallet files.
        - name: wallet-volume
          emptyDir:
            sizeLimit: 50Mi
        - name: data-volume
          persistentVolumeClaim:
            claimName: agentic-rag-data-pvc
        - name: chroma-volume
          persistentVolumeClaim:
            claimName: agentic-rag-chroma-pvc
        - name: ollama-models
          persistentVolumeClaim:
            claimName: ollama-models-pvc