Commit fc67c11

feat: added test kubernetes files

1 parent 9fd6437 commit fc67c11

File tree

4 files changed: +270 -0 lines changed

agentic_rag/k8s/deploy.sh

Lines changed: 122 additions & 0 deletions
#!/bin/bash

# Deployment script for Agentic RAG

# Function to display usage
usage() {
    echo "Usage: $0 [--hf-token TOKEN] [--namespace NAMESPACE] [--cpu-only]"
    echo ""
    echo "Options:"
    echo "  --hf-token TOKEN       Hugging Face token (optional but recommended)"
    echo "  --namespace NAMESPACE  Kubernetes namespace to deploy to (default: default)"
    echo "  --cpu-only             Deploy without GPU support (not recommended for production)"
    exit 1
}

# Default values
NAMESPACE="default"
HF_TOKEN=""
CPU_ONLY=false

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --hf-token)
            HF_TOKEN="$2"
            shift 2
            ;;
        --namespace)
            NAMESPACE="$2"
            shift 2
            ;;
        --cpu-only)
            CPU_ONLY=true
            shift
            ;;
        *)
            usage
            ;;
    esac
done

# Create namespace if it doesn't exist
kubectl get namespace "$NAMESPACE" > /dev/null 2>&1 || kubectl create namespace "$NAMESPACE"

echo "Deploying Agentic RAG to namespace $NAMESPACE..."

# Check for GPU availability if not in CPU-only mode
if [[ "$CPU_ONLY" == "false" ]]; then
    echo "Checking for GPU availability..."
    GPU_COUNT=$(kubectl get nodes "-o=custom-columns=GPU:.status.allocatable.nvidia\.com/gpu" --no-headers | grep -v "<none>" | wc -l)

    if [[ "$GPU_COUNT" -eq 0 ]]; then
        echo "WARNING: No GPUs detected in the cluster!"
        echo "The deployment is configured to use GPUs, but none were found."
        echo "Options:"
        echo "  1. Install the NVIDIA device plugin: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml"
        echo "  2. Use the --cpu-only flag to deploy without GPU support (not recommended for production)"
        echo "  3. Ensure your nodes have GPUs and proper drivers installed"

        read -p "Continue with deployment anyway? (y/n): " CONTINUE
        if [[ "$CONTINUE" != "y" && "$CONTINUE" != "Y" ]]; then
            echo "Deployment aborted."
            exit 1
        fi

        echo "Continuing with deployment despite no GPUs detected..."
    else
        echo "Found $GPU_COUNT nodes with GPUs available."
    fi
fi

# Create ConfigMap with Hugging Face token if provided
if [[ -n "$HF_TOKEN" ]]; then
    echo "Using provided Hugging Face token..."
    cat <<EOF | kubectl apply -n "$NAMESPACE" -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    HUGGING_FACE_HUB_TOKEN: "$HF_TOKEN"
EOF
else
    echo "No Hugging Face token provided. Creating empty config..."
    cat <<EOF | kubectl apply -n "$NAMESPACE" -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    # No Hugging Face token provided
    # You can still use Ollama models
EOF
fi

# Apply deployment and service
if [[ "$CPU_ONLY" == "true" ]]; then
    echo "Deploying in CPU-only mode (not recommended for production)..."
    # Create a temporary CPU-only version of the deployment file
    sed '/nvidia.com\/gpu/d' local-deployment/deployment.yaml > local-deployment/deployment-cpu.yaml
    kubectl apply -n "$NAMESPACE" -f local-deployment/deployment-cpu.yaml
    rm local-deployment/deployment-cpu.yaml
else
    kubectl apply -n "$NAMESPACE" -f local-deployment/deployment.yaml
fi

kubectl apply -n "$NAMESPACE" -f local-deployment/service.yaml

echo "Deployment started. Check status with: kubectl get pods -n $NAMESPACE"
echo "Access the application with: kubectl get service agentic-rag -n $NAMESPACE"
echo "Note: Initial startup may take some time as models are downloaded."

# Provide additional guidance for monitoring GPU usage
if [[ "$CPU_ONLY" == "false" ]]; then
    echo ""
    echo "To monitor GPU usage:"
    echo "  1. Check pod status: kubectl get pods -n $NAMESPACE"
    echo "  2. View pod logs: kubectl logs -f deployment/agentic-rag -n $NAMESPACE"
    echo "  3. Check GPU allocation: kubectl describe pod -l app=agentic-rag -n $NAMESPACE | grep -A5 'Allocated resources'"
fi
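As a quick usage reference, a typical invocation of the script might look like the following (the token and namespace values are illustrative placeholders, not real credentials):

# Deploy with GPU support into a dedicated namespace
./deploy.sh --hf-token hf_xxxxxxxxxxxxxxxx --namespace agentic-rag

# Or deploy without GPUs, e.g. on a local test cluster
./deploy.sh --cpu-only --namespace agentic-rag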
Lines changed: 10 additions & 0 deletions
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    HUGGING_FACE_HUB_TOKEN: "your-huggingface-token"
  # Optional OpenAI configuration
  # .env: |
  #   OPENAI_API_KEY=your-openai-api-key
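Assuming this ConfigMap manifest is saved as local-deployment/configmap.yaml alongside the deployment and service files (the exact path is not shown in this view, so treat it as an assumption), it could be applied and inspected with standard kubectl commands:

# Path is assumed; adjust to wherever the manifest actually lives
kubectl apply -n default -f local-deployment/configmap.yaml
kubectl get configmap agentic-rag-config -n default -o yaml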
Lines changed: 123 additions & 0 deletions
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agentic-rag
  template:
    metadata:
      labels:
        app: agentic-rag
    spec:
      containers:
      - name: agentic-rag
        image: python:3.10-slim
        resources:
          requests:
            memory: "8Gi"
            cpu: "2"
            nvidia.com/gpu: "1"
          limits:
            memory: "16Gi"
            cpu: "4"
            nvidia.com/gpu: "1"
        ports:
        - containerPort: 7860
          name: gradio
        - containerPort: 11434
          name: ollama-api
        volumeMounts:
        - name: config-volume
          mountPath: /app/config.yaml
          subPath: config.yaml
        - name: data-volume
          mountPath: /app/embeddings
        - name: chroma-volume
          mountPath: /app/chroma_db
        - name: ollama-models
          mountPath: /root/.ollama
        command: ["/bin/bash", "-c"]
        args:
        - |
          apt-get update && apt-get install -y git curl gnupg

          # Install the NVIDIA container toolkit
          echo "Installing NVIDIA container toolkit..."
          curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
          curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
            sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
            tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
          apt-get update && apt-get install -y nvidia-container-toolkit

          # Verify GPU is available
          echo "Verifying GPU availability..."
          nvidia-smi || echo "WARNING: nvidia-smi command failed. GPU might not be properly configured."

          # Install Ollama
          echo "Installing Ollama..."
          curl -fsSL https://ollama.com/install.sh | sh

          # Configure Ollama to use GPU
          echo "Configuring Ollama for GPU usage..."
          mkdir -p /root/.ollama
          echo '{"gpu": {"enable": true}}' > /root/.ollama/config.json

          # Start Ollama in the background with GPU support
          echo "Starting Ollama service with GPU support..."
          ollama serve &

          # Wait for Ollama to be ready
          echo "Waiting for Ollama to be ready..."
          until curl -s http://localhost:11434/api/tags >/dev/null; do
            sleep 5
          done

          # Check that the llama3 model is available
          echo "Checking for the llama3 model..."
          curl -s http://localhost:11434/api/tags | grep -q "llama3" && echo "llama3 model is available"

          # Clone and set up the application
          cd /app
          git clone https://github.com/devrel/devrel-labs.git .
          cd agentic_rag
          pip install -r requirements.txt

          # Start the Gradio app
          echo "Starting Gradio application..."
          python gradio_app.py
        env:
        - name: PYTHONUNBUFFERED
          value: "1"
        - name: OLLAMA_HOST
          value: "http://localhost:11434"
        - name: NVIDIA_VISIBLE_DEVICES
          value: "all"
        - name: NVIDIA_DRIVER_CAPABILITIES
          value: "compute,utility"
        - name: TORCH_CUDA_ARCH_LIST
          value: "7.0;7.5;8.0;8.6"
      volumes:
      - name: config-volume
        configMap:
          name: agentic-rag-config
      - name: data-volume
        emptyDir: {}
      - name: chroma-volume
        emptyDir: {}
      - name: ollama-models
        emptyDir: {}
      # For production, consider using persistent volumes instead of emptyDir:
      # - name: data-volume
      #   persistentVolumeClaim:
      #     claimName: agentic-rag-data-pvc
      # - name: chroma-volume
      #   persistentVolumeClaim:
      #     claimName: agentic-rag-chroma-pvc
      # - name: ollama-models
      #   persistentVolumeClaim:
      #     claimName: ollama-models-pvc
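If the commented persistentVolumeClaim stanzas above are enabled for production, matching claims must exist first. A minimal sketch for one of them, assuming the cluster's default StorageClass and an arbitrarily chosen 20Gi capacity:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models-pvc   # matches the commented claimName above
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi   # illustrative size; scale to the models Ollama will pull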
Lines changed: 15 additions & 0 deletions
apiVersion: v1
kind: Service
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  type: LoadBalancer  # Use NodePort if LoadBalancer is not available
  ports:
  - port: 80
    targetPort: 7860
    protocol: TCP
    name: http
  selector:
    app: agentic-rag
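For clusters without a LoadBalancer controller, the inline comment suggests NodePort. A sketch of that variant of the spec block, with 30080 as an arbitrary example from the default 30000-32767 NodePort range:

spec:
  type: NodePort
  ports:
  - port: 80
    targetPort: 7860
    nodePort: 30080  # arbitrary example within the default NodePort range
    protocol: TCP
    name: http
  selector:
    app: agentic-rag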
