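#!/bin/bash

# Deployment script for Agentic RAG: creates the target namespace, a ConfigMap
# carrying the optional Hugging Face token, and applies the manifests found in
# local-deployment/ (paths are relative to the current working directory).
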
#######################################
# Print command-line help and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Exits:     always with status 1, so callers can use it to abort on bad
#            arguments
#######################################
usage() {
  cat <<EOF
Usage: $0 [--hf-token TOKEN] [--namespace NAMESPACE] [--cpu-only]

Options:
  --hf-token TOKEN       Hugging Face token (optional but recommended)
  --namespace NAMESPACE  Kubernetes namespace to deploy to (default: default)
  --cpu-only             Deploy without GPU support (not recommended for production)
EOF
  exit 1
}

# Make a pipeline fail when any stage fails (e.g. sed | kubectl below), not
# only when the last stage does.
set -o pipefail

# Print an error message on stderr and abort with status 1.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Default values
NAMESPACE="default"
HF_TOKEN=""
CPU_ONLY=false

# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --hf-token)
      # A value is mandatory: `shift 2` with a single remaining argument fails
      # WITHOUT shifting, which would make this loop spin forever.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --hf-token requires a value" >&2
        usage
      fi
      HF_TOKEN="$2"
      shift 2
      ;;
    --namespace)
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "ERROR: --namespace requires a non-empty value" >&2
        usage
      fi
      NAMESPACE="$2"
      shift 2
      ;;
    --cpu-only)
      CPU_ONLY=true
      shift
      ;;
    *)
      # Unknown option (this also covers -h/--help): show help and exit.
      usage
      ;;
  esac
done

# Create namespace if it doesn't exist
if ! kubectl get namespace "$NAMESPACE" > /dev/null 2>&1; then
  kubectl create namespace "$NAMESPACE" \
    || die "could not create namespace '$NAMESPACE'"
fi

echo "Deploying Agentic RAG to namespace $NAMESPACE..."

# Check for GPU availability if not in CPU-only mode
if [[ "$CPU_ONLY" == "false" ]]; then
  echo "Checking for GPU availability..."
  # One output row per node; nodes without an allocatable nvidia.com/gpu
  # resource print "<none>". grep -c counts the remaining rows and prints 0
  # when there are none (or when kubectl itself produced no output).
  GPU_COUNT=$(kubectl get nodes \
    '-o=custom-columns=GPU:.status.allocatable.nvidia\.com/gpu' --no-headers \
    | grep -cv '<none>')

  if [[ "${GPU_COUNT:-0}" -eq 0 ]]; then
    echo "WARNING: No GPUs detected in the cluster!"
    echo "The deployment is configured to use GPUs, but none were found."
    echo "Options:"
    echo "  1. Install the NVIDIA device plugin: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml"
    echo "  2. Use --cpu-only flag to deploy without GPU support (not recommended for production)"
    echo "  3. Ensure your nodes have GPUs and proper drivers installed"

    # -r keeps backslashes literal. On EOF (non-interactive stdin) CONTINUE
    # stays empty and the deployment is aborted.
    read -r -p "Continue with deployment anyway? (y/n): " CONTINUE
    if [[ "$CONTINUE" != "y" && "$CONTINUE" != "Y" ]]; then
      echo "Deployment aborted."
      exit 1
    fi

    echo "Continuing with deployment despite no GPUs detected..."
  else
    echo "Found $GPU_COUNT nodes with GPUs available."
  fi
fi

# Create ConfigMap with Hugging Face token if provided
# NOTE(review): a ConfigMap is not a confidential store; a Secret would be the
# usual home for a token. Kept as a ConfigMap because the consumer
# (local-deployment/deployment.yaml, not visible here) presumably mounts
# agentic-rag-config -- confirm before changing.
if [[ -n "$HF_TOKEN" ]]; then
  echo "Using provided Hugging Face token..."
  kubectl apply -n "$NAMESPACE" -f - <<EOF || die "could not apply ConfigMap agentic-rag-config"
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    HUGGING_FACE_HUB_TOKEN: "$HF_TOKEN"
EOF
else
  echo "No Hugging Face token provided. Creating empty config..."
  kubectl apply -n "$NAMESPACE" -f - <<EOF || die "could not apply ConfigMap agentic-rag-config"
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    # No Hugging Face token provided
    # You can still use Ollama models
EOF
fi

# Apply deployment and service
if [[ "$CPU_ONLY" == "true" ]]; then
  echo "Deploying in CPU-only mode (not recommended for production)..."
  [[ -f local-deployment/deployment.yaml ]] \
    || die "local-deployment/deployment.yaml not found (run from the repository root)"
  # Drop every line mentioning nvidia.com/gpu and stream the result straight
  # into kubectl, so no temporary manifest is written to (or left behind in)
  # the source tree.
  sed '/nvidia\.com\/gpu/d' local-deployment/deployment.yaml \
    | kubectl apply -n "$NAMESPACE" -f - \
    || die "could not apply CPU-only deployment"
else
  kubectl apply -n "$NAMESPACE" -f local-deployment/deployment.yaml \
    || die "could not apply local-deployment/deployment.yaml"
fi

kubectl apply -n "$NAMESPACE" -f local-deployment/service.yaml \
  || die "could not apply local-deployment/service.yaml"

echo "Deployment started. Check status with: kubectl get pods -n $NAMESPACE"
echo "Access the application with: kubectl get service agentic-rag -n $NAMESPACE"
echo "Note: Initial startup may take some time as models are downloaded."

# Provide additional guidance for monitoring GPU usage
if [[ "$CPU_ONLY" == "false" ]]; then
  echo ""
  echo "To monitor GPU usage:"
  echo "  1. Check pod status: kubectl get pods -n $NAMESPACE"
  echo "  2. View pod logs: kubectl logs -f deployment/agentic-rag -n $NAMESPACE"
  echo "  3. Check GPU allocation: kubectl describe pod -l app=agentic-rag -n $NAMESPACE | grep -A5 'Allocated resources'"
fi