#!/bin/bash
#
# Deployment script for Agentic RAG.
#
# Deploys the Agentic RAG application (ConfigMap, Deployment, Service) to a
# Kubernetes namespace, with an optional CPU-only mode for clusters that
# have no NVIDIA GPUs.

# Fail fast on command errors and on use of unset variables.
# NOTE(review): pipefail is intentionally omitted — the GPU-count pipeline
# below relies on grep's non-zero status when no GPU nodes match.
set -eu
# Print usage information to stderr and exit with a non-zero status.
# Called on any unrecognized or malformed command-line argument.
usage() {
  echo "Usage: $0 [--hf-token TOKEN] [--namespace NAMESPACE] [--cpu-only]" >&2
  echo "" >&2
  echo "Options:" >&2
  echo "  --hf-token TOKEN       Hugging Face token (optional but recommended)" >&2
  echo "  --namespace NAMESPACE  Kubernetes namespace to deploy to (default: default)" >&2
  echo "  --cpu-only             Deploy without GPU support (not recommended for production)" >&2
  exit 1
}
# Default values.
# HF_TOKEN must default to the EMPTY string: the ConfigMap step below uses
# [[ -n "$HF_TOKEN" ]] to decide whether a token was supplied.
NAMESPACE="default"
HF_TOKEN=""
CPU_ONLY=false

# Parse command-line arguments.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --hf-token)
      # Option requires a value; bail out with usage if it is missing.
      [[ $# -ge 2 ]] || usage
      HF_TOKEN="$2"
      shift 2
      ;;
    --namespace)
      [[ $# -ge 2 ]] || usage
      NAMESPACE="$2"
      shift 2
      ;;
    --cpu-only)
      CPU_ONLY=true
      shift
      ;;
    *)
      usage
      ;;
  esac
done
# Create the target namespace if it doesn't already exist.
kubectl get namespace "$NAMESPACE" > /dev/null 2>&1 || kubectl create namespace "$NAMESPACE"

echo "Deploying Agentic RAG to namespace $NAMESPACE..."
# Check for GPU availability unless the user asked for CPU-only mode.
if [[ "$CPU_ONLY" == "false" ]]; then
  echo "Checking for GPU availability..."
  # Count nodes that advertise an allocatable nvidia.com/gpu resource.
  # '|| true' guards against grep's non-zero exit status (under set -e)
  # when no node matches.
  GPU_COUNT=$(kubectl get nodes "-o=custom-columns=GPU:.status.allocatable.nvidia\.com/gpu" --no-headers | grep -cv "<none>" || true)

  if [[ "$GPU_COUNT" -eq 0 ]]; then
    echo "WARNING: No GPUs detected in the cluster!"
    echo "The deployment is configured to use GPUs, but none were found."
    echo "Options:"
    echo "1. Install the NVIDIA device plugin: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml"
    echo "2. Use --cpu-only flag to deploy without GPU support (not recommended for production)"
    echo "3. Ensure your nodes have GPUs and proper drivers installed"

    # -r keeps backslashes in the reply literal.
    read -r -p "Continue with deployment anyway? (y/n): " CONTINUE
    if [[ "$CONTINUE" != "y" && "$CONTINUE" != "Y" ]]; then
      echo "Deployment aborted."
      exit 1
    fi

    echo "Continuing with deployment despite no GPUs detected..."
  else
    echo "Found $GPU_COUNT nodes with GPUs available."
  fi
fi
# Create the ConfigMap, embedding the Hugging Face token if one was provided.
# The unquoted EOF delimiter is intentional: $HF_TOKEN must expand inside
# the manifest.
if [[ -n "$HF_TOKEN" ]]; then
  echo "Using provided Hugging Face token..."
  cat << EOF | kubectl apply -n "$NAMESPACE" -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    HUGGING_FACE_HUB_TOKEN: "$HF_TOKEN"
EOF
else
  echo "No Hugging Face token provided. Creating empty config..."
  cat << EOF | kubectl apply -n "$NAMESPACE" -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    # No Hugging Face token provided
    # You can still use Ollama models
EOF
fi
# Apply the Deployment (GPU or CPU-only variant) and the Service.
if [[ "$CPU_ONLY" == "true" ]]; then
  echo "Deploying in CPU-only mode (not recommended for production)..."
  # Build a temporary manifest with the GPU resource lines stripped out;
  # mktemp avoids writing scratch files into the source tree.
  CPU_MANIFEST=$(mktemp)
  sed '/nvidia.com\/gpu/d' local-deployment/deployment.yaml > "$CPU_MANIFEST"
  kubectl apply -n "$NAMESPACE" -f "$CPU_MANIFEST"
  rm -f "$CPU_MANIFEST"
else
  kubectl apply -n "$NAMESPACE" -f local-deployment/deployment.yaml
fi

kubectl apply -n "$NAMESPACE" -f local-deployment/service.yaml
echo "Deployment started. Check status with: kubectl get pods -n $NAMESPACE"
echo "Access the application with: kubectl get service agentic-rag -n $NAMESPACE"
echo "Note: Initial startup may take some time as models are downloaded."

# Provide additional guidance for monitoring GPU usage (GPU mode only).
if [[ "$CPU_ONLY" == "false" ]]; then
  echo ""
  echo "To monitor GPU usage:"
  echo "1. Check pod status: kubectl get pods -n $NAMESPACE"
  echo "2. View pod logs: kubectl logs -f deployment/agentic-rag -n $NAMESPACE"
  echo "3. Check GPU allocation: kubectl describe pod -l app=agentic-rag -n $NAMESPACE | grep -A5 'Allocated resources'"
fi