Commit fc67c11

feat: added test kubernetes files

1 parent 9fd6437 commit fc67c11

File tree

4 files changed: +270 -0 lines changed

agentic_rag/k8s/deploy.sh

Lines changed: 122 additions & 0 deletions
#!/bin/bash

# Deployment script for Agentic RAG

# Function to display usage
usage() {
    echo "Usage: $0 [--hf-token TOKEN] [--namespace NAMESPACE] [--cpu-only]"
    echo ""
    echo "Options:"
    echo "  --hf-token TOKEN       Hugging Face token (optional but recommended)"
    echo "  --namespace NAMESPACE  Kubernetes namespace to deploy to (default: default)"
    echo "  --cpu-only             Deploy without GPU support (not recommended for production)"
    exit 1
}

# Default values
NAMESPACE="default"
HF_TOKEN=""
CPU_ONLY=false

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --hf-token)
            HF_TOKEN="$2"
            shift 2
            ;;
        --namespace)
            NAMESPACE="$2"
            shift 2
            ;;
        --cpu-only)
            CPU_ONLY=true
            shift
            ;;
        *)
            usage
            ;;
    esac
done

# Create namespace if it doesn't exist
kubectl get namespace "$NAMESPACE" > /dev/null 2>&1 || kubectl create namespace "$NAMESPACE"

echo "Deploying Agentic RAG to namespace $NAMESPACE..."

# Check for GPU availability if not in CPU-only mode
if [[ "$CPU_ONLY" == "false" ]]; then
    echo "Checking for GPU availability..."
    GPU_COUNT=$(kubectl get nodes "-o=custom-columns=GPU:.status.allocatable.nvidia\.com/gpu" --no-headers | grep -v "<none>" | wc -l)

    if [[ "$GPU_COUNT" -eq 0 ]]; then
        echo "WARNING: No GPUs detected in the cluster!"
        echo "The deployment is configured to use GPUs, but none were found."
        echo "Options:"
        echo "  1. Install the NVIDIA device plugin: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml"
        echo "  2. Use the --cpu-only flag to deploy without GPU support (not recommended for production)"
        echo "  3. Ensure your nodes have GPUs and proper drivers installed"

        read -p "Continue with deployment anyway? (y/n): " CONTINUE
        if [[ "$CONTINUE" != "y" && "$CONTINUE" != "Y" ]]; then
            echo "Deployment aborted."
            exit 1
        fi

        echo "Continuing with deployment despite no GPUs detected..."
    else
        echo "Found $GPU_COUNT nodes with GPUs available."
    fi
fi

# Create ConfigMap with Hugging Face token if provided
if [[ -n "$HF_TOKEN" ]]; then
    echo "Using provided Hugging Face token..."
    cat <<EOF | kubectl apply -n "$NAMESPACE" -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    HUGGING_FACE_HUB_TOKEN: "$HF_TOKEN"
EOF
else
    echo "No Hugging Face token provided. Creating empty config..."
    cat <<EOF | kubectl apply -n "$NAMESPACE" -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    # No Hugging Face token provided
    # You can still use Ollama models
EOF
fi

# Apply deployment and service
if [[ "$CPU_ONLY" == "true" ]]; then
    echo "Deploying in CPU-only mode (not recommended for production)..."
    # Create a temporary CPU-only version of the deployment file
    sed '/nvidia.com\/gpu/d' local-deployment/deployment.yaml > local-deployment/deployment-cpu.yaml
    kubectl apply -n "$NAMESPACE" -f local-deployment/deployment-cpu.yaml
    rm local-deployment/deployment-cpu.yaml
else
    kubectl apply -n "$NAMESPACE" -f local-deployment/deployment.yaml
fi

kubectl apply -n "$NAMESPACE" -f local-deployment/service.yaml

echo "Deployment started. Check status with: kubectl get pods -n $NAMESPACE"
echo "Access the application with: kubectl get service agentic-rag -n $NAMESPACE"
echo "Note: Initial startup may take some time as models are downloaded."

# Provide additional guidance for monitoring GPU usage
if [[ "$CPU_ONLY" == "false" ]]; then
    echo ""
    echo "To monitor GPU usage:"
    echo "  1. Check pod status: kubectl get pods -n $NAMESPACE"
    echo "  2. View pod logs: kubectl logs -f deployment/agentic-rag -n $NAMESPACE"
    echo "  3. Check GPU allocation: kubectl describe pod -l app=agentic-rag -n $NAMESPACE | grep -A5 'Allocated resources'"
fi
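As a quick usage reference, a typical invocation of the script might look like the following (the token and namespace values are illustrative placeholders, not real credentials):

# Deploy with GPU support into a dedicated namespace
./deploy.sh --hf-token hf_xxxxxxxxxxxxxxxx --namespace agentic-rag

# Or deploy without GPUs, e.g. on a local test cluster
./deploy.sh --cpu-only --namespace agentic-rag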
Lines changed: 10 additions & 0 deletions
apiVersion: v1
kind: ConfigMap
metadata:
  name: agentic-rag-config
data:
  config.yaml: |
    HUGGING_FACE_HUB_TOKEN: "your-huggingface-token"
  # Optional OpenAI configuration
  # .env: |
  #   OPENAI_API_KEY=your-openai-api-key
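Assuming this ConfigMap manifest is saved as local-deployment/configmap.yaml alongside the deployment and service files (the exact path is not shown in this view, so treat it as an assumption), it could be applied and inspected with standard kubectl commands:

# Path is assumed; adjust to wherever the manifest actually lives
kubectl apply -n default -f local-deployment/configmap.yaml
kubectl get configmap agentic-rag-config -n default -o yaml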
Lines changed: 123 additions & 0 deletions
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agentic-rag
  template:
    metadata:
      labels:
        app: agentic-rag
    spec:
      containers:
      - name: agentic-rag
        image: python:3.10-slim
        resources:
          requests:
            memory: "8Gi"
            cpu: "2"
            nvidia.com/gpu: "1"
          limits:
            memory: "16Gi"
            cpu: "4"
            nvidia.com/gpu: "1"
        ports:
        - containerPort: 7860
          name: gradio
        - containerPort: 11434
          name: ollama-api
        volumeMounts:
        - name: config-volume
          mountPath: /app/config.yaml
          subPath: config.yaml
        - name: data-volume
          mountPath: /app/embeddings
        - name: chroma-volume
          mountPath: /app/chroma_db
        - name: ollama-models
          mountPath: /root/.ollama
        command: ["/bin/bash", "-c"]
        args:
        - |
          apt-get update && apt-get install -y git curl gnupg

          # Install the NVIDIA container toolkit
          echo "Installing NVIDIA container toolkit..."
          curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
          curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
            sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
            tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
          apt-get update && apt-get install -y nvidia-container-toolkit

          # Verify GPU is available
          echo "Verifying GPU availability..."
          nvidia-smi || echo "WARNING: nvidia-smi command failed. GPU might not be properly configured."

          # Install Ollama
          echo "Installing Ollama..."
          curl -fsSL https://ollama.com/install.sh | sh

          # Configure Ollama to use GPU
          echo "Configuring Ollama for GPU usage..."
          mkdir -p /root/.ollama
          echo '{"gpu": {"enable": true}}' > /root/.ollama/config.json

          # Start Ollama in the background with GPU support
          echo "Starting Ollama service with GPU support..."
          ollama serve &

          # Wait for Ollama to be ready
          echo "Waiting for Ollama to be ready..."
          until curl -s http://localhost:11434/api/tags >/dev/null; do
            sleep 5
          done

          # Check that the llama3 model is available
          echo "Checking for the llama3 model..."
          curl -s http://localhost:11434/api/tags | grep -q "llama3" && echo "llama3 model is available"

          # Clone and set up the application
          cd /app
          git clone https://github.com/devrel/devrel-labs.git .
          cd agentic_rag
          pip install -r requirements.txt

          # Start the Gradio app
          echo "Starting Gradio application..."
          python gradio_app.py
        env:
        - name: PYTHONUNBUFFERED
          value: "1"
        - name: OLLAMA_HOST
          value: "http://localhost:11434"
        - name: NVIDIA_VISIBLE_DEVICES
          value: "all"
        - name: NVIDIA_DRIVER_CAPABILITIES
          value: "compute,utility"
        - name: TORCH_CUDA_ARCH_LIST
          value: "7.0;7.5;8.0;8.6"
      volumes:
      - name: config-volume
        configMap:
          name: agentic-rag-config
      - name: data-volume
        emptyDir: {}
      - name: chroma-volume
        emptyDir: {}
      - name: ollama-models
        emptyDir: {}
      # For production, consider using persistent volumes instead of emptyDir:
      # - name: data-volume
      #   persistentVolumeClaim:
      #     claimName: agentic-rag-data-pvc
      # - name: chroma-volume
      #   persistentVolumeClaim:
      #     claimName: agentic-rag-chroma-pvc
      # - name: ollama-models
      #   persistentVolumeClaim:
      #     claimName: ollama-models-pvc
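If the commented persistentVolumeClaim stanzas above are enabled for production, matching claims must exist first. A minimal sketch for one of them, assuming the cluster's default StorageClass and an arbitrarily chosen 20Gi capacity:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models-pvc   # matches the commented claimName above
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi   # illustrative size; scale to the models Ollama will pull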
Lines changed: 15 additions & 0 deletions
apiVersion: v1
kind: Service
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  type: LoadBalancer  # Use NodePort if LoadBalancer is not available
  ports:
  - port: 80
    targetPort: 7860
    protocol: TCP
    name: http
  selector:
    app: agentic-rag
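For clusters without a LoadBalancer controller, the inline comment suggests NodePort. A sketch of that variant of the spec block, with 30080 as an arbitrary example from the default 30000-32767 NodePort range:

spec:
  type: NodePort
  ports:
  - port: 80
    targetPort: 7860
    nodePort: 30080  # arbitrary example within the default NodePort range
    protocol: TCP
    name: http
  selector:
    app: agentic-rag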
