22 changes: 22 additions & 0 deletions agentic_rag/.gitignore
@@ -7,6 +7,7 @@ __pycache__/
venv/
env/
.env
kubeconfig

# IDE
.vscode/
@@ -19,6 +20,27 @@ env/
embeddings/
chroma_db/
docs/*.json
**/.certs
**/node_modules
k8s/kustom/demo/config.yaml
k8s/kustom/demo/wallet/
**/generated/

# Terraform
**/.terraform/*
*.plan
*.tfstate
*.tfstate.*
crash.log
crash.*.log
*.tfvars
*.tfvars.json
override.tf
override.tf.json
*_override.tf
*_override.tf.json
.terraformrc
terraform.rc

# Distribution / packaging
dist/
120 changes: 120 additions & 0 deletions agentic_rag/DEPLOY.md
@@ -0,0 +1,120 @@
# Deploy with Terraform and Kustomize

## TODOS

- Multiple containers for different functions:
  - Gradio
  - Agents / Inference
  - Database access
- Hugging Face token should be a Kubernetes Secret (see the sketch after this list)
- Liveness and readiness probes
- Use the Load Balancer instead of Gradio Live
- Autoscaling
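
As a minimal sketch of the Secret item above (the Secret name `hf-token` is an assumption; today the token is passed through the generated ConfigMap instead), the token could be created like this and then referenced from the deployment via `valueFrom.secretKeyRef`:

```bash
# Hypothetical: store the Hugging Face token as a Kubernetes Secret
# instead of a plain ConfigMap entry (Secret name is an assumption).
kubectl create secret generic hf-token \
  --namespace agentic-rag \
  --from-literal=HUGGING_FACE_HUB_TOKEN="<your token>"
```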

## Deploy Infrastructure

Install the script dependencies:

```bash
cd scripts/ && npm install && cd ..
```

Set the environment (answer the questions) and generate the Terraform `tfvars` file:

```bash
zx scripts/setenv.mjs
```

> Alternative: one-liner for the commands printed in yellow (for easy copy-paste):
>
> ```bash
> cd tf && terraform init && terraform apply -auto-approve
> ```
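
If you prefer to review the changes before applying, a standard Terraform flow (an alternative to `-auto-approve`) looks like this; the generated `*.plan` file is already covered by the `.gitignore` above:

```bash
cd tf
terraform init
terraform plan -out=tf.plan   # review the proposed changes first
terraform apply tf.plan
```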

Come back to the root folder:

```bash
cd ..
```

Prepare the kubeconfig and namespace:

```bash
zx scripts/kustom.mjs
```
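
The script is expected to leave the generated kubeconfig and the rendered demo config in place. A quick sanity check, assuming the paths listed in the `.gitignore` above (this is an assumption about where the script writes its output):

```bash
# Files the kustom step is expected to generate (paths taken from .gitignore)
ls tf/generated/kubeconfig
ls k8s/kustom/demo/config.yaml
ls k8s/kustom/demo/wallet/
```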

## Deploy Application

Export the kubeconfig to get access to the Kubernetes cluster:

```bash
export KUBECONFIG="$(pwd)/tf/generated/kubeconfig"
```

Check that everything works:

```bash
kubectl cluster-info
```

Deploy the production overlay:

```bash
kubectl apply -k k8s/kustom/overlays/prod
```
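
To inspect what will be applied without touching the cluster, you can render the overlay first:

```bash
kubectl kustomize k8s/kustom/overlays/prod | less
```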

Check all pods are Ready:

```bash
kubectl get po --namespace=agentic-rag
```
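
The container installs its dependencies (NVIDIA toolkit, Ollama, Python packages) at startup, so the first rollout can take a while. One way to block until the pod is Ready (the 30 minute timeout is an arbitrary, generous assumption):

```bash
kubectl wait pod \
  --namespace agentic-rag \
  --selector app=agentic-rag \
  --for=condition=Ready \
  --timeout=30m
```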

Get the Gradio Live URL:

```bash
kubectl logs $(kubectl get po -n agentic-rag -l app=agentic-rag -o name) -n agentic-rag | grep "Running on public URL"
```

Open the URL from the previous command in your browser.

Alternatively, you can get the Load Balancer public IP address:

```bash
echo "http://$(kubectl get service \
-n agentic-rag \
-o jsonpath='{.items[?(@.spec.type=="LoadBalancer")].status.loadBalancer.ingress[0].ip}')"
```
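
The Load Balancer IP can take a few minutes to be provisioned; if the field above is still empty, watch the service until an external IP appears:

```bash
kubectl get service -n agentic-rag -w
```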

To troubleshoot, connect to the container:

```bash
kubectl exec -it $(kubectl get po -n agentic-rag -l app=agentic-rag -o name) -n agentic-rag -- sh
```
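
You can also follow the application logs without attaching a shell:

```bash
kubectl logs -f deployment/agentic-rag -n agentic-rag
```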

## Clean up

Delete the production overlay:

```bash
kubectl delete -k k8s/kustom/overlays/prod
```
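
Optionally, confirm that nothing is left behind in the namespace:

```bash
kubectl get all -n agentic-rag
```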

Destroy the infrastructure with Terraform:

```bash
cd tf
```

```bash
terraform destroy -auto-approve
```

```bash
cd ..
```

Clean up the artifacts and config files:

```bash
zx scripts/clean.mjs
```
10 changes: 9 additions & 1 deletion agentic_rag/OraDBVectorStore.py
@@ -22,13 +22,21 @@ def __init__(self, persist_directory: str = "embeddings"):
        username = credentials.get("ORACLE_DB_USERNAME", "ADMIN")
        password = credentials.get("ORACLE_DB_PASSWORD", "")
        dsn = credentials.get("ORACLE_DB_DSN", "")
        wallet_path = credentials.get("ORACLE_DB_WALLET_LOCATION")
        wallet_password = credentials.get("ORACLE_DB_WALLET_PASSWORD")

        if not password or not dsn:
            raise ValueError("Oracle DB credentials not found in config.yaml. Please set ORACLE_DB_USERNAME, ORACLE_DB_PASSWORD, and ORACLE_DB_DSN.")

        # Connect to the database
        try:
            conn23c = oracledb.connect(user=username, password=password, dsn=dsn)
            if not wallet_path:
                print(f'Connecting (no wallet) to dsn {dsn} and user {username}')
                conn23c = oracledb.connect(user=username, password=password, dsn=dsn)
            else:
                print(f'Connecting (with wallet) to dsn {dsn} and user {username}')
                conn23c = oracledb.connect(user=username, password=password, dsn=dsn,
                                           config_dir=wallet_path, wallet_location=wallet_path,
                                           wallet_password=wallet_password)
            print("Oracle DB Connection successful!")
        except Exception as e:
            print("Oracle DB Connection failed!", e)
6 changes: 6 additions & 0 deletions agentic_rag/k8s/kustom/demo/config.yaml.mustache
@@ -0,0 +1,6 @@
HUGGING_FACE_HUB_TOKEN: "{{{ hugging_face_token }}}"
ORACLE_DB_USERNAME: "{{{ adb_username }}}"
ORACLE_DB_PASSWORD: "{{{ adb_admin_password }}}"
ORACLE_DB_DSN: "{{{ adb_service_name }}}"
ORACLE_DB_WALLET_LOCATION: "{{{ adb_wallet_location }}}"
ORACLE_DB_WALLET_PASSWORD: "{{{ adb_admin_password }}}"
144 changes: 144 additions & 0 deletions agentic_rag/k8s/kustom/demo/deployment.yaml
@@ -0,0 +1,144 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agentic-rag
  template:
    metadata:
      labels:
        app: agentic-rag
    spec:
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Equal"
          value: "present"
          effect: "NoSchedule"
      initContainers:
        - name: unzip
          image: busybox
          command: ["unzip", "/app/walletzip/wallet.zip", "-d", "/app/wallet"]
          volumeMounts:
            - name: wallet-config
              mountPath: /app/walletzip
            - name: wallet-volume
              mountPath: /app/wallet
      containers:
        - name: agentic-rag
          image: python:3.10-slim
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
              ephemeral-storage: "50Gi" # Add this
            limits:
              memory: "16Gi"
              cpu: "4"
              ephemeral-storage: "100Gi" # Add this
          ports:
            - containerPort: 7860
              name: gradio
            - containerPort: 11434
              name: ollama-api
          volumeMounts:
            - name: config-volume
              mountPath: /app/config.yaml
              subPath: config.yaml
            - name: wallet-config
              mountPath: /app/walletzip
            - name: wallet-volume
              mountPath: /app/wallet
            - name: data-volume
              mountPath: /app/embeddings
            - name: chroma-volume
              mountPath: /app/chroma_db
            - name: ollama-models
              mountPath: /root/.ollama
          command: ["/bin/bash", "-c"]
          args:
            - |
              apt-get update && apt-get install -y git curl gnupg

              # Install NVIDIA drivers and CUDA
              echo "Installing NVIDIA drivers and CUDA..."
              curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
              curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
                sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
                tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
              apt-get update && apt-get install -y nvidia-container-toolkit

              # Verify GPU is available
              echo "Verifying GPU availability..."
              nvidia-smi || echo "WARNING: nvidia-smi command failed. GPU might not be properly configured."

              # Install Ollama
              echo "Installing Ollama..."
              curl -fsSL https://ollama.com/install.sh | sh

              # Configure Ollama to use GPU
              echo "Configuring Ollama for GPU usage..."
              mkdir -p /root/.ollama
              echo '{"gpu": {"enable": true}}' > /root/.ollama/config.json

              # Start Ollama in the background with GPU support
              echo "Starting Ollama service with GPU support..."
              ollama serve &

              # Wait for Ollama to be ready
              echo "Waiting for Ollama to be ready..."
              until curl -s http://localhost:11434/api/tags >/dev/null; do
                sleep 5
              done

              # Verify models are using GPU
              echo "Verifying models are using GPU..."
              curl -s http://localhost:11434/api/tags | grep -q "llama3" && echo "llama3 model is available"

              # Clone and set up the application
              cd /app
              git clone -b agentic_rag_automation https://github.com/vmleon/devrel-labs.git
              cd devrel-labs/agentic_rag
              pip install -r requirements.txt

              # Move config.yaml file to agentic-rag folder
              echo "Copying config.yaml to /app/devrel-labs/agentic_rag/config.yaml"
              cp /app/config.yaml /app/devrel-labs/agentic_rag/config.yaml

              # Start the Gradio app
              echo "Starting Gradio application..."
              python gradio_app.py
          env:
            - name: PYTHONUNBUFFERED
              value: "1"
            - name: OLLAMA_HOST
              value: "http://localhost:11434"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
            - name: TORCH_CUDA_ARCH_LIST
              value: "7.0;7.5;8.0;8.6"
      volumes:
        - name: config-volume
          configMap:
            name: agentic-rag-config
        - name: wallet-config
          configMap:
            name: wallet-zip
        - name: wallet-volume
          emptyDir:
            sizeLimit: 50Mi
        - name: data-volume
          persistentVolumeClaim:
            claimName: agentic-rag-data-pvc
        - name: chroma-volume
          persistentVolumeClaim:
            claimName: agentic-rag-chroma-pvc
        - name: ollama-models
          persistentVolumeClaim:
            claimName: ollama-models-pvc
12 changes: 12 additions & 0 deletions agentic_rag/k8s/kustom/demo/kustomization.yaml
@@ -0,0 +1,12 @@
resources:
  - pvcs.yaml
  - deployment.yaml
  - service.yaml
configMapGenerator:
  - name: agentic-rag-config
    files:
      - config.yaml
  - name: wallet-zip
    files:
      - wallet/wallet.zip
namespace: agentic-rag
32 changes: 32 additions & 0 deletions agentic_rag/k8s/kustom/demo/pvcs.yaml
@@ -0,0 +1,32 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agentic-rag-data-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agentic-rag-chroma-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi # Larger storage for model files
15 changes: 15 additions & 0 deletions agentic_rag/k8s/kustom/demo/service.yaml
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  name: agentic-rag
  labels:
    app: agentic-rag
spec:
  type: LoadBalancer # Use NodePort if LoadBalancer is not available
  ports:
    - port: 80
      targetPort: 7860
      protocol: TCP
      name: http
  selector:
    app: agentic-rag
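
If a LoadBalancer is not available, as the comment in the service above notes, one quick way to switch the existing service in place is a sketch like the following (a JSON merge patch via kubectl; you would then reach Gradio through a node IP and the assigned node port):

```bash
# Hedged example: change the service type when no LoadBalancer is available
kubectl patch service agentic-rag -n agentic-rag \
  -p '{"spec":{"type":"NodePort"}}'
```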