oracle-devrel
diff --git a/‎agentic_rag/.gitignore‎
Lines changed: 22 additions & 0 deletions b/‎agentic_rag/.gitignore‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎agentic_rag/DEPLOY.md‎
Lines changed: 112 additions & 0 deletions b/‎agentic_rag/DEPLOY.md‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎agentic_rag/OraDBVectorStore.py‎
Lines changed: 7 additions & 1 deletion b/‎agentic_rag/OraDBVectorStore.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎agentic_rag/k8s/kustom/demo/config.yaml.mustache‎
Lines changed: 6 additions & 0 deletions b/‎agentic_rag/k8s/kustom/demo/config.yaml.mustache‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎agentic_rag/k8s/kustom/demo/deployment.yaml‎
Lines changed: 140 additions & 0 deletions b/‎agentic_rag/k8s/kustom/demo/deployment.yaml‎
Lines changed: 140 additions & 0 deletions
diff --git a/‎agentic_rag/k8s/kustom/demo/kustomization.yaml‎
Lines changed: 12 additions & 0 deletions b/‎agentic_rag/k8s/kustom/demo/kustomization.yaml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎agentic_rag/k8s/kustom/demo/pvcs.yaml‎
Lines changed: 32 additions & 0 deletions b/‎agentic_rag/k8s/kustom/demo/pvcs.yaml‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎agentic_rag/k8s/kustom/demo/service.yaml‎
Lines changed: 15 additions & 0 deletions b/‎agentic_rag/k8s/kustom/demo/service.yaml‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎agentic_rag/k8s/kustom/overlays/prod/kustomization.yaml‎
Lines changed: 2 additions & 0 deletions b/‎agentic_rag/k8s/kustom/overlays/prod/kustomization.yaml‎
Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,7 @@ __pycache__/
 venv/
 env/
 .env
+kubeconfig
 
 # IDE
 .vscode/
@@ -19,6 +20,27 @@ env/
 embeddings/
 chroma_db/
 docs/*.json
+**/.certs
+**/node_modules
+k8s/kustom/demo/config.yaml
+k8s/kustom/demo/wallet/
+**/generated/
+
+# Terraform
+**/.terraform/*
+*.plan
+*.tfstate
+*.tfstate.*
+crash.log
+crash.*.log
+*.tfvars
+*.tfvars.json
+override.tf
+override.tf.json
+*_override.tf
+*_override.tf.json
+.terraformrc
+terraform.rc
 
 # Distribution / packaging
 dist/
 
@@ -0,0 +1,112 @@
+# Deploy with Terraform and Kustomize
+
+## TODOS
+
+- Hugging Face token should be stored as a secret
+- PVCs and deployments in separate files
+- Multiple deployments/pods for different functions
+- Consider including installation of the NVIDIA device plugin in Kustomize: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml
+- Make the Hugging Face token optional
+- Autonomous for Vector Search
+
+## Deploy Infrastructure
+
+Install the script dependencies:
+
+```bash
+cd scripts/ && npm install && cd ..
+```
+
+Set up the environment (answer the prompts) and generate the Terraform `tfvars` file:
+
+```bash
+zx scripts/setenv.mjs
+```
+
+> Alternative: one-liner for the commands highlighted in yellow (for easy copy-paste)
+>
+> ```bash
+> cd tf && terraform init && terraform apply -auto-approve
+> ```
+
+Return to the root folder:
+
+```bash
+cd ..
+```
+
+Prepare Kubeconfig and namespace:
+
+```bash
+zx scripts/kustom.mjs
+```
+
+## Deploy Application
+
+Export kubeconfig to get access to the Kubernetes Cluster
+
+```bash
+export KUBECONFIG="$(pwd)/tf/generated/kubeconfig"
+```
+
+Check everything works
+
+```bash
+kubectl cluster-info
+```
+
+Deploy the production overlay
+
+```bash
+kubectl apply -k k8s/kustom/overlays/prod
+```
+
+Check all pods are Ready:
+
+```bash
+kubectl wait pod --all --for=condition=Ready --namespace=agentic-rag --timeout=600s
+```
+
+Get Gradio Live URL:
+
+```bash
+kubectl logs $(kubectl get po -n agentic-rag -l app=agentic-rag -o name) -n agentic-rag | grep "Running on public URL"
+```
+
+Open the URL printed by the previous command in your browser.
+
+Alternatively, get the Load Balancer public IP address:
+
+```bash
+echo "http://$(kubectl get service \
+  -n agentic-rag \
+  -o jsonpath='{.items[?(@.spec.type=="LoadBalancer")].status.loadBalancer.ingress[0].ip}')"
+```
+
+## Clean up
+
+Delete the production overlay
+
+```bash
+kubectl delete -k k8s/kustom/overlays/prod
+```
+
+Destroy infrastructure with Terraform.
+
+```bash
+cd tf
+```
+
+```bash
+terraform destroy -auto-approve
+```
+
+```bash
+cd ..
+```
+
+Clean up the artifacts and config files
+
+```bash
+zx scripts/clean.mjs
+```
@@ -22,13 +22,19 @@ def __init__(self, persist_directory: str = "embeddings"):
         username = credentials.get("ORACLE_DB_USERNAME", "ADMIN")
         password = credentials.get("ORACLE_DB_PASSWORD", "")
         dsn = credentials.get("ORACLE_DB_DSN", "")
+        wallet_path = credentials.get("ORACLE_DB_WALLET_LOCATION")
+        wallet_password = credentials.get("ORACLE_DB_WALLET_PASSWORD")
 
         if not password or not dsn:
             raise ValueError("Oracle DB credentials not found in config.yaml. Please set ORACLE_DB_USERNAME, ORACLE_DB_PASSWORD, and ORACLE_DB_DSN.")
 
         # Connect to the database
         try:
-            conn23c = oracledb.connect(user=username, password=password, dsn=dsn)
+            if not wallet_path:
+                conn23c = oracledb.connect(user=username, password=password, dsn=dsn)
+            else:
+                conn23c = oracledb.connect(user=username, password=password, dsn=dsn, 
+                                           wallet_location=wallet_path, wallet_password=wallet_password)
             print("Oracle DB Connection successful!")
         except Exception as e:
             print("Oracle DB Connection failed!", e)
 
@@ -0,0 +1,6 @@
+HUGGING_FACE_HUB_TOKEN: "{{{ hugging_face_token }}}"
+ORACLE_DB_USERNAME: "{{{ adb_username }}}"
+ORACLE_DB_PASSWORD: "{{{ adb_admin_password }}}"
+ORACLE_DB_DSN: "{{{ adb_service_name }}}"
+ORACLE_DB_WALLET_LOCATION: "{{{ adb_wallet_location }}}"
+ORACLE_DB_WALLET_PASSWORD: "{{{ adb_admin_password }}}"
@@ -0,0 +1,140 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: agentic-rag
+  labels:
+    app: agentic-rag
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: agentic-rag
+  template:
+    metadata:
+      labels:
+        app: agentic-rag
+    spec:
+      tolerations:
+      - key: "nvidia.com/gpu"
+        operator: "Equal"
+        value: "present"
+        effect: "NoSchedule"
+      initContainers:
+      - name: unzip
+        image: busybox
+        command: ["unzip", "/app/walletzip/wallet.zip", "-d", "/app/wallet"]
+        volumeMounts:
+          - name: wallet-config
+            mountPath: /app/walletzip
+          - name: wallet-volume
+            mountPath: /app/wallet
+      containers:
+      - name: agentic-rag
+        image: python:3.10-slim
+        resources:
+          requests:
+            memory: "8Gi"
+            cpu: "2"
+            ephemeral-storage: "50Gi"  # Add this
+          limits:
+            memory: "16Gi"
+            cpu: "4"
+            ephemeral-storage: "100Gi"  # Add this
+        ports:
+        - containerPort: 7860
+          name: gradio
+        - containerPort: 11434
+          name: ollama-api
+        volumeMounts:
+        - name: config-volume
+          mountPath: /app/config.yaml
+          subPath: config.yaml
+        - name: wallet-config
+          mountPath: /app/walletzip
+        - name: wallet-volume
+          mountPath: /app/wallet
+        - name: data-volume
+          mountPath: /app/embeddings
+        - name: chroma-volume
+          mountPath: /app/chroma_db
+        - name: ollama-models
+          mountPath: /root/.ollama
+        command: ["/bin/bash", "-c"]
+        args:
+        - |
+          apt-get update && apt-get install -y git curl gnupg
+          
+          # Install NVIDIA drivers and CUDA
+          echo "Installing NVIDIA drivers and CUDA..."
+          curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+          curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
+            sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
+            tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+          apt-get update && apt-get install -y nvidia-container-toolkit
+          
+          # Verify GPU is available
+          echo "Verifying GPU availability..."
+          nvidia-smi || echo "WARNING: nvidia-smi command failed. GPU might not be properly configured."
+          
+          # Install Ollama
+          echo "Installing Ollama..."
+          curl -fsSL https://ollama.com/install.sh | sh
+          
+          # Configure Ollama to use GPU
+          echo "Configuring Ollama for GPU usage..."
+          mkdir -p /root/.ollama
+          echo '{"gpu": {"enable": true}}' > /root/.ollama/config.json
+          
+          # Start Ollama in the background with GPU support
+          echo "Starting Ollama service with GPU support..."
+          ollama serve &
+          
+          # Wait for Ollama to be ready
+          echo "Waiting for Ollama to be ready..."
+          until curl -s http://localhost:11434/api/tags >/dev/null; do
+            sleep 5
+          done
+          
+          # Verify models are using GPU
+          echo "Verifying models are using GPU..."
+          curl -s http://localhost:11434/api/tags | grep -q "llama3" && echo "llama3 model is available"
+          
+          # Clone and set up the application
+          cd /app
+          git clone https://github.com/vmleon/devrel-labs.git
+          cd devrel-labs/agentic_rag
+          pip install -r requirements.txt
+          
+          # Start the Gradio app
+          echo "Starting Gradio application..."
+          python gradio_app.py
+        env:
+        - name: PYTHONUNBUFFERED
+          value: "1"
+        - name: OLLAMA_HOST
+          value: "http://localhost:11434"
+        - name: NVIDIA_VISIBLE_DEVICES
+          value: "all"
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: "compute,utility"
+        - name: TORCH_CUDA_ARCH_LIST
+          value: "7.0;7.5;8.0;8.6"
+      volumes:
+      - name: config-volume
+        configMap:
+          name: agentic-rag-config
+      - name: wallet-config
+        configMap:
+          name: wallet-zip
+      - name: wallet-volume
+        emptyDir:
+          sizeLimit: 50Mi
+      - name: data-volume
+        persistentVolumeClaim:
+          claimName: agentic-rag-data-pvc
+      - name: chroma-volume
+        persistentVolumeClaim:
+          claimName: agentic-rag-chroma-pvc
+      - name: ollama-models
+        persistentVolumeClaim:
+          claimName: ollama-models-pvc
@@ -0,0 +1,12 @@
+resources:
+  - pvcs.yaml
+  - deployment.yaml
+  - service.yaml
+configMapGenerator:
+  - name: agentic-rag-config
+    files:
+      - config.yaml
+  - name: wallet-zip
+    files:
+      - wallet/wallet.zip
+namespace: agentic-rag
@@ -0,0 +1,32 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: agentic-rag-data-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 50Gi
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: agentic-rag-chroma-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 50Gi
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 50Gi  # Backs the ollama-models volume (mounted at /root/.ollama)
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: agentic-rag
+  labels:
+    app: agentic-rag
+spec:
+  type: LoadBalancer  # Use NodePort if LoadBalancer is not available
+  ports:
+  - port: 80
+    targetPort: 7860
+    protocol: TCP
+    name: http
+  selector:
+    app: agentic-rag
@@ -0,0 +1,2 @@
+resources:
+  - "../../demo"