Dembrane
diff --git a/‎core/topic_modeling.py‎
Lines changed: 2 additions & 2 deletions b/‎core/topic_modeling.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎integrations/azure_client.py‎
Lines changed: 3 additions & 2 deletions b/‎integrations/azure_client.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎integrations/rag_client.py‎
Lines changed: 83 additions & 25 deletions b/‎integrations/rag_client.py‎
Lines changed: 83 additions & 25 deletions
diff --git a/‎services/aspect_processor.py‎
Lines changed: 4 additions & 2 deletions b/‎services/aspect_processor.py‎
Lines changed: 4 additions & 2 deletions
@@ -1,14 +1,14 @@
 import os
 from typing import List, Optional
+
 import torch
-import pandas as pd
 from umap import UMAP
+from runpod import RunPodLogger
 from hdbscan import HDBSCAN
 from bertopic import BERTopic
 from bertopic.vectorizers import ClassTfidfTransformer
 from sentence_transformers import SentenceTransformer
 from sklearn.feature_extraction.text import CountVectorizer
-from runpod import RunPodLogger
 
 logger = RunPodLogger()
 
 
@@ -2,9 +2,10 @@
 import json
 import asyncio
 from typing import Dict, List
-from pydantic import BaseModel
-from litellm import completion
+
 from runpod import RunPodLogger
+from litellm import completion
+from pydantic import BaseModel
 
 logger = RunPodLogger()
 
 
@@ -1,18 +1,45 @@
 import os
 from typing import List, Optional
-import requests
+
 import aiohttp
+import requests
 from runpod import RunPodLogger
+from utils.retry import retry_with_backoff, async_retry_with_backoff
+
 from integrations.directus_client import get_directus_token
 
 logger = RunPodLogger()
 
 
+def _make_rag_request(url: str, payload: dict, headers: dict) -> str:
+    """
+    Send a single blocking POST to the RAG server and return the body text.
+
+    Args:
+        url: The API endpoint URL
+        payload: The request payload, sent as the JSON body
+        headers: The request headers
+
+    Returns:
+        str: The response body text (the RAG prompt string from the server)
+
+    Raises:
+        requests.exceptions.RequestException: On failure or a 4xx/5xx status
+    """
+    logger.debug(f"Making RAG API request to {url}")
+    response = requests.post(url, json=payload, headers=headers, timeout=120)
+    response.raise_for_status()  # raises HTTPError on a 4xx/5xx status
+
+    result = response.text
+    logger.debug("Successfully retrieved RAG prompt")
+    return result
+
+
 def get_rag_prompt(
     query: str, segment_ids: Optional[List[str]] = None, rag_server_url: Optional[str] = None
 ) -> str:
     """
-    Retrieve RAG prompt by calling the external RAG server API.
+    Retrieve RAG prompt by calling the external RAG server API with retry logic.
 
     Args:
         query: The query string to send to the RAG server
@@ -24,7 +51,7 @@ def get_rag_prompt(
 
     Raises:
         ValueError: If RAG_SERVER_URL is not set and no URL is provided
-        Exception: If the API call fails
+        Exception: If the API call fails after all retries
     """
     if rag_server_url is None:
         rag_server_url = os.getenv("RAG_SERVER_URL")
@@ -48,28 +75,58 @@ def get_rag_prompt(
 
     headers = {"Content-Type": "application/json"}
     headers["Authorization"] = f"Bearer {get_directus_token()}"
-    try:
-        logger.debug(f"Making RAG API request to {url}")
-        response = requests.post(url, json=payload, headers=headers, timeout=120)
-        response.raise_for_status()
-
-        result = response.text
-        logger.debug("Successfully retrieved RAG prompt")
-        return result
 
-    except requests.exceptions.RequestException as e:
-        logger.error(f"Error calling API: {e}")
+    try:
+        return retry_with_backoff(
+            _make_rag_request,
+            max_retries=3,
+            initial_delay=2,
+            backoff_factor=2,
+            jitter=0.5,
+            logger=logger,
+            url=url,
+            payload=payload,
+            headers=headers,
+        )
+    except Exception as e:
+        logger.error(f"Error calling API after all retries: {e}")
         if hasattr(e, "response") and e.response is not None:
             logger.error(f"Response status: {e.response.status_code}")
             logger.error(f"Response text: {e.response.text}")
         raise Exception(f"Failed to get RAG prompt from server: {str(e)}") from e
 
 
+async def _make_rag_request_async(url: str, payload: dict, headers: dict) -> str:
+    """
+    Send a single POST to the RAG server via aiohttp and return the body text.
+
+    Args:
+        url: The API endpoint URL
+        payload: The request payload, sent as the JSON body
+        headers: The request headers
+
+    Returns:
+        str: The response body text (the RAG prompt string from the server)
+
+    Raises:
+        aiohttp.ClientError: On connection failure or a 4xx/5xx status
+    """
+    logger.debug(f"Making async RAG API request to {url}")
+    async with aiohttp.ClientSession() as session:  # new session per call, closed on exit
+        async with session.post(
+            url, json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)
+        ) as response:
+            response.raise_for_status()
+            result = await response.text()
+            logger.debug("Successfully retrieved RAG prompt")
+            return result
+
+
 async def get_rag_prompt_async(
     query: str, segment_ids: Optional[List[str]] = None, rag_server_url: Optional[str] = None
 ) -> str:
     """
-    Async version of get_rag_prompt for parallel processing.
+    Async version of get_rag_prompt for parallel processing with retry logic.
     """
     if rag_server_url is None:
         rag_server_url = os.getenv("RAG_SERVER_URL")
@@ -95,16 +152,17 @@ async def get_rag_prompt_async(
     headers["Authorization"] = f"Bearer {get_directus_token()}"
 
     try:
-        logger.debug(f"Making async RAG API request to {url}")
-        async with aiohttp.ClientSession() as session:
-            async with session.post(
-                url, json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)
-            ) as response:
-                response.raise_for_status()
-                result = await response.text()
-                logger.debug("Successfully retrieved RAG prompt")
-                return result
-
+        return await async_retry_with_backoff(
+            _make_rag_request_async,
+            max_retries=3,
+            initial_delay=2,
+            backoff_factor=2,
+            jitter=0.5,
+            logger=logger,
+            url=url,
+            payload=payload,
+            headers=headers,
+        )
     except Exception as e:
-        logger.error(f"Error calling API: {e}")
+        logger.error(f"Error calling API after all retries: {e}")
         raise Exception(f"Failed to get RAG prompt from server: {str(e)}") from e
@@ -1,16 +1,18 @@
 from typing import Dict, List
-from tqdm.asyncio import tqdm
+
 from runpod import RunPodLogger
-from data_model import Aspect
 from prompts import (
     rag_user_prompt,
     rag_system_prompt,
     initial_rag_prompt,
     fallback_get_aspect_response_list_user_prompt,
     fallback_get_aspect_response_list_system_prompt,
 )
+from data_model import Aspect
+from tqdm.asyncio import tqdm
 from integrations.rag_client import get_rag_prompt_async
 from integrations.azure_client import run_formated_llm_call_async
+
 from services.image_generator import get_image_url_async
 
 logger = RunPodLogger()