3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
@@ -44,5 +44,4 @@ jobs:
- name: Run Tests with Poetry
run: |
cd backend/app
poetry run pytest tests/vcelldb_api_tests.py --maxfail=1 --disable-warnings -q
poetry run pytest tests/tool_calling_tests.py --maxfail=1 --disable-warnings -q
poetry run pytest tests/ --maxfail=1 --disable-warnings -q
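With the consolidated invocation, pytest collects every module under backend/app/tests/, so new test files run in CI without further workflow edits. A minimal sketch of a module that would now be picked up automatically (the file name is hypothetical):

# backend/app/tests/test_smoke.py -- hypothetical file, shown only to illustrate
# that any tests/*.py module is now collected by `poetry run pytest tests/`.
def test_sanity():
    # Trivial placeholder; real test modules alongside it are collected the same way.
    assert 1 + 1 == 2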
6 changes: 5 additions & 1 deletion backend/app/main.py
@@ -3,7 +3,9 @@
from fastapi.middleware.cors import CORSMiddleware
from app.core.logger import get_logger
from app.core.config import settings
from app.services.knowledge_base_service import create_knowledge_base_collection_if_not_exists
from app.services.knowledge_base_service import (
create_knowledge_base_collection_if_not_exists,
)

logger = get_logger(__file__)

@@ -26,6 +28,7 @@
logger.info(f"Starting App : \n {ascii_art}")
logger.info("App Ready")


@app.on_event("startup")
async def startup_event():
"""
@@ -38,6 +41,7 @@ async def startup_event():
else:
logger.error(f"Knowledge base initialization failed: {result['message']}")


# CORS setup
app.add_middleware(
CORSMiddleware,
4 changes: 3 additions & 1 deletion backend/app/routes/llms_router.py
@@ -18,7 +18,9 @@ async def query_llm(conversation_history: dict):
Returns:
dict: The final response after processing the prompt with the tools.
"""
result, bmkeys = await get_llm_response(conversation_history.get("conversation_history", []))
result, bmkeys = await get_llm_response(
conversation_history.get("conversation_history", [])
)
return {"response": result, "bmkeys": bmkeys}


10 changes: 9 additions & 1 deletion backend/app/schemas/tool_schema.py
@@ -2,25 +2,33 @@
from pydantic import BaseModel, Field
from enum import Enum


class ParameterSchema(BaseModel):
"""Schema for function parameters"""

type: str = "object"
properties: Dict[str, Dict[str, Any]]
required: List[str]
additionalProperties: bool = False


class FunctionDefinition(BaseModel):
"""Schema for function definition within a tool"""

name: str
description: str
parameters: ParameterSchema
strict: bool = True


class ToolDefinition(BaseModel):
"""Schema for tool definition"""

type: str = "function"
function: FunctionDefinition


class ToolDefinitions(BaseModel):
"""Schema for a list of tool definitions"""
tools: List[ToolDefinition] = Field(..., description="List of available tools")

tools: List[ToolDefinition] = Field(..., description="List of available tools")
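These models mirror the OpenAI function-calling tool format. A minimal construction sketch (the tool name and parameters are illustrative, and model_dump() assumes Pydantic v2; use .dict() on v1):

# Illustrative only -- the tool name and its parameters are made up for this sketch.
from app.schemas.tool_schema import (
    FunctionDefinition,
    ParameterSchema,
    ToolDefinition,
    ToolDefinitions,
)

tool = ToolDefinition(
    function=FunctionDefinition(
        name="fetch_biomodels",
        description="Search VCell biomodels by name or owner.",
        parameters=ParameterSchema(
            properties={"bmName": {"type": "string", "description": "Biomodel name"}},
            required=["bmName"],
        ),
    )
)
tools = ToolDefinitions(tools=[tool])
print(tools.model_dump()["tools"])  # dict structure suitable for a chat.completions `tools` argument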
9 changes: 6 additions & 3 deletions backend/app/services/knowledge_base_service.py
@@ -15,7 +15,9 @@

openai_client = get_openai_client()
qdrant_client = get_qdrant_client()
markitdown_client = MarkItDown(llm_client=openai_client, model=settings.AZURE_DEPLOYMENT_NAME)
markitdown_client = MarkItDown(
llm_client=openai_client, model=settings.AZURE_DEPLOYMENT_NAME
)

KB_COLLECTION_NAME = settings.QDRANT_COLLECTION_NAME

@@ -43,6 +45,7 @@ def create_knowledge_base_collection_if_not_exists():
except Exception as e:
return {"status": "error", "message": f"Error creating collection: {str(e)}"}


def embed_text(text: str):
"""
Embed a text string using Azure OpenAI.
@@ -51,8 +54,7 @@ def embed_text(text: str):
text (str): The text to embed.
"""
response = openai_client.embeddings.create(
input=text,
model=settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
input=text, model=settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
)
return response.data[0].embedding
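A hedged usage sketch for embed_text; it needs valid Azure OpenAI settings, and the vector length depends on the configured embedding deployment:

# Usage sketch -- requires valid Azure OpenAI credentials in settings.
from app.services.knowledge_base_service import embed_text

vector = embed_text("calcium dynamics in a spherical cell")
print(len(vector))  # e.g. 1536 for text-embedding-ada-002 / text-embedding-3-small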

@@ -241,6 +243,7 @@ def delete_knowledge_base_file(
except Exception as e:
return {"status": "error", "message": f"Error deleting file: {str(e)}"}


@observe(name="GET_SIMILAR_CHUNKS")
def get_similar_chunks(
collection_name: str = KB_COLLECTION_NAME, query: str = "", limit: int = 10
14 changes: 10 additions & 4 deletions backend/app/services/llms_service.py
@@ -99,11 +99,11 @@ async def get_response_with_tools(conversation_history: list[dict]):

# Send back the final response incorporating the tool result
completion = client.chat.completions.create(
name = "GET_RESPONSE_WITH_TOOLS::PROCESS_TOOL_RESULTS",
name="GET_RESPONSE_WITH_TOOLS::PROCESS_TOOL_RESULTS",
model=settings.AZURE_DEPLOYMENT_NAME,
messages=messages,
metadata={
"tool_calls":tool_calls,
"tool_calls": tool_calls,
},
)

@@ -125,14 +125,20 @@ async def analyse_vcml(biomodel_id: str):
"""
try:
# Fetch VCML details
vcml = await get_vcml_file(biomodel_id)
logger.info(f"Fetching VCML file for biomodel: {biomodel_id}")
vcml = await get_vcml_file(biomodel_id, truncate=False)
# Analyze VCML with LLM
logger.info(
f"Analyzing VCML file for biomodel: {biomodel_id} with content: {str(vcml[:500])}"
)
vcml_system_prompt = "You are a VCell BioModel Assistant, designed to help users understand and interact with biological models in VCell. Your task is to provide human-readable, concise responses based on the given VCML."
vcml_prompt = f"Analyze the following VCML content for Biomodel {biomodel_id}: {str(vcml)}"
vcml_analysis = await get_llm_response(vcml_system_prompt, vcml_prompt)
return vcml_analysis
except Exception as e:
logger.error(f"Error analyzing VCML for biomodel {biomodel_id}: {str(e)}")
logger.error(
f"Error analyzing VCML for biomodel {biomodel_id}: {str(e)}", exc_info=True
)
return f"An error occurred during VCML analysis: {str(e)}"


127 changes: 112 additions & 15 deletions backend/app/services/vcelldb_service.py
@@ -1,5 +1,6 @@
from app.core.logger import get_logger
import httpx
import asyncio
from app.schemas.vcelldb_schema import BiomodelRequestParams, SimulationRequestParams
from urllib.parse import urlencode, quote
from langfuse import observe
@@ -9,6 +10,31 @@
logger = get_logger("vcelldb_service")


async def check_vcell_connectivity() -> bool:
"""
Check if the VCell API is reachable by attempting to resolve the hostname.

Returns:
bool: True if the API is reachable, False otherwise.
"""
try:
import socket

hostname = "vcell.cam.uchc.edu"
logger.info(f"Checking connectivity to {hostname}")

# Try to resolve the hostname
ip_address = socket.gethostbyname(hostname)
logger.info(f"Successfully resolved {hostname} to {ip_address}")
return True
except socket.gaierror as e:
logger.error(f"DNS resolution failed for {hostname}: {e}")
return False
except Exception as e:
logger.error(f"Unexpected error checking connectivity: {e}")
return False
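A hedged sketch of a unit test for this helper that monkeypatches socket.gethostbyname so it runs without network access; the module name is hypothetical and pytest-asyncio is assumed:

# Hypothetical test module, e.g. backend/app/tests/test_connectivity.py
import socket

import pytest

from app.services.vcelldb_service import check_vcell_connectivity


@pytest.mark.asyncio
async def test_connectivity_ok(monkeypatch):
    # Illustrative IP address; any successful resolution should return True.
    monkeypatch.setattr(socket, "gethostbyname", lambda host: "192.0.2.1")
    assert await check_vcell_connectivity() is True


@pytest.mark.asyncio
async def test_connectivity_dns_failure(monkeypatch):
    def fail(host):
        raise socket.gaierror("name resolution failed")

    monkeypatch.setattr(socket, "gethostbyname", fail)
    assert await check_vcell_connectivity() is False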


@observe(name="FETCH_BIOMODELS")
async def fetch_biomodels(params: BiomodelRequestParams) -> dict:
"""
@@ -74,25 +100,79 @@ async def fetch_simulation_details(params: SimulationRequestParams) -> dict:


@observe(name="GET_VCML_FILE")
async def get_vcml_file(biomodel_id: str, truncate: bool = False) -> str:
async def get_vcml_file(
biomodel_id: str, truncate: bool = False, max_retries: int = 3
) -> str:
"""
Fetches the VCML file content for a given biomodel.
Fetches the VCML file content for a given biomodel with retry logic.

Args:
biomodel_id (str): ID of the biomodel.
truncate (bool): Whether to truncate the VCML file.
max_retries (int): Maximum number of retry attempts.
Returns:
str: VCML content of the biomodel.
"""
async with httpx.AsyncClient() as client:
response = await client.get(
f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.vcml"
logger.info(f"Fetching VCML file for biomodel: {biomodel_id}")

# Check connectivity first
if not await check_vcell_connectivity():
logger.error(
"VCell API is not reachable. Please check your network connection and DNS settings."
)
raise Exception(
"VCell API is not reachable. Please check your network connection and DNS settings."
)
response.raise_for_status()
if truncate:
return response.text[:500]
else:
return response.text

for attempt in range(max_retries + 1):
try:
url = f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.vcml"
logger.info(
f"Requesting URL: {url} (attempt {attempt + 1}/{max_retries + 1})"
)

async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.get(url)
logger.info(f"Response status: {response.status_code}")
logger.info(f"Response headers: {dict(response.headers)}")
response.raise_for_status()

if truncate:
return response.text[:500]
else:
return response.text

except httpx.HTTPStatusError as e:
logger.error(
f"HTTP error fetching VCML file for biomodel {biomodel_id}: {e.response.status_code} - {e.response.text}"
)
if attempt == max_retries:
raise e
logger.warning(f"Retrying in {2 ** attempt} seconds...")
await asyncio.sleep(2**attempt)

except httpx.RequestError as e:
logger.error(
f"Request error fetching VCML file for biomodel {biomodel_id}: {str(e)}"
)
if attempt == max_retries:
raise e
logger.warning(f"Retrying in {2 ** attempt} seconds...")
await asyncio.sleep(2**attempt)

except Exception as e:
logger.error(
f"Unexpected error fetching VCML file for biomodel {biomodel_id}: {str(e)}"
)
if attempt == max_retries:
raise e
logger.warning(f"Retrying in {2 ** attempt} seconds...")
await asyncio.sleep(2**attempt)

# This should never be reached, but just in case
raise Exception(
f"Failed to fetch VCML file for biomodel {biomodel_id} after {max_retries + 1} attempts"
)
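For context, with the default max_retries=3 the call makes up to four attempts with a 1 s / 2 s / 4 s backoff between them. A hedged usage sketch from an async context (the biomodel ID is illustrative):

# Usage sketch -- the biomodel ID is illustrative.
import asyncio

from app.services.vcelldb_service import get_vcml_file


async def main():
    try:
        vcml = await get_vcml_file("123456789", truncate=True)  # first 500 characters
        print(vcml)
    except Exception as exc:
        # Raised when the connectivity check fails or all retries are exhausted.
        print(f"Could not fetch VCML: {exc}")


asyncio.run(main())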


@observe(name="GET_SBML_FILE")
@@ -106,12 +186,29 @@ async def get_sbml_file(biomodel_id: str) -> str:
Returns:
str: SBML content of the biomodel.
"""
async with httpx.AsyncClient() as client:
response = await client.get(
f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.sbml"
try:
url = f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.sbml"
logger.info(f"Requesting SBML file URL: {url}")

async with httpx.AsyncClient(timeout=180.0) as client:
response = await client.get(url)
response.raise_for_status()
return response.text
except httpx.HTTPStatusError as e:
logger.error(
f"HTTP error fetching SBML file for biomodel {biomodel_id}: {e.response.status_code} - {e.response.text}"
)
response.raise_for_status()
return response.text
raise e
except httpx.RequestError as e:
logger.error(
f"Request error fetching SBML file for biomodel {biomodel_id}: {str(e)}"
)
raise e
except Exception as e:
logger.error(
f"Unexpected error fetching SBML file for biomodel {biomodel_id}: {str(e)}"
)
raise e


@observe(name="GET_DIAGRAM_URL")