diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 82f994f..e61cf70 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,5 +44,4 @@ jobs: - name: Run Tests with Poetry run: | cd backend/app - poetry run pytest tests/vcelldb_api_tests.py --maxfail=1 --disable-warnings -q - poetry run pytest tests/tool_calling_tests.py --maxfail=1 --disable-warnings -q + poetry run pytest tests/ --maxfail=1 --disable-warnings -q \ No newline at end of file diff --git a/backend/app/main.py b/backend/app/main.py index 2964a43..f48d10b 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -3,7 +3,9 @@ from fastapi.middleware.cors import CORSMiddleware from app.core.logger import get_logger from app.core.config import settings -from app.services.knowledge_base_service import create_knowledge_base_collection_if_not_exists +from app.services.knowledge_base_service import ( + create_knowledge_base_collection_if_not_exists, +) logger = get_logger(__file__) @@ -26,6 +28,7 @@ logger.info(f"Starting App : \n {ascii_art}") logger.info("App Ready") + @app.on_event("startup") async def startup_event(): """ @@ -38,6 +41,7 @@ async def startup_event(): else: logger.error(f"Knowledge base initialization failed: {result['message']}") + # CORS setup app.add_middleware( CORSMiddleware, diff --git a/backend/app/routes/llms_router.py b/backend/app/routes/llms_router.py index 529b66f..35ed27a 100644 --- a/backend/app/routes/llms_router.py +++ b/backend/app/routes/llms_router.py @@ -18,7 +18,9 @@ async def query_llm(conversation_history: dict): Returns: dict: The final response after processing the prompt with the tools. """ - result, bmkeys = await get_llm_response(conversation_history.get("conversation_history", [])) + result, bmkeys = await get_llm_response( + conversation_history.get("conversation_history", []) + ) return {"response": result, "bmkeys": bmkeys} diff --git a/backend/app/schemas/tool_schema.py b/backend/app/schemas/tool_schema.py index 979d4f3..a8e764e 100644 --- a/backend/app/schemas/tool_schema.py +++ b/backend/app/schemas/tool_schema.py @@ -2,25 +2,33 @@ from pydantic import BaseModel, Field from enum import Enum + class ParameterSchema(BaseModel): """Schema for function parameters""" + type: str = "object" properties: Dict[str, Dict[str, Any]] required: List[str] additionalProperties: bool = False + class FunctionDefinition(BaseModel): """Schema for function definition within a tool""" + name: str description: str parameters: ParameterSchema strict: bool = True + class ToolDefinition(BaseModel): """Schema for tool definition""" + type: str = "function" function: FunctionDefinition + class ToolDefinitions(BaseModel): """Schema for a list of tool definitions""" - tools: List[ToolDefinition] = Field(..., description="List of available tools") \ No newline at end of file + + tools: List[ToolDefinition] = Field(..., description="List of available tools") diff --git a/backend/app/services/knowledge_base_service.py b/backend/app/services/knowledge_base_service.py index 1cc7245..86f61d9 100644 --- a/backend/app/services/knowledge_base_service.py +++ b/backend/app/services/knowledge_base_service.py @@ -15,7 +15,9 @@ openai_client = get_openai_client() qdrant_client = get_qdrant_client() -markitdown_client = MarkItDown(llm_client=openai_client, model=settings.AZURE_DEPLOYMENT_NAME) +markitdown_client = MarkItDown( + llm_client=openai_client, model=settings.AZURE_DEPLOYMENT_NAME +) KB_COLLECTION_NAME = settings.QDRANT_COLLECTION_NAME @@ -43,6 +45,7 @@ def create_knowledge_base_collection_if_not_exists(): except Exception as e: return {"status": "error", "message": f"Error creating collection: {str(e)}"} + def embed_text(text: str): """ Embed a text string using Azure OpenAI. @@ -51,8 +54,7 @@ def embed_text(text: str): text (str): The text to embed. """ response = openai_client.embeddings.create( - input=text, - model=settings.AZURE_EMBEDDING_DEPLOYMENT_NAME + input=text, model=settings.AZURE_EMBEDDING_DEPLOYMENT_NAME ) return response.data[0].embedding @@ -241,6 +243,7 @@ def delete_knowledge_base_file( except Exception as e: return {"status": "error", "message": f"Error deleting file: {str(e)}"} + @observe(name="GET_SIMILAR_CHUNKS") def get_similar_chunks( collection_name: str = KB_COLLECTION_NAME, query: str = "", limit: int = 10 diff --git a/backend/app/services/llms_service.py b/backend/app/services/llms_service.py index c0b4d7d..7a04302 100644 --- a/backend/app/services/llms_service.py +++ b/backend/app/services/llms_service.py @@ -99,11 +99,11 @@ async def get_response_with_tools(conversation_history: list[dict]): # Send back the final response incorporating the tool result completion = client.chat.completions.create( - name = "GET_RESPONSE_WITH_TOOLS::PROCESS_TOOL_RESULTS", + name="GET_RESPONSE_WITH_TOOLS::PROCESS_TOOL_RESULTS", model=settings.AZURE_DEPLOYMENT_NAME, messages=messages, metadata={ - "tool_calls":tool_calls, + "tool_calls": tool_calls, }, ) @@ -125,14 +125,20 @@ async def analyse_vcml(biomodel_id: str): """ try: # Fetch VCML details - vcml = await get_vcml_file(biomodel_id) + logger.info(f"Fetching VCML file for biomodel: {biomodel_id}") + vcml = await get_vcml_file(biomodel_id, truncate=False) # Analyze VCML with LLM + logger.info( + f"Analyzing VCML file for biomodel: {biomodel_id} with content: {str(vcml[:500])}" + ) vcml_system_prompt = "You are a VCell BioModel Assistant, designed to help users understand and interact with biological models in VCell. Your task is to provide human-readable, concise responses based on the given VCML." vcml_prompt = f"Analyze the following VCML content for Biomodel {biomodel_id}: {str(vcml)}" vcml_analysis = await get_llm_response(vcml_system_prompt, vcml_prompt) return vcml_analysis except Exception as e: - logger.error(f"Error analyzing VCML for biomodel {biomodel_id}: {str(e)}") + logger.error( + f"Error analyzing VCML for biomodel {biomodel_id}: {str(e)}", exc_info=True + ) return f"An error occurred during VCML analysis: {str(e)}" diff --git a/backend/app/services/vcelldb_service.py b/backend/app/services/vcelldb_service.py index c0dcce4..df76028 100644 --- a/backend/app/services/vcelldb_service.py +++ b/backend/app/services/vcelldb_service.py @@ -1,5 +1,6 @@ from app.core.logger import get_logger import httpx +import asyncio from app.schemas.vcelldb_schema import BiomodelRequestParams, SimulationRequestParams from urllib.parse import urlencode, quote from langfuse import observe @@ -9,6 +10,31 @@ logger = get_logger("vcelldb_service") +async def check_vcell_connectivity() -> bool: + """ + Check if the VCell API is reachable by attempting to resolve the hostname. + + Returns: + bool: True if the API is reachable, False otherwise. + """ + try: + import socket + + hostname = "vcell.cam.uchc.edu" + logger.info(f"Checking connectivity to {hostname}") + + # Try to resolve the hostname + ip_address = socket.gethostbyname(hostname) + logger.info(f"Successfully resolved {hostname} to {ip_address}") + return True + except socket.gaierror as e: + logger.error(f"DNS resolution failed for {hostname}: {e}") + return False + except Exception as e: + logger.error(f"Unexpected error checking connectivity: {e}") + return False + + @observe(name="FETCH_BIOMODELS") async def fetch_biomodels(params: BiomodelRequestParams) -> dict: """ @@ -74,25 +100,79 @@ async def fetch_simulation_details(params: SimulationRequestParams) -> dict: @observe(name="GET_VCML_FILE") -async def get_vcml_file(biomodel_id: str, truncate: bool = False) -> str: +async def get_vcml_file( + biomodel_id: str, truncate: bool = False, max_retries: int = 3 +) -> str: """ - Fetches the VCML file content for a given biomodel. + Fetches the VCML file content for a given biomodel with retry logic. Args: biomodel_id (str): ID of the biomodel. truncate (bool): Whether to truncate the VCML file. + max_retries (int): Maximum number of retry attempts. Returns: str: VCML content of the biomodel. """ - async with httpx.AsyncClient() as client: - response = await client.get( - f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.vcml" + logger.info(f"Fetching VCML file for biomodel: {biomodel_id}") + + # Check connectivity first + if not await check_vcell_connectivity(): + logger.error( + "VCell API is not reachable. Please check your network connection and DNS settings." + ) + raise Exception( + "VCell API is not reachable. Please check your network connection and DNS settings." ) - response.raise_for_status() - if truncate: - return response.text[:500] - else: - return response.text + + for attempt in range(max_retries + 1): + try: + url = f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.vcml" + logger.info( + f"Requesting URL: {url} (attempt {attempt + 1}/{max_retries + 1})" + ) + + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url) + logger.info(f"Response status: {response.status_code}") + logger.info(f"Response headers: {dict(response.headers)}") + response.raise_for_status() + + if truncate: + return response.text[:500] + else: + return response.text + + except httpx.HTTPStatusError as e: + logger.error( + f"HTTP error fetching VCML file for biomodel {biomodel_id}: {e.response.status_code} - {e.response.text}" + ) + if attempt == max_retries: + raise e + logger.warning(f"Retrying in {2 ** attempt} seconds...") + await asyncio.sleep(2**attempt) + + except httpx.RequestError as e: + logger.error( + f"Request error fetching VCML file for biomodel {biomodel_id}: {str(e)}" + ) + if attempt == max_retries: + raise e + logger.warning(f"Retrying in {2 ** attempt} seconds...") + await asyncio.sleep(2**attempt) + + except Exception as e: + logger.error( + f"Unexpected error fetching VCML file for biomodel {biomodel_id}: {str(e)}" + ) + if attempt == max_retries: + raise e + logger.warning(f"Retrying in {2 ** attempt} seconds...") + await asyncio.sleep(2**attempt) + + # This should never be reached, but just in case + raise Exception( + f"Failed to fetch VCML file for biomodel {biomodel_id} after {max_retries + 1} attempts" + ) @observe(name="GET_SBML_FILE") @@ -106,12 +186,29 @@ async def get_sbml_file(biomodel_id: str) -> str: Returns: str: SBML content of the biomodel. """ - async with httpx.AsyncClient() as client: - response = await client.get( - f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.sbml" + try: + url = f"{VCELL_API_BASE_URL}/biomodel/{biomodel_id}/biomodel.sbml" + logger.info(f"Requesting SBML file URL: {url}") + + async with httpx.AsyncClient(timeout=180.0) as client: + response = await client.get(url) + response.raise_for_status() + return response.text + except httpx.HTTPStatusError as e: + logger.error( + f"HTTP error fetching SBML file for biomodel {biomodel_id}: {e.response.status_code} - {e.response.text}" ) - response.raise_for_status() - return response.text + raise e + except httpx.RequestError as e: + logger.error( + f"Request error fetching SBML file for biomodel {biomodel_id}: {str(e)}" + ) + raise e + except Exception as e: + logger.error( + f"Unexpected error fetching SBML file for biomodel {biomodel_id}: {str(e)}" + ) + raise e @observe(name="GET_DIAGRAM_URL") diff --git a/backend/app/tests/test_llms_service.py b/backend/app/tests/test_llms_service.py new file mode 100644 index 0000000..15bf549 --- /dev/null +++ b/backend/app/tests/test_llms_service.py @@ -0,0 +1,90 @@ +import sys +import os +import pytest + +# This tells pytest that all tests in the file should run in asyncio mode. +pytestmark = pytest.mark.asyncio + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) + +from app.services.llms_service import ( + get_llm_response, + get_response_with_tools, + analyse_vcml, + analyse_diagram, +) + + +class TestLLMsService: + """Test class for LLMs service functions.""" + + async def test_get_llm_response_success(self): + """Test successful LLM response generation.""" + result = await get_llm_response( + system_prompt="Answer with a single word, say yes or no, nothing else. no matter what the user asks or says.", + user_prompt="Is the sky blue?", + ) + + assert isinstance(result, str) + assert "yes" in result.lower() + assert len(result) < 5 + + async def test_get_response_with_tools_no_tool_calls(self): + """Test response generation when no tools are called.""" + conversation_history = [{"role": "user", "content": "Hello"}] + + result, bmkeys = await get_response_with_tools(conversation_history) + + assert isinstance(result, str) + assert bmkeys == [] + + async def test_get_response_with_tools_with_tool_calls(self): + """Test response generation when tools are called.""" + conversation_history = [{"role": "user", "content": "Hello"}] + + result, bmkeys = await get_response_with_tools(conversation_history) + + assert isinstance(result, str) + assert bmkeys == [] + + conversation_history = [{"role": "user", "content": "List all calcium models."}] + + result, bmkeys = await get_response_with_tools(conversation_history) + + expected_bmkeys = [ + "273924831", + "271989751", + "254507626", + "211839191", + "211211962", + "191137435", + "114597194", + "13737035", + "2917788", + ] + + assert sorted(bmkeys) == sorted(expected_bmkeys) + assert "273924831" in result + assert "271989751" in result + assert "254507626" in result + assert "211839191" in result + assert "211211962" in result + assert "191137435" in result + assert "114597194" in result + assert "13737035" in result + assert "2917788" in result + + async def test_analyse_vcml_success(self): + """Test successful VCML analysis.""" + result = await analyse_vcml("273924831") + + assert isinstance(result, str) + assert "MouseSpermCalcium" in result + + async def test_analyse_diagram_success(self): + """Test successful diagram analysis.""" + result = await analyse_diagram("273924831") + + assert isinstance(result, str) + assert "diagram" in result.lower() + assert "mousespermcalcium" in result.lower() diff --git a/backend/app/tests/test_vcelldb_service.py b/backend/app/tests/test_vcelldb_service.py new file mode 100644 index 0000000..15afcd3 --- /dev/null +++ b/backend/app/tests/test_vcelldb_service.py @@ -0,0 +1,78 @@ +import sys +import os +import pytest + +# This tells pytest that all tests in the file should run in asyncio mode. +pytestmark = pytest.mark.asyncio + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) + +from app.services.vcelldb_service import ( + fetch_biomodels, + fetch_simulation_details, + get_vcml_file, + fetch_biomodel_applications_files, +) +from app.schemas.vcelldb_schema import BiomodelRequestParams, SimulationRequestParams + + +class TestVCellDBService: + """Test class for VCell DB service functions.""" + + async def test_fetch_biomodels_success(self): + """Test successful biomodel fetching.""" + # Create test parameters + params = BiomodelRequestParams( + bmId="", + bmName="calcium", + category="all", + owner="", + savedLow=None, + savedHigh=None, + startRow=1, + maxRows=1000, + orderBy="date_desc", + ) + + result = await fetch_biomodels(params) + + # Verify the result structure + assert result["models_count"] == 9 + assert "273924831" in result["unique_model_keys (bmkey)"] + assert "271989751" in result["unique_model_keys (bmkey)"] + assert result["data"][0]["bmName"] == "MouseSpermCalcium" + + async def test_fetch_simulation_details_success(self): + """Test successful simulation details fetching.""" + params = SimulationRequestParams(bmId="273924831", simId="263874941") + + result = await fetch_simulation_details(params) + + assert result["key"] == "263874941" + assert result["ownerName"] == "Juliajessica" + assert result["ownerKey"] == "121396185" + assert result["mathKey"] == "263874891" + + async def test_get_vcml_file_truncated(self): + """Test VCML file fetching with truncation.""" + result = await get_vcml_file("273924831", truncate=True) + + assert len(result) == 500 + assert "MouseSpermCalcium" in result + assert "=3.9" files = [ - {file = "pytest-8.4.0-py3-none-any.whl", hash = "sha256:f40f825768ad76c0977cbacdf1fd37c6f7a468e460ea6a0636078f8972d4517e"}, - {file = "pytest-8.4.0.tar.gz", hash = "sha256:14d920b48472ea0dbf68e45b96cd1ffda4705f33307dcc86c676c1b5104838a6"}, + {file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"}, + {file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"}, ] [package.dependencies] @@ -3462,13 +3462,13 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests [[package]] name = "pytest-asyncio" -version = "1.0.0" +version = "1.1.0" description = "Pytest support for asyncio" optional = false python-versions = ">=3.9" files = [ - {file = "pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3"}, - {file = "pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f"}, + {file = "pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf"}, + {file = "pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea"}, ] [package.dependencies] @@ -5054,4 +5054,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.12,<3.14" -content-hash = "326fec0f5bbcf14aac4cb5e501c18d0ddc66a6c7d030f12c40522869f0305edf" +content-hash = "f6b400e15f0c42f12d096e0baf35dcf45207c3164b2cbed4288a32eda8834264" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 2ff0ba0..4c0c64a 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -22,6 +22,8 @@ markitdown = {extras = ["all"], version = "^0.1.2"} notebook = "^7.4.4" ipykernel = "^6.30.0" langfuse = "^3.2.4" +pytest = "^8.4.1" +pytest-asyncio = "^1.1.0" [tool.poetry.group.dev.dependencies]