Merge pull request #6 from OHNLP/fix

JaerongA · web-flow · commit 70bbe5f97c4e · 2026-01-20T12:02:13.000-06:00
Bug fix & error-handling
diff --git a/scripts/batch_mapping.py b/scripts/batch_mapping.py
@@ -57,23 +57,8 @@ async def main(llm_provider, data_path, batch_size):
                 "url": llm_response["url"],
                 "processing_time_sec": agent_result["processing_time_sec"],
                 "reason": llm_response["reason"],
-                "concept_exists": utils.concept_id_exists_in_athena(
-                    llm_response["concept_id"]
-                ),
             }
 
-            if llm_response["concept_id"] and llm_response["name"]:
-                athena_name = utils.get_concept_name_from_athena(
-                    llm_response["concept_id"]
-                )
-                if athena_name is not None:
-                    result["names_match"] = (
-                        athena_name.lower() == llm_response["name"].lower()
-                    )
-                else:
-                    result["names_match"] = None
-            else:
-                result["names_match"] = None
             results.append(result)
 
         batch_results = pd.DataFrame(results)
diff --git a/src/omop_mcp/__init__.py b/src/omop_mcp/__init__.py
@@ -1,11 +1,9 @@
-import asyncio
-
 from . import server
 
 
 def main():
     """Main entry point for the package."""
-    asyncio.run(server.main())
+    server.main()
 
 
 __all__ = ["main", "server"]
diff --git a/src/omop_mcp/server.py b/src/omop_mcp/server.py
@@ -12,6 +12,7 @@
 import mcp.types as types
 from mcp.server.fastmcp import FastMCP
 
+from omop_mcp import utils
 from omop_mcp.prompts import EXAMPLE_INPUT, EXAMPLE_OUTPUT, MCP_DOC_INSTRUCTION
 
 BASE_DIR = Path(__file__).parent
@@ -122,83 +123,56 @@ async def find_omop_concept(
         max_results: Maximum number of candidate concepts to return
 
     Returns:
-        Dict containing candidate concepts or error information.
+        Dict containing candidate concepts or error information if no results found.
     """
     logging.info(
         f"find_omop_concept called with keyword='{keyword}', omop_table='{omop_table}', omop_field='{omop_field}'"
     )
 
-    # Create a new session for each request
-    async with aiohttp.ClientSession() as session:
-        url = "https://athena.ohdsi.org/api/v1/concepts"
-        params = {"query": keyword}
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "en-US,en;q=0.5",
-            "Referer": "https://athena.ohdsi.org/search-terms",
-            "Origin": "https://athena.ohdsi.org",
-        }
-
-        try:
-            async with session.get(url, params=params, headers=headers) as response:
-                response.raise_for_status()
-                data = await response.json()
-        except aiohttp.ClientError as e:
-            return {
-                "error": f"Failed to query Athena: {str(e)}",
-            }
-
-        logging.debug(f"Athena response: {data}")
-        concepts = []
-        if isinstance(data, dict) and "content" in data:
-            concepts = data["content"]
-        elif isinstance(data, list):
-            concepts = data
-        elif isinstance(data, dict):
-            for key in ("content", "results", "items", "concepts"):
-                if key in data and isinstance(data[key], list):
-                    concepts = data[key]
-                    break
-
-        if not concepts:
-            return {
-                "error": "No results found or unexpected response structure.",
-            }
-
-        # Return multiple candidates with all their metadata for LLM to evaluate
-        candidates = []
-        for i, c in enumerate(concepts[:max_results]):
-            candidate = {
-                "concept_id": c.get("id", ""),
-                "code": c.get("code", ""),
-                "name": c.get("name", ""),
-                "class": c.get("className", ""),
-                "concept": c.get("standardConcept", ""),
-                "validity": c.get("invalidReason", c.get("validity", "")),
-                "domain": c.get("domain", c.get("domainId", "")),
-                "vocab": c.get("vocabulary", c.get("vocabularyId", "")),
-                "url": f"https://athena.ohdsi.org/search-terms/terms/{c.get('id', '')}",
-            }
-            candidates.append(candidate)
+    try:
+        concepts = await utils.search_athena_concept_async(keyword)
+    except Exception as e:
+        logging.error(f"Athena API call failed: {e}")
+        raise RuntimeError(f"Athena API is not accessible: {e}") from e
 
+    if not concepts:
         return {
-            "candidates": candidates,
-            "search_metadata": {
-                "keyword_searched": keyword,
-                "omop_table": omop_table,
-                "omop_field": omop_field,
-                "total_found": len(concepts),
-                "candidates_returned": len(candidates),
-                "selection_guidance": (
-                    "Select the most appropriate concept based on clinical context. "
-                    "Access omop://preferred_vocabularies for vocabulary preferences. "
-                    "Generally prefer Standard + Valid concepts from recommended vocabularies, "
-                    "but context may require different choices (e.g., research needs, "
-                    "specific vocabulary requirements, or non-standard mappings)."
-                ),
-            },
+            "error": f"No results found for keyword '{keyword}'. The search term may not exist in the OMOP vocabulary.",
+        }
+
+    # Return multiple candidates with all their metadata for LLM to evaluate
+    candidates = []
+    for i, c in enumerate(concepts[:max_results]):
+        candidate = {
+            "concept_id": c.get("id", ""),
+            "code": c.get("code", ""),
+            "name": c.get("name", ""),
+            "class": c.get("className", ""),
+            "concept": c.get("standardConcept", ""),
+            "validity": c.get("invalidReason", c.get("validity", "")),
+            "domain": c.get("domain", c.get("domainId", "")),
+            "vocab": c.get("vocabulary", c.get("vocabularyId", "")),
+            "url": f"https://athena.ohdsi.org/search-terms/terms/{c.get('id', '')}",
         }
+        candidates.append(candidate)
+
+    return {
+        "candidates": candidates,
+        "search_metadata": {
+            "keyword_searched": keyword,
+            "omop_table": omop_table,
+            "omop_field": omop_field,
+            "total_found": len(concepts),
+            "candidates_returned": len(candidates),
+            "selection_guidance": (
+                "Select the most appropriate concept based on clinical context. "
+                "Access omop://preferred_vocabularies for vocabulary preferences. "
+                "Generally prefer Standard + Valid concepts from recommended vocabularies, "
+                "but context may require different choices (e.g., research needs, "
+                "specific vocabulary requirements, or non-standard mappings)."
+            ),
+        },
+    }
 
 
 @mcp.tool()
diff --git a/src/omop_mcp/utils.py b/src/omop_mcp/utils.py
@@ -1,44 +1,70 @@
 import re
 
+import aiohttp
 import requests
 
+# Shared constants
+ATHENA_SEARCH_URL = "https://athena.ohdsi.org/search-terms"
+ATHENA_API_URL = "https://athena.ohdsi.org/api/v1/concepts"
+
+# Shared headers
+INITIAL_PAGE_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.9",
+}
+
+API_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+    "Accept": "application/json",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Referer": "https://athena.ohdsi.org/search-terms",
+    "Origin": "https://athena.ohdsi.org",
+    "Sec-Fetch-Dest": "empty",
+    "Sec-Fetch-Mode": "cors",
+    "Sec-Fetch-Site": "same-origin",
+    "Sec-Ch-Ua": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
+    "Sec-Ch-Ua-Mobile": "?0",
+    "Sec-Ch-Ua-Platform": '"macOS"',
+}
+
+
+def _extract_concepts_from_response(data):
+    """
+    Extract concepts list from Athena API response.
+    Handles various response formats.
+    """
+    if isinstance(data, dict) and "content" in data:
+        return data["content"]
+    elif isinstance(data, list):
+        return data
+    elif isinstance(data, dict):
+        for key in ("content", "results", "items", "concepts"):
+            if key in data and isinstance(data[key], list):
+                return data[key]
+    return []
+
 
 def search_athena_concept(keyword: str):
     """
     Search for OMOP concepts using the Athena web interface.
     This function scrapes the search results from the Athena website.
     """
-    url = "https://athena.ohdsi.org/api/v1/concepts"
+    session = requests.Session()
+    try:
+        session.get(ATHENA_SEARCH_URL, headers=INITIAL_PAGE_HEADERS, timeout=10)
+    except Exception:
+        pass
+
     params = {"query": keyword}
-    headers = {
-        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-        "Accept": "application/json, text/plain, */*",
-        "Accept-Language": "en-US,en;q=0.5",
-        "Referer": "https://athena.ohdsi.org/search-terms",
-        "Origin": "https://athena.ohdsi.org",
-    }
 
     try:
-        response = requests.get(url, params=params, headers=headers, timeout=10)
+        response = session.get(
+            ATHENA_API_URL, params=params, headers=API_HEADERS, timeout=10
+        )
         response.raise_for_status()
         data = response.json()
-
-        # Extract concepts from the response
-        concepts = []
-        if isinstance(data, dict) and "content" in data:
-            concepts = data["content"]
-        elif isinstance(data, list):
-            concepts = data
-        elif isinstance(data, dict):
-            for key in ("content", "results", "items", "concepts"):
-                if key in data and isinstance(data[key], list):
-                    concepts = data[key]
-                    break
-        else:
-            concepts = []
-
-        return concepts
-
+        return _extract_concepts_from_response(data)
     except requests.exceptions.RequestException as e:
         print(f"Error searching Athena: {e}")
         return []
@@ -47,6 +73,49 @@ def search_athena_concept(keyword: str):
         return []
 
 
+async def search_athena_concept_async(keyword: str):
+    """
+    Async version of search_athena_concept using aiohttp.
+
+    Raises RuntimeError for API failures (connection/HTTP errors).
+    Returns empty list for successful API calls with no results.
+    """
+    async with aiohttp.ClientSession() as session:
+        # First, visit the search page to establish a session and get cookies
+        try:
+            async with session.get(
+                ATHENA_SEARCH_URL,
+                headers=INITIAL_PAGE_HEADERS,
+                timeout=aiohttp.ClientTimeout(total=10),
+            ) as _:
+                pass
+        except aiohttp.ClientError as e:
+            raise RuntimeError(
+                f"Failed to establish session with Athena API: {e}"
+            ) from e
+        except Exception as e:
+            raise RuntimeError(
+                f"Unexpected error establishing Athena session: {e}"
+            ) from e
+
+        params = {"query": keyword}
+
+        try:
+            async with session.get(
+                ATHENA_API_URL,
+                params=params,
+                headers=API_HEADERS,
+                timeout=aiohttp.ClientTimeout(total=10),
+            ) as response:
+                response.raise_for_status()
+                data = await response.json()
+                return _extract_concepts_from_response(data)
+        except aiohttp.ClientError as e:
+            raise RuntimeError(f"Athena API request failed: {e}") from e
+        except Exception as e:
+            raise RuntimeError(f"Unexpected error calling Athena API: {e}") from e
+
+
 def concept_id_exists_in_athena(concept_id: str) -> bool:
     """Check if a concept exists in Athena."""
     results = search_athena_concept(concept_id)
diff --git a/uv.lock b/uv.lock