Skip to content

Commit 70bbe5f

Browse files
authored
Merge pull request #6 from OHNLP/fix
Bug fix & error-handling
2 parents b975233 + a622bd8 commit 70bbe5f

File tree

5 files changed

+163
-113
lines changed

5 files changed

+163
-113
lines changed

scripts/batch_mapping.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -57,23 +57,8 @@ async def main(llm_provider, data_path, batch_size):
5757
"url": llm_response["url"],
5858
"processing_time_sec": agent_result["processing_time_sec"],
5959
"reason": llm_response["reason"],
60-
"concept_exists": utils.concept_id_exists_in_athena(
61-
llm_response["concept_id"]
62-
),
6360
}
6461

65-
if llm_response["concept_id"] and llm_response["name"]:
66-
athena_name = utils.get_concept_name_from_athena(
67-
llm_response["concept_id"]
68-
)
69-
if athena_name is not None:
70-
result["names_match"] = (
71-
athena_name.lower() == llm_response["name"].lower()
72-
)
73-
else:
74-
result["names_match"] = None
75-
else:
76-
result["names_match"] = None
7762
results.append(result)
7863

7964
batch_results = pd.DataFrame(results)

src/omop_mcp/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
import asyncio
2-
31
from . import server
42

53

64
def main():
75
"""Main entry point for the package."""
8-
asyncio.run(server.main())
6+
server.main()
97

108

119
__all__ = ["main", "server"]

src/omop_mcp/server.py

Lines changed: 43 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import mcp.types as types
1313
from mcp.server.fastmcp import FastMCP
1414

15+
from omop_mcp import utils
1516
from omop_mcp.prompts import EXAMPLE_INPUT, EXAMPLE_OUTPUT, MCP_DOC_INSTRUCTION
1617

1718
BASE_DIR = Path(__file__).parent
@@ -122,83 +123,56 @@ async def find_omop_concept(
122123
max_results: Maximum number of candidate concepts to return
123124
124125
Returns:
125-
Dict containing candidate concepts or error information.
126+
Dict containing candidate concepts or error information if no results found.
126127
"""
127128
logging.info(
128129
f"find_omop_concept called with keyword='{keyword}', omop_table='{omop_table}', omop_field='{omop_field}'"
129130
)
130131

131-
# Create a new session for each request
132-
async with aiohttp.ClientSession() as session:
133-
url = "https://athena.ohdsi.org/api/v1/concepts"
134-
params = {"query": keyword}
135-
headers = {
136-
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
137-
"Accept": "application/json, text/plain, */*",
138-
"Accept-Language": "en-US,en;q=0.5",
139-
"Referer": "https://athena.ohdsi.org/search-terms",
140-
"Origin": "https://athena.ohdsi.org",
141-
}
142-
143-
try:
144-
async with session.get(url, params=params, headers=headers) as response:
145-
response.raise_for_status()
146-
data = await response.json()
147-
except aiohttp.ClientError as e:
148-
return {
149-
"error": f"Failed to query Athena: {str(e)}",
150-
}
151-
152-
logging.debug(f"Athena response: {data}")
153-
concepts = []
154-
if isinstance(data, dict) and "content" in data:
155-
concepts = data["content"]
156-
elif isinstance(data, list):
157-
concepts = data
158-
elif isinstance(data, dict):
159-
for key in ("content", "results", "items", "concepts"):
160-
if key in data and isinstance(data[key], list):
161-
concepts = data[key]
162-
break
163-
164-
if not concepts:
165-
return {
166-
"error": "No results found or unexpected response structure.",
167-
}
168-
169-
# Return multiple candidates with all their metadata for LLM to evaluate
170-
candidates = []
171-
for i, c in enumerate(concepts[:max_results]):
172-
candidate = {
173-
"concept_id": c.get("id", ""),
174-
"code": c.get("code", ""),
175-
"name": c.get("name", ""),
176-
"class": c.get("className", ""),
177-
"concept": c.get("standardConcept", ""),
178-
"validity": c.get("invalidReason", c.get("validity", "")),
179-
"domain": c.get("domain", c.get("domainId", "")),
180-
"vocab": c.get("vocabulary", c.get("vocabularyId", "")),
181-
"url": f"https://athena.ohdsi.org/search-terms/terms/{c.get('id', '')}",
182-
}
183-
candidates.append(candidate)
132+
try:
133+
concepts = await utils.search_athena_concept_async(keyword)
134+
except Exception as e:
135+
logging.error(f"Athena API call failed: {e}")
136+
raise RuntimeError(f"Athena API is not accessible: {e}") from e
184137

138+
if not concepts:
185139
return {
186-
"candidates": candidates,
187-
"search_metadata": {
188-
"keyword_searched": keyword,
189-
"omop_table": omop_table,
190-
"omop_field": omop_field,
191-
"total_found": len(concepts),
192-
"candidates_returned": len(candidates),
193-
"selection_guidance": (
194-
"Select the most appropriate concept based on clinical context. "
195-
"Access omop://preferred_vocabularies for vocabulary preferences. "
196-
"Generally prefer Standard + Valid concepts from recommended vocabularies, "
197-
"but context may require different choices (e.g., research needs, "
198-
"specific vocabulary requirements, or non-standard mappings)."
199-
),
200-
},
140+
"error": f"No results found for keyword '{keyword}'. The search term may not exist in the OMOP vocabulary.",
141+
}
142+
143+
# Return multiple candidates with all their metadata for LLM to evaluate
144+
candidates = []
145+
for i, c in enumerate(concepts[:max_results]):
146+
candidate = {
147+
"concept_id": c.get("id", ""),
148+
"code": c.get("code", ""),
149+
"name": c.get("name", ""),
150+
"class": c.get("className", ""),
151+
"concept": c.get("standardConcept", ""),
152+
"validity": c.get("invalidReason", c.get("validity", "")),
153+
"domain": c.get("domain", c.get("domainId", "")),
154+
"vocab": c.get("vocabulary", c.get("vocabularyId", "")),
155+
"url": f"https://athena.ohdsi.org/search-terms/terms/{c.get('id', '')}",
201156
}
157+
candidates.append(candidate)
158+
159+
return {
160+
"candidates": candidates,
161+
"search_metadata": {
162+
"keyword_searched": keyword,
163+
"omop_table": omop_table,
164+
"omop_field": omop_field,
165+
"total_found": len(concepts),
166+
"candidates_returned": len(candidates),
167+
"selection_guidance": (
168+
"Select the most appropriate concept based on clinical context. "
169+
"Access omop://preferred_vocabularies for vocabulary preferences. "
170+
"Generally prefer Standard + Valid concepts from recommended vocabularies, "
171+
"but context may require different choices (e.g., research needs, "
172+
"specific vocabulary requirements, or non-standard mappings)."
173+
),
174+
},
175+
}
202176

203177

204178
@mcp.tool()

src/omop_mcp/utils.py

Lines changed: 95 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,70 @@
11
import re
22

3+
import aiohttp
34
import requests
45

6+
# Shared constants
7+
ATHENA_SEARCH_URL = "https://athena.ohdsi.org/search-terms"
8+
ATHENA_API_URL = "https://athena.ohdsi.org/api/v1/concepts"
9+
10+
# Shared headers
11+
INITIAL_PAGE_HEADERS = {
12+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
13+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
14+
"Accept-Language": "en-US,en;q=0.9",
15+
}
16+
17+
API_HEADERS = {
18+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
19+
"Accept": "application/json",
20+
"Accept-Language": "en-US,en;q=0.9",
21+
"Referer": "https://athena.ohdsi.org/search-terms",
22+
"Origin": "https://athena.ohdsi.org",
23+
"Sec-Fetch-Dest": "empty",
24+
"Sec-Fetch-Mode": "cors",
25+
"Sec-Fetch-Site": "same-origin",
26+
"Sec-Ch-Ua": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
27+
"Sec-Ch-Ua-Mobile": "?0",
28+
"Sec-Ch-Ua-Platform": '"macOS"',
29+
}
30+
31+
32+
def _extract_concepts_from_response(data):
33+
"""
34+
Extract concepts list from Athena API response.
35+
Handles various response formats.
36+
"""
37+
if isinstance(data, dict) and "content" in data:
38+
return data["content"]
39+
elif isinstance(data, list):
40+
return data
41+
elif isinstance(data, dict):
42+
for key in ("content", "results", "items", "concepts"):
43+
if key in data and isinstance(data[key], list):
44+
return data[key]
45+
return []
46+
547

648
def search_athena_concept(keyword: str):
749
"""
850
Search for OMOP concepts using the Athena web interface.
951
This function scrapes the search results from the Athena website.
1052
"""
11-
url = "https://athena.ohdsi.org/api/v1/concepts"
53+
session = requests.Session()
54+
try:
55+
session.get(ATHENA_SEARCH_URL, headers=INITIAL_PAGE_HEADERS, timeout=10)
56+
except Exception:
57+
pass
58+
1259
params = {"query": keyword}
13-
headers = {
14-
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
15-
"Accept": "application/json, text/plain, */*",
16-
"Accept-Language": "en-US,en;q=0.5",
17-
"Referer": "https://athena.ohdsi.org/search-terms",
18-
"Origin": "https://athena.ohdsi.org",
19-
}
2060

2161
try:
22-
response = requests.get(url, params=params, headers=headers, timeout=10)
62+
response = session.get(
63+
ATHENA_API_URL, params=params, headers=API_HEADERS, timeout=10
64+
)
2365
response.raise_for_status()
2466
data = response.json()
25-
26-
# Extract concepts from the response
27-
concepts = []
28-
if isinstance(data, dict) and "content" in data:
29-
concepts = data["content"]
30-
elif isinstance(data, list):
31-
concepts = data
32-
elif isinstance(data, dict):
33-
for key in ("content", "results", "items", "concepts"):
34-
if key in data and isinstance(data[key], list):
35-
concepts = data[key]
36-
break
37-
else:
38-
concepts = []
39-
40-
return concepts
41-
67+
return _extract_concepts_from_response(data)
4268
except requests.exceptions.RequestException as e:
4369
print(f"Error searching Athena: {e}")
4470
return []
@@ -47,6 +73,49 @@ def search_athena_concept(keyword: str):
4773
return []
4874

4975

76+
async def search_athena_concept_async(keyword: str):
    """
    Async version of search_athena_concept using aiohttp.

    Requests the Athena search page first so the session can pick up any
    cookies, then queries the concepts API with the keyword.

    Args:
        keyword: Search term sent as the ``query`` parameter.

    Returns:
        The list of concepts extracted from the API response. An empty list
        is returned for a successful call with no results.

    Raises:
        RuntimeError: If either request fails (connection error, HTTP error
            status, timeout, or any other unexpected error). The original
            exception is chained as the cause.
    """
    # Local import: only needed here to recognise timeouts.
    import asyncio

    timeout_sec = 10
    timeout = aiohttp.ClientTimeout(total=timeout_sec)

    async with aiohttp.ClientSession() as session:
        # First, visit the search page to establish a session and get cookies
        try:
            async with session.get(
                ATHENA_SEARCH_URL,
                headers=INITIAL_PAGE_HEADERS,
                timeout=timeout,
            ):
                pass
        except asyncio.TimeoutError as e:
            # asyncio.TimeoutError has an empty str(), so name the failure
            # explicitly instead of reporting it as an "unexpected error".
            raise RuntimeError(
                f"Timed out after {timeout_sec}s establishing session with Athena API"
            ) from e
        except aiohttp.ClientError as e:
            raise RuntimeError(
                f"Failed to establish session with Athena API: {e}"
            ) from e
        except Exception as e:
            raise RuntimeError(
                f"Unexpected error establishing Athena session: {e}"
            ) from e

        params = {"query": keyword}

        try:
            async with session.get(
                ATHENA_API_URL,
                params=params,
                headers=API_HEADERS,
                timeout=timeout,
            ) as response:
                response.raise_for_status()
                data = await response.json()
                return _extract_concepts_from_response(data)
        except asyncio.TimeoutError as e:
            raise RuntimeError(
                f"Athena API request timed out after {timeout_sec}s"
            ) from e
        except aiohttp.ClientError as e:
            raise RuntimeError(f"Athena API request failed: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Unexpected error calling Athena API: {e}") from e
118+
50119
def concept_id_exists_in_athena(concept_id: str) -> bool:
51120
"""Check if a concept exists in Athena."""
52121
results = search_athena_concept(concept_id)

uv.lock

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)