|
1 | 1 | from app.core.logger import get_logger |
2 | 2 | import httpx |
3 | 3 | import asyncio |
| 4 | +import re |
4 | 5 | from app.schemas.vcelldb_schema import BiomodelRequestParams, SimulationRequestParams |
5 | 6 | from urllib.parse import urlencode, quote |
6 | 7 | from langfuse import observe |
|
10 | 11 | logger = get_logger("vcelldb_service") |
11 | 12 |
|
12 | 13 |
|
def sanitize_vcml_content(vcml_content: str) -> str:
    """
    Sanitizes VCML content by removing every <Image> element.

    Both paired elements (``<Image ...> ... </Image>``, together with everything
    between the tags) and self-closing elements (``<Image ... />``) are removed.
    Elements whose name merely starts with "Image" (for example ``<ImageData>``)
    are left alone unless they sit inside a removed <Image> element.

    This is a regex-based filter, not an XML parser: an <Image> element nested
    inside another <Image> element ends the match at the first closing tag, and
    an opening <Image> tag with no closing tag (e.g. in truncated content) is
    left untouched.

    Args:
        vcml_content (str): Raw VCML content as string.

    Returns:
        str: Sanitized VCML content with all Image elements removed and
            whitespace-only gaps between lines collapsed to a single newline.
    """
    # The lookahead after the tag name requires whitespace, "/" or ">" so that
    # only elements named exactly "Image" match (not <ImageData>, <Image-x>, ...).
    # The self-closing alternative is tried first; otherwise "<Image .../>" would
    # be taken as an opening tag and everything up to some later </Image> would
    # be deleted along with it.
    image_element = (
        r'<Image(?=[\s/>])[^>]*?/>'
        r'|<Image(?=[\s/>])[^>]*>.*?</Image\s*>'
    )

    # DOTALL lets ".*?" span the multi-line body of a paired element.
    sanitized_content, removed = re.subn(
        image_element,
        '',
        vcml_content,
        flags=re.DOTALL,
    )

    # Clean up any extra whitespace that might be left after removing images
    sanitized_content = re.sub(r'\n\s*\n', '\n', sanitized_content)

    # Only report a removal when one actually happened.
    if removed:
        logger.info("VCML content sanitized: Image tags removed")
    return sanitized_content
| 39 | + |
| 40 | + |
13 | 41 | async def check_vcell_connectivity() -> bool: |
14 | 42 | """ |
15 | 43 | Check if the VCell API is reachable by attempting to resolve the hostname. |
@@ -138,9 +166,9 @@ async def get_vcml_file( |
138 | 166 | response.raise_for_status() |
139 | 167 |
|
140 | 168 | if truncate: |
141 | | - return response.text[:500] |
| 169 | + return sanitize_vcml_content(response.text[:500]) |
142 | 170 | else: |
143 | | - return response.text |
| 171 | + return sanitize_vcml_content(response.text) |
144 | 172 |
|
145 | 173 | except httpx.HTTPStatusError as e: |
146 | 174 | logger.error( |
|
0 commit comments