biocontext-ai
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/src/nucleotide_archive_mcp/tools/downloads.py b/src/nucleotide_archive_mcp/tools/downloads.py
Lines changed: 71 additions & 67 deletions
diff --git a/src/nucleotide_archive_mcp/tools/metadata.py b/src/nucleotide_archive_mcp/tools/metadata.py
Lines changed: 58 additions & 69 deletions
@@ -4,7 +4,7 @@ requires = [ "hatchling" ]
 
 [project]
 name = "nucleotide-archive-mcp"
-version = "0.0.3"
+version = "0.0.4"
 description = "MCP server for searching European Nucleotide Archive (ENA) datasets. Find RNA-seq studies, retrieve metadata, and discover related publications to validate research hypotheses."
 readme = "README.md"
 license = { file = "LICENSE" }
 
@@ -4,6 +4,7 @@
 from typing import Annotated
 
 import httpx
+from pydantic import Field
 
 from nucleotide_archive_mcp.ena_client import ENAClient
 from nucleotide_archive_mcp.mcp import mcp
@@ -107,100 +108,103 @@ async def _fetch_download_urls(
 
 @mcp.tool
 async def get_download_urls(
-    study_accession: Annotated[str, "Study accession from search_rna_studies (e.g., 'PRJDB2345')"],
-    file_format: Annotated[str, "File format: 'fastq', 'submitted', or 'sra'"] = "fastq",
-    include_md5: Annotated[bool, "Include MD5 checksums for file verification"] = True,
+    study_accession: Annotated[
+        str,
+        Field(
+            description="Study accession from search results. Accepts SRP/ERP/DRP or PRJNA/PRJEB/PRJDB formats",
+            examples=["PRJDB2345", "PRJNA123456", "SRP417965"],
+        ),
+    ],
+    file_format: Annotated[
+        str,
+        Field(
+            description="File format to download (fastq, submitted, or sra). FASTQ is most common",
+            examples=["fastq", "submitted", "sra"],
+        ),
+    ] = "fastq",
+    include_md5: Annotated[
+        bool,
+        Field(
+            description="Include MD5 checksums for file integrity verification",
+        ),
+    ] = True,
 ) -> dict:
     """Get FTP download URLs for all sequencing data files in a study.
 
-    **LLM Usage**: Call after search_rna_studies() to get download URLs for selected studies.
-    Returns FTP URLs that can be used with wget/curl or passed to generate_download_script().
-
-    Parameters
+    Usage Tips
     ----------
-    study_accession : str
-        Study accession from search results (e.g., "PRJDB2345", "PRJNA123456", "SRP417965")
-    file_format : str, optional
-        File format: "fastq" (processed FASTQ files, most common), "submitted" (original
-        submitted files), "sra" (SRA format). Default: "fastq"
-    include_md5 : bool, optional
-        Include MD5 checksums for file integrity verification. Default: True
+    Call after search_rna_studies() to get download URLs for selected studies. Returns FTP URLs
+    that can be used with wget/curl or passed to generate_download_script().
 
     Returns
     -------
     dict
-        - study_accession (str): Queried study
-        - file_count (int): Total number of files
-        - total_size_gb (float): Total download size in GB
-        - runs (list[dict]): Per-run file info, each containing:
+        Dictionary containing:
+        - study_accession: Queried study
+        - file_count: Total number of files
+        - total_size_gb: Total download size in GB
+        - runs: List of per-run file info, each with:
             - run_accession: Run identifier
             - file_count: Files in this run (2 for paired-end)
             - size_gb: Run size in GB
             - urls: List of FTP URLs (ftp://...)
             - md5_checksums: List of MD5 hashes (if include_md5=True)
-
-    Examples
-    --------
-    Get FASTQ URLs after finding studies:
-        study_accession="PRJDB2345"
-
-    Check file sizes before downloading:
-        study_accession="SRP417965", file_format="fastq"
+        - message: Info message if no files found
+        - error: Error message if any
     """
     return await _fetch_download_urls(study_accession, file_format, include_md5)
 
 
 @mcp.tool
 async def generate_download_script(
-    study_accession: Annotated[str, "Study accession from search_rna_studies"],
-    output_path: Annotated[str | None, "Save path for script (e.g., './download.sh'). None=return only"] = None,
-    script_type: Annotated[str, "Download tool: 'wget' or 'curl'"] = "wget",
-    file_format: Annotated[str, "File format: 'fastq', 'submitted', or 'sra'"] = "fastq",
+    study_accession: Annotated[
+        str,
+        Field(
+            description="Study accession from search results. Accepts SRP/ERP/DRP or PRJNA/PRJEB/PRJDB formats",
+            examples=["PRJDB2345", "SRP417965", "PRJNA123456"],
+        ),
+    ],
+    output_path: Annotated[
+        str | None,
+        Field(
+            description="File path to save script (e.g., './download.sh'); a saved script is made executable (chmod 755). If None, returns script content without saving",
+            examples=["./download.sh", "./download_study.sh", None],
+        ),
+    ] = None,
+    script_type: Annotated[
+        str,
+        Field(
+            description="Download tool to use (wget or curl). wget is recommended: with the -nc flag, re-running the script skips files that already exist",
+            examples=["wget", "curl"],
+        ),
+    ] = "wget",
+    file_format: Annotated[
+        str,
+        Field(
+            description="File format to download (fastq, submitted, or sra). FASTQ is most common",
+            examples=["fastq", "submitted", "sra"],
+        ),
+    ] = "fastq",
 ) -> dict:
     """Generate executable bash script to download all study data files.
 
-    **LLM Usage**: After identifying interesting studies, generate a download script for the
-    user to execute. Returns script content and optionally saves to file. Script includes
-    MD5 verification commands.
-
-    **Typical workflow**:
-    1. search_rna_studies() → find studies
-    2. get_study_details() → verify it's the right study
-    3. generate_download_script() → create download script
-    4. User executes the script to download data
-
-    Parameters
+    Usage Tips
     ----------
-    study_accession : str
-        Study accession (e.g., "PRJDB2345", "SRP417965")
-    output_path : str, optional
-        File path to save script (e.g., "./download_study.sh"). If None, returns
-        script content without saving. Script will be made executable (chmod 755).
-    script_type : str, optional
-        Download tool: "wget" (recommended, resumable with -nc) or "curl" (resumable with -C -)
-    file_format : str, optional
-        File format: "fastq" (most common), "submitted", "sra"
+    After identifying interesting studies, generate a download script for the user to execute.
+    Returns script content and optionally saves to file. Script includes MD5 verification commands.
+    Typical workflow: search_rna_studies() → get_study_details() → generate_download_script().
 
     Returns
     -------
     dict
-        - study_accession (str): Queried study
-        - script_content (str): Complete bash script (can be directly executed)
-        - file_count (int): Number of files script will download
-        - total_size_gb (float): Total download size
-        - script_path (str): Save location (if output_path provided)
-        - message (str): Success/error message
-
-    Examples
-    --------
-    Generate and return wget script:
-        study_accession="PRJDB2345"
-
-    Save wget script to file (recommended):
-        study_accession="SRP417965", output_path="./download_srp417965.sh"
-
-    Generate curl-based script:
-        study_accession="PRJDB2345", script_type="curl"
+        Dictionary containing:
+        - study_accession: Queried study
+        - script_content: Complete bash script ready to execute
+        - file_count: Number of files the script will download
+        - total_size_gb: Total download size in GB
+        - script_path: Save location (if output_path provided)
+        - message: Success message (if saved to file)
+        - error: Error message if any
     """
     # First get the download URLs
     url_data = await _fetch_download_urls(
 
@@ -1,54 +1,48 @@
 """Tools for discovering available fields and metadata."""
 
-from typing import Any
+from typing import Annotated, Any
 
 import httpx
+from pydantic import Field
 
 from nucleotide_archive_mcp.ena_client import ENAClient
 from nucleotide_archive_mcp.mcp import mcp
 
 
 @mcp.tool
 async def get_available_fields(
-    result_type: str = "read_study",
-    field_category: str = "all",
+    result_type: Annotated[
+        str,
+        Field(
+            description="Type of ENA data to query (read_study, study, sample, read_run, read_experiment, analysis)",
+            examples=["read_study", "sample", "read_run"],
+        ),
+    ] = "read_study",
+    field_category: Annotated[
+        str,
+        Field(
+            description="Which fields to return (all, search, return)",
+            examples=["all", "search", "return"],
+        ),
+    ] = "all",
 ) -> dict:
     """Get available search and return fields for an ENA result type.
 
-    This tool helps you discover what fields you can search on and what
-    metadata fields are available for a given data type in ENA.
-
-    Parameters
+    Usage Tips
     ----------
-    result_type : str, optional
-        Type of data to query. Common options:
-        - "read_study": RNA-seq studies (default, recommended)
-        - "study": All studies
-        - "sample": Sample records
-        - "read_run": Individual sequencing runs
-        - "read_experiment": Sequencing experiments
-        - "analysis": Analysis records
-    field_category : str, optional
-        Which fields to return:
-        - "all": Both search and return fields (default)
-        - "search": Only searchable fields
-        - "return": Only returnable fields
+    Use to discover what fields you can search on and what metadata fields are available
+    for a given data type in ENA. Helpful for building custom queries with build_custom_query().
 
     Returns
     -------
     dict
         Dictionary containing:
         - result_type: The queried result type
-        - search_fields: List of searchable fields (if requested)
-        - return_fields: List of returnable fields (if requested)
-
-    Examples
-    --------
-    Get all fields for RNA-seq studies:
-        result_type="read_study"
-
-    Get only searchable fields for samples:
-        result_type="sample", field_category="search"
+        - search_fields: List of searchable fields with id, description, type (if requested)
+        - search_fields_count: Number of search fields (if requested)
+        - return_fields: List of returnable fields with id, description, type (if requested)
+        - return_fields_count: Number of return fields (if requested)
+        - error: Error message if any
     """
     client = ENAClient()
 
@@ -96,20 +90,19 @@ async def get_available_fields(
 async def get_result_types() -> dict:
     """Get all available result types (data categories) in ENA.
 
-    This tool shows what types of data you can search for in the
-    European Nucleotide Archive.
+    Usage Tips
+    ----------
+    Use to discover what types of data you can search for in the European Nucleotide Archive.
+    Most users will use read_study or study for RNA-seq searches.
 
     Returns
     -------
     dict
         Dictionary containing:
         - count: Number of available result types
-        - result_types: List of result types with descriptions
-
-    Examples
-    --------
-    Discover what data types are available:
-        (no parameters needed)
+        - result_types: List of result types with id, description, primaryAccessionType, recordCount, lastUpdated
+        - recommended_for_rna_studies: Recommended types for RNA studies
+        - error: Error message if any
     """
     client = ENAClient()
 
@@ -145,47 +138,43 @@ async def get_result_types() -> dict:
 
 @mcp.tool
 async def build_custom_query(
-    field_conditions: list[dict[str, str]],
-    operator: str = "AND",
+    field_conditions: Annotated[
+        list[dict[str, str]],
+        Field(
+            description='List of conditions, each with "field", "operator" (=, >=, <=, !=, contains), and "value"',
+            examples=[
+                [
+                    {"field": "tax_id", "operator": "=", "value": "9606"},
+                    {"field": "library_strategy", "operator": "=", "value": "RNA-Seq"},
+                ]
+            ],
+        ),
+    ],
+    operator: Annotated[
+        str,
+        Field(
+            description="Logical operator to combine conditions (AND or OR)",
+            examples=["AND", "OR"],
+        ),
+    ] = "AND",
 ) -> dict:
     """Build a custom ENA query from field conditions.
 
-    This advanced tool helps construct complex queries by combining multiple
-    field conditions with logical operators. Useful for precise filtering.
-
-    Parameters
+    Usage Tips
     ----------
-    field_conditions : list[dict]
-        List of conditions, each with keys:
-        - "field": Field name (e.g., "tax_id", "library_strategy")
-        - "operator": Comparison operator ("=", ">=", "<=", "!=", or "contains")
-        - "value": Value to compare
-    operator : str, optional
-        Logical operator to combine conditions: "AND" or "OR" (default: "AND")
+    Advanced tool for constructing complex queries by combining multiple field conditions with
+    logical operators. Use for precise filtering beyond what search_rna_studies() offers.
+    Call get_available_fields() first to discover searchable field names.
 
     Returns
     -------
     dict
         Dictionary containing:
         - query: The constructed ENA query string
-        - field_count: Number of conditions
-        - example_usage: How to use this query
-
-    Examples
-    --------
-    Build a query for human RNA-seq studies after 2020:
-        field_conditions=[
-            {"field": "tax_id", "operator": "=", "value": "9606"},
-            {"field": "library_strategy", "operator": "=", "value": "RNA-Seq"},
-            {"field": "first_public", "operator": ">=", "value": "2020-01-01"}
-        ]
-
-    Find studies with specific keywords:
-        field_conditions=[
-            {"field": "study_title", "operator": "contains", "value": "cancer"},
-            {"field": "study_description", "operator": "contains", "value": "treatment"}
-        ],
-        operator="OR"
+        - field_count: Number of conditions used
+        - operator: Logical operator used
+        - example_usage: How to use this query with other tools
+        - error: Error message if any
     """
     if not field_conditions:
         return {