Skip to content

Commit 8f3f95a

Browse files
committed
Improve docstrings
1 parent a79ff53 commit 8f3f95a

File tree

6 files changed

+320
-329
lines changed

6 files changed

+320
-329
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ requires = [ "hatchling" ]
44

55
[project]
66
name = "nucleotide-archive-mcp"
7-
version = "0.0.3"
7+
version = "0.0.4"
88
description = "MCP server for searching European Nucleotide Archive (ENA) datasets. Find RNA-seq studies, retrieve metadata, and discover related publications to validate research hypotheses."
99
readme = "README.md"
1010
license = { file = "LICENSE" }

src/nucleotide_archive_mcp/tools/downloads.py

Lines changed: 71 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Annotated
55

66
import httpx
7+
from pydantic import Field
78

89
from nucleotide_archive_mcp.ena_client import ENAClient
910
from nucleotide_archive_mcp.mcp import mcp
@@ -107,100 +108,103 @@ async def _fetch_download_urls(
107108

108109
@mcp.tool
109110
async def get_download_urls(
110-
study_accession: Annotated[str, "Study accession from search_rna_studies (e.g., 'PRJDB2345')"],
111-
file_format: Annotated[str, "File format: 'fastq', 'submitted', or 'sra'"] = "fastq",
112-
include_md5: Annotated[bool, "Include MD5 checksums for file verification"] = True,
111+
study_accession: Annotated[
112+
str,
113+
Field(
114+
description="Study accession from search results. Accepts SRP/ERP/DRP or PRJNA/PRJEB/PRJDB formats",
115+
examples=["PRJDB2345", "PRJNA123456", "SRP417965"],
116+
),
117+
],
118+
file_format: Annotated[
119+
str,
120+
Field(
121+
description="File format to download (fastq, submitted, or sra). FASTQ is most common",
122+
examples=["fastq", "submitted", "sra"],
123+
),
124+
] = "fastq",
125+
include_md5: Annotated[
126+
bool,
127+
Field(
128+
description="Include MD5 checksums for file integrity verification",
129+
),
130+
] = True,
113131
) -> dict:
114132
"""Get FTP download URLs for all sequencing data files in a study.
115133
116-
**LLM Usage**: Call after search_rna_studies() to get download URLs for selected studies.
117-
Returns FTP URLs that can be used with wget/curl or passed to generate_download_script().
118-
119-
Parameters
134+
Usage Tips
120135
----------
121-
study_accession : str
122-
Study accession from search results (e.g., "PRJDB2345", "PRJNA123456", "SRP417965")
123-
file_format : str, optional
124-
File format: "fastq" (processed FASTQ files, most common), "submitted" (original
125-
submitted files), "sra" (SRA format). Default: "fastq"
126-
include_md5 : bool, optional
127-
Include MD5 checksums for file integrity verification. Default: True
136+
Call after search_rna_studies() to get download URLs for selected studies. Returns FTP URLs
137+
that can be used directly with wget/curl; generate_download_script() takes the same accession.
128138
129139
Returns
130140
-------
131141
dict
132-
- study_accession (str): Queried study
133-
- file_count (int): Total number of files
134-
- total_size_gb (float): Total download size in GB
135-
- runs (list[dict]): Per-run file info, each containing:
142+
Dictionary containing:
143+
- study_accession: Queried study
144+
- file_count: Total number of files
145+
- total_size_gb: Total download size in GB
146+
- runs: List of per-run file info, each with:
136147
- run_accession: Run identifier
137148
- file_count: Files in this run (2 for paired-end)
138149
- size_gb: Run size in GB
139150
- urls: List of FTP URLs (ftp://...)
140151
- md5_checksums: List of MD5 hashes (if include_md5=True)
141-
142-
Examples
143-
--------
144-
Get FASTQ URLs after finding studies:
145-
study_accession="PRJDB2345"
146-
147-
Check file sizes before downloading:
148-
study_accession="SRP417965", file_format="fastq"
152+
- message: Info message if no files found
153+
- error: Error message if any
149154
"""
150155
return await _fetch_download_urls(study_accession, file_format, include_md5)
151156

152157

153158
@mcp.tool
154159
async def generate_download_script(
155-
study_accession: Annotated[str, "Study accession from search_rna_studies"],
156-
output_path: Annotated[str | None, "Save path for script (e.g., './download.sh'). None=return only"] = None,
157-
script_type: Annotated[str, "Download tool: 'wget' or 'curl'"] = "wget",
158-
file_format: Annotated[str, "File format: 'fastq', 'submitted', or 'sra'"] = "fastq",
160+
study_accession: Annotated[
161+
str,
162+
Field(
163+
description="Study accession from search results. Accepts SRP/ERP/DRP or PRJNA/PRJEB/PRJDB formats",
164+
examples=["PRJDB2345", "SRP417965", "PRJNA123456"],
165+
),
166+
],
167+
output_path: Annotated[
168+
str | None,
169+
Field(
170+
description="File path to save script (e.g., './download.sh'). If None, returns script content without saving. Script will be made executable (chmod 755)",
171+
examples=["./download.sh", "./download_study.sh", None],
172+
),
173+
] = None,
174+
script_type: Annotated[
175+
str,
176+
Field(
177+
description="Download tool to use (wget or curl). wget is recommended for resumable downloads with -nc flag",
178+
examples=["wget", "curl"],
179+
),
180+
] = "wget",
181+
file_format: Annotated[
182+
str,
183+
Field(
184+
description="File format to download (fastq, submitted, or sra). FASTQ is most common",
185+
examples=["fastq", "submitted", "sra"],
186+
),
187+
] = "fastq",
159188
) -> dict:
160189
"""Generate executable bash script to download all study data files.
161190
162-
**LLM Usage**: After identifying interesting studies, generate a download script for the
163-
user to execute. Returns script content and optionally saves to file. Script includes
164-
MD5 verification commands.
165-
166-
**Typical workflow**:
167-
1. search_rna_studies() → find studies
168-
2. get_study_details() → verify it's the right study
169-
3. generate_download_script() → create download script
170-
4. User executes the script to download data
171-
172-
Parameters
191+
Usage Tips
173192
----------
174-
study_accession : str
175-
Study accession (e.g., "PRJDB2345", "SRP417965")
176-
output_path : str, optional
177-
File path to save script (e.g., "./download_study.sh"). If None, returns
178-
script content without saving. Script will be made executable (chmod 755).
179-
script_type : str, optional
180-
Download tool: "wget" (recommended, resumable with -nc) or "curl" (resumable with -C -)
181-
file_format : str, optional
182-
File format: "fastq" (most common), "submitted", "sra"
193+
After identifying interesting studies, generate a download script for the user to execute.
194+
Returns script content and optionally saves to file. Script includes MD5 verification commands.
195+
Typical workflow: search_rna_studies() → get_study_details() → generate_download_script().
183196
184197
Returns
185198
-------
186199
dict
187-
- study_accession (str): Queried study
188-
- script_content (str): Complete bash script (can be directly executed)
189-
- file_count (int): Number of files script will download
190-
- total_size_gb (float): Total download size
191-
- script_path (str): Save location (if output_path provided)
192-
- message (str): Success/error message
193-
194-
Examples
195-
--------
196-
Generate and return wget script:
197-
study_accession="PRJDB2345"
198-
199-
Save wget script to file (recommended):
200-
study_accession="SRP417965", output_path="./download_srp417965.sh"
201-
202-
Generate curl-based script:
203-
study_accession="PRJDB2345", script_type="curl"
200+
Dictionary containing:
201+
- study_accession: Queried study
202+
- script_content: Complete bash script ready to execute
203+
- file_count: Number of files the script will download
204+
- total_size_gb: Total download size in GB
205+
- script_path: Save location (if output_path provided)
206+
- message: Success message (if saved to file)
207+
- error: Error message if any
204208
"""
205209
# First get the download URLs
206210
url_data = await _fetch_download_urls(

src/nucleotide_archive_mcp/tools/metadata.py

Lines changed: 58 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,48 @@
11
"""Tools for discovering available fields and metadata."""
22

3-
from typing import Any
3+
from typing import Annotated, Any
44

55
import httpx
6+
from pydantic import Field
67

78
from nucleotide_archive_mcp.ena_client import ENAClient
89
from nucleotide_archive_mcp.mcp import mcp
910

1011

1112
@mcp.tool
1213
async def get_available_fields(
13-
result_type: str = "read_study",
14-
field_category: str = "all",
14+
result_type: Annotated[
15+
str,
16+
Field(
17+
description="Type of ENA data to query (read_study, study, sample, read_run, read_experiment, analysis)",
18+
examples=["read_study", "sample", "read_run"],
19+
),
20+
] = "read_study",
21+
field_category: Annotated[
22+
str,
23+
Field(
24+
description="Which fields to return (all, search, return)",
25+
examples=["all", "search", "return"],
26+
),
27+
] = "all",
1528
) -> dict:
1629
"""Get available search and return fields for an ENA result type.
1730
18-
This tool helps you discover what fields you can search on and what
19-
metadata fields are available for a given data type in ENA.
20-
21-
Parameters
31+
Usage Tips
2232
----------
23-
result_type : str, optional
24-
Type of data to query. Common options:
25-
- "read_study": RNA-seq studies (default, recommended)
26-
- "study": All studies
27-
- "sample": Sample records
28-
- "read_run": Individual sequencing runs
29-
- "read_experiment": Sequencing experiments
30-
- "analysis": Analysis records
31-
field_category : str, optional
32-
Which fields to return:
33-
- "all": Both search and return fields (default)
34-
- "search": Only searchable fields
35-
- "return": Only returnable fields
33+
Use to discover what fields you can search on and what metadata fields are available
34+
for a given data type in ENA. Helpful for building custom queries with build_custom_query().
3635
3736
Returns
3837
-------
3938
dict
4039
Dictionary containing:
4140
- result_type: The queried result type
42-
- search_fields: List of searchable fields (if requested)
43-
- return_fields: List of returnable fields (if requested)
44-
45-
Examples
46-
--------
47-
Get all fields for RNA-seq studies:
48-
result_type="read_study"
49-
50-
Get only searchable fields for samples:
51-
result_type="sample", field_category="search"
41+
- search_fields: List of searchable fields with id, description, type (if requested)
42+
- search_fields_count: Number of search fields (if requested)
43+
- return_fields: List of returnable fields with id, description, type (if requested)
44+
- return_fields_count: Number of return fields (if requested)
45+
- error: Error message if any
5246
"""
5347
client = ENAClient()
5448

@@ -96,20 +90,19 @@ async def get_available_fields(
9690
async def get_result_types() -> dict:
9791
"""Get all available result types (data categories) in ENA.
9892
99-
This tool shows what types of data you can search for in the
100-
European Nucleotide Archive.
93+
Usage Tips
94+
----------
95+
Use to discover what types of data you can search for in the European Nucleotide Archive.
96+
Most users will use read_study or study for RNA-seq searches.
10197
10298
Returns
10399
-------
104100
dict
105101
Dictionary containing:
106102
- count: Number of available result types
107-
- result_types: List of result types with descriptions
108-
109-
Examples
110-
--------
111-
Discover what data types are available:
112-
(no parameters needed)
103+
- result_types: List of result types with id, description, primaryAccessionType, recordCount, lastUpdated
104+
- recommended_for_rna_studies: Recommended types for RNA studies
105+
- error: Error message if any
113106
"""
114107
client = ENAClient()
115108

@@ -145,47 +138,43 @@ async def get_result_types() -> dict:
145138

146139
@mcp.tool
147140
async def build_custom_query(
148-
field_conditions: list[dict[str, str]],
149-
operator: str = "AND",
141+
field_conditions: Annotated[
142+
list[dict[str, str]],
143+
Field(
144+
description='List of conditions, each with "field", "operator" (=, >=, <=, !=, contains), and "value"',
145+
examples=[
146+
[
147+
{"field": "tax_id", "operator": "=", "value": "9606"},
148+
{"field": "library_strategy", "operator": "=", "value": "RNA-Seq"},
149+
]
150+
],
151+
),
152+
],
153+
operator: Annotated[
154+
str,
155+
Field(
156+
description="Logical operator to combine conditions (AND or OR)",
157+
examples=["AND", "OR"],
158+
),
159+
] = "AND",
150160
) -> dict:
151161
"""Build a custom ENA query from field conditions.
152162
153-
This advanced tool helps construct complex queries by combining multiple
154-
field conditions with logical operators. Useful for precise filtering.
155-
156-
Parameters
163+
Usage Tips
157164
----------
158-
field_conditions : list[dict]
159-
List of conditions, each with keys:
160-
- "field": Field name (e.g., "tax_id", "library_strategy")
161-
- "operator": Comparison operator ("=", ">=", "<=", "!=", or "contains")
162-
- "value": Value to compare
163-
operator : str, optional
164-
Logical operator to combine conditions: "AND" or "OR" (default: "AND")
165+
Advanced tool for constructing complex queries by combining multiple field conditions with
166+
logical operators. Use for precise filtering beyond what search_rna_studies() offers.
167+
Call get_available_fields() first to discover searchable field names.
165168
166169
Returns
167170
-------
168171
dict
169172
Dictionary containing:
170173
- query: The constructed ENA query string
171-
- field_count: Number of conditions
172-
- example_usage: How to use this query
173-
174-
Examples
175-
--------
176-
Build a query for human RNA-seq studies after 2020:
177-
field_conditions=[
178-
{"field": "tax_id", "operator": "=", "value": "9606"},
179-
{"field": "library_strategy", "operator": "=", "value": "RNA-Seq"},
180-
{"field": "first_public", "operator": ">=", "value": "2020-01-01"}
181-
]
182-
183-
Find studies with specific keywords:
184-
field_conditions=[
185-
{"field": "study_title", "operator": "contains", "value": "cancer"},
186-
{"field": "study_description", "operator": "contains", "value": "treatment"}
187-
],
188-
operator="OR"
174+
- field_count: Number of conditions used
175+
- operator: Logical operator used
176+
- example_usage: How to use this query with other tools
177+
- error: Error message if any
189178
"""
190179
if not field_conditions:
191180
return {

0 commit comments

Comments
 (0)