|
4 | 4 | from typing import Annotated |
5 | 5 |
|
6 | 6 | import httpx |
| 7 | +from pydantic import Field |
7 | 8 |
|
8 | 9 | from nucleotide_archive_mcp.ena_client import ENAClient |
9 | 10 | from nucleotide_archive_mcp.mcp import mcp |
@@ -107,100 +108,103 @@ async def _fetch_download_urls( |
107 | 108 |
|
108 | 109 | @mcp.tool |
109 | 110 | async def get_download_urls( |
110 | | - study_accession: Annotated[str, "Study accession from search_rna_studies (e.g., 'PRJDB2345')"], |
111 | | - file_format: Annotated[str, "File format: 'fastq', 'submitted', or 'sra'"] = "fastq", |
112 | | - include_md5: Annotated[bool, "Include MD5 checksums for file verification"] = True, |
| 111 | + study_accession: Annotated[ |
| 112 | + str, |
| 113 | + Field( |
| 114 | + description="Study accession from search results. Accepts SRP/ERP/DRP or PRJNA/PRJEB/PRJDB formats", |
| 115 | + examples=["PRJDB2345", "PRJNA123456", "SRP417965"], |
| 116 | + ), |
| 117 | + ], |
| 118 | + file_format: Annotated[ |
| 119 | + str, |
| 120 | + Field( |
| 121 | + description="File format to download (fastq, submitted, or sra). FASTQ is most common", |
| 122 | + examples=["fastq", "submitted", "sra"], |
| 123 | + ), |
| 124 | + ] = "fastq", |
| 125 | + include_md5: Annotated[ |
| 126 | + bool, |
| 127 | + Field( |
| 128 | + description="Include MD5 checksums for file integrity verification", |
| 129 | + ), |
| 130 | + ] = True, |
113 | 131 | ) -> dict: |
114 | 132 | """Get FTP download URLs for all sequencing data files in a study. |
115 | 133 |
|
116 | | - **LLM Usage**: Call after search_rna_studies() to get download URLs for selected studies. |
117 | | - Returns FTP URLs that can be used with wget/curl or passed to generate_download_script(). |
118 | | -
|
119 | | - Parameters |
| 134 | + Usage Tips |
120 | 135 | ---------- |
121 | | - study_accession : str |
122 | | - Study accession from search results (e.g., "PRJDB2345", "PRJNA123456", "SRP417965") |
123 | | - file_format : str, optional |
124 | | - File format: "fastq" (processed FASTQ files, most common), "submitted" (original |
125 | | - submitted files), "sra" (SRA format). Default: "fastq" |
126 | | - include_md5 : bool, optional |
127 | | - Include MD5 checksums for file integrity verification. Default: True |
| 136 | + Call after search_rna_studies() to get download URLs for selected studies. Returns FTP URLs |
| 137 | + that can be used with wget/curl. For a ready-made script, call generate_download_script() with the same study accession. |
128 | 138 |
|
129 | 139 | Returns |
130 | 140 | ------- |
131 | 141 | dict |
132 | | - - study_accession (str): Queried study |
133 | | - - file_count (int): Total number of files |
134 | | - - total_size_gb (float): Total download size in GB |
135 | | - - runs (list[dict]): Per-run file info, each containing: |
| 142 | + Dictionary containing: |
| 143 | + - study_accession: Queried study |
| 144 | + - file_count: Total number of files |
| 145 | + - total_size_gb: Total download size in GB |
| 146 | + - runs: List of per-run file info, each with: |
136 | 147 | - run_accession: Run identifier |
137 | 148 | - file_count: Files in this run (2 for paired-end) |
138 | 149 | - size_gb: Run size in GB |
139 | 150 | - urls: List of FTP URLs (ftp://...) |
140 | 151 | - md5_checksums: List of MD5 hashes (if include_md5=True) |
141 | | -
|
142 | | - Examples |
143 | | - -------- |
144 | | - Get FASTQ URLs after finding studies: |
145 | | - study_accession="PRJDB2345" |
146 | | -
|
147 | | - Check file sizes before downloading: |
148 | | - study_accession="SRP417965", file_format="fastq" |
| 152 | + - message: Info message if no files found |
| 153 | + - error: Error message if any |
149 | 154 | """ |
150 | 155 | return await _fetch_download_urls(study_accession, file_format, include_md5) |
151 | 156 |
|
152 | 157 |
|
153 | 158 | @mcp.tool |
154 | 159 | async def generate_download_script( |
155 | | - study_accession: Annotated[str, "Study accession from search_rna_studies"], |
156 | | - output_path: Annotated[str | None, "Save path for script (e.g., './download.sh'). None=return only"] = None, |
157 | | - script_type: Annotated[str, "Download tool: 'wget' or 'curl'"] = "wget", |
158 | | - file_format: Annotated[str, "File format: 'fastq', 'submitted', or 'sra'"] = "fastq", |
| 160 | + study_accession: Annotated[ |
| 161 | + str, |
| 162 | + Field( |
| 163 | + description="Study accession from search results. Accepts SRP/ERP/DRP or PRJNA/PRJEB/PRJDB formats", |
| 164 | + examples=["PRJDB2345", "SRP417965", "PRJNA123456"], |
| 165 | + ), |
| 166 | + ], |
| 167 | + output_path: Annotated[ |
| 168 | + str | None, |
| 169 | + Field( |
| 170 | + description="File path to save script (e.g., './download.sh'). If None, returns script content without saving. Script will be made executable (chmod 755)", |
| 171 | + examples=["./download.sh", "./download_study.sh", None], |
| 172 | + ), |
| 173 | + ] = None, |
| 174 | + script_type: Annotated[ |
| 175 | + str, |
| 176 | + Field( |
| 177 | + description="Download tool to use (wget or curl). wget is recommended for resumable downloads with -nc flag", |
| 178 | + examples=["wget", "curl"], |
| 179 | + ), |
| 180 | + ] = "wget", |
| 181 | + file_format: Annotated[ |
| 182 | + str, |
| 183 | + Field( |
| 184 | + description="File format to download (fastq, submitted, or sra). FASTQ is most common", |
| 185 | + examples=["fastq", "submitted", "sra"], |
| 186 | + ), |
| 187 | + ] = "fastq", |
159 | 188 | ) -> dict: |
160 | 189 | """Generate executable bash script to download all study data files. |
161 | 190 |
|
162 | | - **LLM Usage**: After identifying interesting studies, generate a download script for the |
163 | | - user to execute. Returns script content and optionally saves to file. Script includes |
164 | | - MD5 verification commands. |
165 | | -
|
166 | | - **Typical workflow**: |
167 | | - 1. search_rna_studies() → find studies |
168 | | - 2. get_study_details() → verify it's the right study |
169 | | - 3. generate_download_script() → create download script |
170 | | - 4. User executes the script to download data |
171 | | -
|
172 | | - Parameters |
| 191 | + Usage Tips |
173 | 192 | ---------- |
174 | | - study_accession : str |
175 | | - Study accession (e.g., "PRJDB2345", "SRP417965") |
176 | | - output_path : str, optional |
177 | | - File path to save script (e.g., "./download_study.sh"). If None, returns |
178 | | - script content without saving. Script will be made executable (chmod 755). |
179 | | - script_type : str, optional |
180 | | - Download tool: "wget" (recommended, resumable with -nc) or "curl" (resumable with -C -) |
181 | | - file_format : str, optional |
182 | | - File format: "fastq" (most common), "submitted", "sra" |
| 193 | + After identifying interesting studies, generate a download script for the user to execute. |
| 194 | + Returns script content and optionally saves to file. Script includes MD5 verification commands. |
| 195 | + Typical workflow: search_rna_studies() → get_study_details() → generate_download_script(). |
183 | 196 |
|
184 | 197 | Returns |
185 | 198 | ------- |
186 | 199 | dict |
187 | | - - study_accession (str): Queried study |
188 | | - - script_content (str): Complete bash script (can be directly executed) |
189 | | - - file_count (int): Number of files script will download |
190 | | - - total_size_gb (float): Total download size |
191 | | - - script_path (str): Save location (if output_path provided) |
192 | | - - message (str): Success/error message |
193 | | -
|
194 | | - Examples |
195 | | - -------- |
196 | | - Generate and return wget script: |
197 | | - study_accession="PRJDB2345" |
198 | | -
|
199 | | - Save wget script to file (recommended): |
200 | | - study_accession="SRP417965", output_path="./download_srp417965.sh" |
201 | | -
|
202 | | - Generate curl-based script: |
203 | | - study_accession="PRJDB2345", script_type="curl" |
| 200 | + Dictionary containing: |
| 201 | + - study_accession: Queried study |
| 202 | + - script_content: Complete bash script ready to execute |
| 203 | + - file_count: Number of files the script will download |
| 204 | + - total_size_gb: Total download size in GB |
| 205 | + - script_path: Save location (if output_path provided) |
| 206 | + - message: Success message (if saved to file) |
| 207 | + - error: Error message if any |
204 | 208 | """ |
205 | 209 | # First get the download URLs |
206 | 210 | url_data = await _fetch_download_urls( |
|
0 commit comments