Skip to content

Commit 026a223

Browse files
committed
feat: add search_datasets tool
1 parent d5bb5c8 commit 026a223

File tree

7 files changed

+284
-44
lines changed

7 files changed

+284
-44
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,10 +142,12 @@ For HTTP/SSE mode, connect using a local URL in your MCP client configuration:
142142
You can also debug the server using the [MCP Inspector](https://github.com/modelcontextprotocol/inspector) tool:
143143

144144
```bash
145+
# Run in UI mode with stdio transport (can switch to HTTP/SSE in the Web UI as needed)
145146
npx @modelcontextprotocol/inspector uv run modelscope-mcp-server
146-
```
147147

148-
Uses stdio transport by default; switch to HTTP/SSE in the Web UI as needed.
148+
# Run in CLI mode with HTTP transport (can do operations across tools, resources, and prompts)
149+
npx @modelcontextprotocol/inspector --cli http://127.0.0.1:8000/mcp/ --transport http --method tools/list
150+
```
149151

150152
### Testing
151153

demo.py

Lines changed: 75 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@
1313
from modelscope_mcp_server.settings import settings
1414
from modelscope_mcp_server.utils.metadata import get_server_name_with_version
1515

16+
# Global counter for demo step numbering
17+
demo_step = 0
18+
19+
20+
def print_step_title(tool_name: str, task_description: str) -> None:
21+
"""Print demo step title."""
22+
global demo_step
23+
demo_step += 1
24+
print(f"{demo_step}. 🛠️ Tool: {tool_name}")
25+
print(f" • Task: {task_description}")
26+
1627

1728
def parse_tool_response(result) -> dict:
1829
"""Parse tool response and return JSON data."""
@@ -27,10 +38,10 @@ def parse_tool_response(result) -> dict:
2738

2839
async def demo_user_info(client: Client) -> None:
2940
"""Demo getting current user information."""
30-
print("1. 🛠️ Tool: get_current_user")
31-
print(" • Task: 👤 Get current user information")
41+
tool_name = "get_current_user"
42+
print_step_title(tool_name, "👤 Get current user information")
3243

33-
result = await client.call_tool("get_current_user", {})
44+
result = await client.call_tool(tool_name, {})
3445
data = parse_tool_response(result)
3546

3647
username = data.get("username", "N/A")
@@ -43,10 +54,10 @@ async def demo_user_info(client: Client) -> None:
4354

4455
async def demo_environment_info(client: Client) -> None:
4556
"""Demo getting environment information."""
46-
print("2. 🛠️ Tool: get_environment_info")
47-
print(" • Task: 🔧 Get current MCP server environment information")
57+
tool_name = "get_environment_info"
58+
print_step_title(tool_name, "🔧 Get current MCP server environment information")
4859

49-
result = await client.call_tool("get_environment_info", {})
60+
result = await client.call_tool(tool_name, {})
5061
data = parse_tool_response(result)
5162

5263
print(f" • Result: {data}")
@@ -55,11 +66,13 @@ async def demo_environment_info(client: Client) -> None:
5566

5667
async def demo_search_models(client: Client) -> None:
5768
"""Demo searching models."""
58-
print("3. 🛠️ Tool: search_models")
59-
print(" • Task: 🔍 Search text-generation models (keyword='DeepSeek', support inference, limit 3 results)")
69+
tool_name = "search_models"
70+
print_step_title(
71+
tool_name, "🔍 Search text-generation models (keyword='DeepSeek', support inference, limit 3 results)"
72+
)
6073

6174
result = await client.call_tool(
62-
"search_models",
75+
tool_name,
6376
{
6477
"query": "DeepSeek",
6578
"task": "text-generation",
@@ -82,40 +95,73 @@ async def demo_search_models(client: Client) -> None:
8295
print()
8396

8497

98+
async def demo_search_datasets(client: Client) -> None:
99+
"""Demo searching datasets."""
100+
tool_name = "search_datasets"
101+
print_step_title(tool_name, "📊 Search datasets (keyword='金融', sort='downloads', limit 3 results)")
102+
103+
result = await client.call_tool(
104+
tool_name,
105+
{
106+
"query": "金融",
107+
"sort": "downloads",
108+
"limit": 3,
109+
},
110+
)
111+
data = parse_tool_response(result)
112+
113+
if isinstance(data, list) and data:
114+
summaries = []
115+
for dataset in data:
116+
name = dataset.get("name", "N/A")
117+
chinese_name = dataset.get("chinese_name", "N/A")
118+
downloads = dataset.get("downloads_count", 0)
119+
likes = dataset.get("likes_count", 0)
120+
summaries.append(f"{name} ({chinese_name}) - Downloads {downloads:,}, Likes {likes}")
121+
print(f" • Result: Found {len(data)} items - {' | '.join(summaries)}")
122+
else:
123+
print(" • Result: No datasets found")
124+
print()
125+
126+
85127
async def demo_search_papers(client: Client) -> None:
86128
"""Demo searching papers."""
87-
print("4. 🛠️ Tool: search_papers")
88-
print(" • Task: 📚 Search academic papers (keyword='Qwen3', sort='hot', limit 1 result)")
129+
tool_name = "search_papers"
130+
print_step_title(tool_name, "📚 Search papers (keyword='Qwen3', sort='hot', limit 3 result)")
89131

90132
result = await client.call_tool(
91-
"search_papers",
133+
tool_name,
92134
{
93135
"query": "Qwen3",
94136
"sort": "hot",
95-
"limit": 1,
137+
"limit": 3,
96138
},
97139
)
98140
data = parse_tool_response(result)
99141

100142
if isinstance(data, list) and data:
101-
paper = data[0]
102-
title = paper.get("title", "N/A")
103-
arxiv_id = paper.get("arxiv_id", "N/A")
104-
view_count = paper.get("view_count", 0)
105-
modelscope_url = paper.get("modelscope_url", "N/A")
106-
print(f" • Result: '{title}' ArXiv ID={arxiv_id}, Views={view_count:,} ModelScope URL={modelscope_url}")
143+
summaries = []
144+
for paper in data:
145+
title = paper.get("title", "N/A")
146+
arxiv_id = paper.get("arxiv_id", "N/A")
147+
view_count = paper.get("view_count", 0)
148+
modelscope_url = paper.get("modelscope_url", "N/A")
149+
summaries.append(f"{title} (ArXiv={arxiv_id}, Views={view_count:,} URL={modelscope_url})")
150+
print(f" • Result: Found {len(data)} items - {' | '.join(summaries)}")
107151
else:
108152
print(" • Result: No papers found")
109153
print()
110154

111155

112156
async def demo_search_mcp_servers(client: Client) -> None:
113157
"""Demo searching MCP servers."""
114-
print("5. 🛠️ Tool: search_mcp_servers")
115-
print(" • Task: 🔍 Search MCP servers (keyword='Chrome', category='browser-automation', limit 3 results)")
158+
tool_name = "search_mcp_servers"
159+
print_step_title(
160+
tool_name, "🔍 Search MCP servers (keyword='Chrome', category='browser-automation', limit 3 results)"
161+
)
116162

117163
result = await client.call_tool(
118-
"search_mcp_servers",
164+
tool_name,
119165
{
120166
"search": "Chrome",
121167
"category": "browser-automation",
@@ -139,24 +185,15 @@ async def demo_search_mcp_servers(client: Client) -> None:
139185

140186
async def demo_generate_image(client: Client) -> None:
141187
"""Demo image generation."""
142-
print("6. 🛠️ Tool: generate_image")
143-
print(" • Task: 🎨 Generate image (prompt='A curious cat wearing a tiny wizard hat in candy cloud kingdom')")
188+
tool_name = "generate_image"
189+
prompt = "A curious cat wearing a tiny wizard hat in candy cloud kingdom"
190+
print_step_title(tool_name, f"🎨 Generate image with prompt: {prompt}")
144191

145-
result = await client.call_tool(
146-
"generate_image",
147-
{
148-
"prompt": "A curious cat wearing a tiny wizard hat in candy cloud kingdom",
149-
},
150-
)
192+
result = await client.call_tool(tool_name, {"prompt": prompt})
151193
data = parse_tool_response(result)
152194

153-
image_url = data.get("image_url")
154-
model = data.get("model")
155-
156-
if not image_url:
157-
raise RuntimeError("Missing required field 'image_url' in response")
158-
if not model:
159-
raise RuntimeError("Missing required field 'model' in response")
195+
image_url = data.get("image_url", "N/A")
196+
model = data.get("model", "N/A")
160197

161198
print(f" • Result: Image generated using model '{model}' - URL: {image_url}")
162199
print()
@@ -204,6 +241,7 @@ async def main() -> None:
204241
await demo_user_info(client)
205242
await demo_environment_info(client)
206243
await demo_search_models(client)
244+
await demo_search_datasets(client)
207245
await demo_search_papers(client)
208246
await demo_search_mcp_servers(client)
209247

src/modelscope_mcp_server/server.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from .settings import settings
1515
from .tools.aigc import register_aigc_tools
1616
from .tools.context import register_context_tools
17+
from .tools.dataset import register_dataset_tools
1718
from .tools.mcp import register_mcp_tools
1819
from .tools.model import register_model_tools
1920
from .tools.paper import register_paper_tools
@@ -28,9 +29,7 @@ def create_mcp_server() -> FastMCP:
2829

2930
mcp = FastMCP(
3031
name=get_server_name_with_version(),
31-
instructions="""
32-
This server provides tools for calling ModelScope (魔搭社区) API.
33-
""",
32+
instructions="This server provides tools for calling ModelScope (魔搭社区) API.",
3433
)
3534

3635
# Add middleware in logical order
@@ -42,6 +41,7 @@ def create_mcp_server() -> FastMCP:
4241
# Register all tools
4342
register_context_tools(mcp)
4443
register_model_tools(mcp)
44+
register_dataset_tools(mcp)
4545
register_paper_tools(mcp)
4646
register_mcp_tools(mcp)
4747
register_aigc_tools(mcp)
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""ModelScope MCP Server Dataset tools.
2+
3+
Provides tools for dataset-related operations in the ModelScope MCP Server,
4+
such as searching for datasets and retrieving dataset details.
5+
"""
6+
7+
from typing import Annotated, Literal
8+
9+
from fastmcp import FastMCP
10+
from fastmcp.utilities import logging
11+
from pydantic import Field
12+
13+
from ..client import default_client
14+
from ..settings import settings
15+
from ..types import Dataset
16+
17+
logger = logging.get_logger(__name__)
18+
19+
20+
def register_dataset_tools(mcp: FastMCP) -> None:
21+
"""Register all dataset-related tools with the MCP server.
22+
23+
Args:
24+
mcp (FastMCP): The MCP server instance
25+
26+
"""
27+
28+
@mcp.tool(
29+
annotations={
30+
"title": "Search Datasets",
31+
}
32+
)
33+
async def search_datasets(
34+
query: Annotated[
35+
str,
36+
Field(
37+
description="Keyword to search for related datasets. "
38+
"Leave empty to get all datasets based on other filters."
39+
),
40+
] = "",
41+
sort: Annotated[
42+
Literal["default", "downloads", "likes", "gmt_modified"],
43+
Field(description="Sort order"),
44+
] = "default",
45+
limit: Annotated[int, Field(description="Maximum number of datasets to return", ge=1, le=30)] = 10,
46+
) -> list[Dataset]:
47+
"""Search for datasets on ModelScope."""
48+
url = f"{settings.main_domain}/api/v1/dolphin/datasets"
49+
50+
params = {
51+
"Query": query,
52+
"Sort": sort,
53+
"PageNumber": 1,
54+
"PageSize": limit,
55+
}
56+
57+
response = default_client.get(url, params=params)
58+
59+
datasets_data = response.get("Data", [])
60+
61+
datasets = []
62+
for dataset_data in datasets_data:
63+
path = dataset_data.get("Namespace", "")
64+
name = dataset_data.get("Name", "")
65+
modelscope_url = f"{settings.main_domain}/datasets/{path}/{name}"
66+
67+
if not path or not name:
68+
logger.warning(f"Skipping dataset with invalid path or name: {dataset_data}")
69+
continue
70+
71+
dataset = Dataset(
72+
id=f"{path}/{name}",
73+
path=path,
74+
name=name,
75+
chinese_name=dataset_data.get("ChineseName", ""),
76+
created_by=dataset_data.get("CreatedBy", ""),
77+
license=dataset_data.get("License", ""),
78+
modelscope_url=modelscope_url,
79+
downloads_count=dataset_data.get("Downloads", 0),
80+
likes_count=dataset_data.get("Likes", 0),
81+
created_at=dataset_data.get("GmtCreate", 0),
82+
updated_at=dataset_data.get("LastUpdatedTime", 0),
83+
)
84+
datasets.append(dataset)
85+
86+
return datasets

src/modelscope_mcp_server/tools/model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ async def search_models(
3434
query: Annotated[
3535
str,
3636
Field(
37-
description="Keyword to search for related models (e.g., 'Flux' will find models related to Flux). "
38-
"Leave empty to skip keyword matching and get all models based on other filters."
37+
description="Keyword to search for related models. "
38+
"Leave empty to get all models based on other filters."
3939
),
4040
] = "",
4141
task: Annotated[
@@ -118,6 +118,7 @@ async def search_models(
118118
name=name,
119119
chinese_name=model_data.get("ChineseName", ""),
120120
created_by=model_data.get("CreatedBy"),
121+
license=model_data.get("License", ""),
121122
modelscope_url=modelscope_url,
122123
# Non-empty value means True, else False
123124
support_inference=bool(model_data.get("SupportInference", "")),

src/modelscope_mcp_server/types.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class Model(BaseModel):
3939
name: Annotated[str, Field(description="Model name, for example 'DeepSeek-R1'")]
4040
chinese_name: Annotated[str, Field(description="Chinese name")]
4141
created_by: Annotated[str, Field(description="User who created the model")]
42+
license: Annotated[str, Field(description="Open source license")]
4243

4344
# Links
4445
modelscope_url: Annotated[str, Field(description="Detail page URL on ModelScope")]
@@ -55,6 +56,29 @@ class Model(BaseModel):
5556
updated_at: Annotated[int, Field(description="Last updated time (unix timestamp, seconds)")] = 0
5657

5758

59+
class Dataset(BaseModel):
60+
"""Dataset information."""
61+
62+
# Basic information
63+
id: Annotated[str, Field(description="Unique dataset ID, formatted as 'path/name'")]
64+
path: Annotated[str, Field(description="Dataset path, for example 'opencompass'")]
65+
name: Annotated[str, Field(description="Dataset name, for example 'mmlu'")]
66+
chinese_name: Annotated[str, Field(description="Chinese name")]
67+
created_by: Annotated[str, Field(description="User who created the dataset")]
68+
license: Annotated[str, Field(description="Open source license")]
69+
70+
# Links
71+
modelscope_url: Annotated[str, Field(description="Detail page URL on ModelScope")]
72+
73+
# Metrics
74+
downloads_count: Annotated[int, Field(description="Number of downloads")] = 0
75+
likes_count: Annotated[int, Field(description="Number of likes")] = 0
76+
77+
# Timestamps
78+
created_at: Annotated[int, Field(description="Created time (unix timestamp, seconds)")] = 0
79+
updated_at: Annotated[int, Field(description="Last updated time (unix timestamp, seconds)")] = 0
80+
81+
5882
class Paper(BaseModel):
5983
"""Paper information."""
6084

0 commit comments

Comments
 (0)