Skip to content

Commit d3eda6a

Browse files
committed
add miroapi support
1 parent 9ccab07 commit d3eda6a

File tree

6 files changed

+361
-18
lines changed

6 files changed

+361
-18
lines changed

docs/mkdocs/docs/miro_api.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# MiroAPI
2+
3+
!!! warning "Preview Documentation"
4+
This service is currently in preview and limited to internal access. Public release will follow once it is production-ready.
5+
6+
## Overview
7+
MiroAPI provides an internal caching layer for Serper Search and Jina Scrape to reduce costs, speed up development, and enable reproducible "go-back-in-time" sandbox runs by serving recorded results when available.
8+
9+
### Step 1: Apply for a MiroAPI key
10+
Request a MiroAPI key through the internal portal.
11+
12+
### Step 2: Configure .env
13+
```bash
14+
# API for Google Search (recommended)
15+
SERPER_API_KEY="svc-miro-api01-replace-with-your-key"
16+
SERPER_BASE_URL="https://miro-api.miromind.site/serper"
17+
18+
# API for Web Scraping (recommended)
19+
JINA_API_KEY="svc-miro-api01-replace-with-your-key"
20+
JINA_BASE_URL="https://miro-api.miromind.site/jina"
21+
```
22+
23+
24+

docs/mkdocs/mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ nav:
7474
- tool-python: tool_python.md
7575
- Advanced Features:
7676
- E2B Advanced Features: e2b_advanced_features.md
77+
- MiroAPI: miro_api.md
7778
- Add New Tools: contribute_tools.md
7879

7980
- LLM Clients:
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# Copyright 2025 Miromind.ai
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
adapted from
17+
https://github.com/MiroMindAI/MiroRL/blob/5073693549ffe05a157a1886e87650ef3be6606e/mirorl/tools/serper_search.py#L1
18+
"""
19+
20+
import os
21+
from typing import Any, Dict
22+
23+
import requests
24+
from mcp.server.fastmcp import FastMCP
25+
from tenacity import (
26+
retry,
27+
retry_if_exception_type,
28+
stop_after_attempt,
29+
wait_exponential,
30+
)
31+
32+
from .utils.url_unquote import decode_http_urls_in_dict
33+
34+
# Serper endpoint; SERPER_BASE_URL may be pointed at the internal MiroAPI
# caching proxy instead of the public https://google.serper.dev service.
SERPER_BASE_URL = os.getenv("SERPER_BASE_URL", "https://google.serper.dev")
# API key for Serper; empty string means "not configured" (validated per call).
SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")


# Initialize FastMCP server
mcp = FastMCP("serper-mcp-server")
40+
41+
42+
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type(
        (requests.ConnectionError, requests.Timeout, requests.HTTPError)
    ),
)
def make_serper_request(
    payload: Dict[str, Any], headers: Dict[str, str], timeout: float = 120.0
) -> requests.Response:
    """Make an HTTP POST to the Serper ``/search`` endpoint with retry logic.

    Retries up to 3 times with exponential backoff on connection errors,
    timeouts, and HTTP error statuses (surfaced via ``raise_for_status``).

    Args:
        payload: JSON body of the search request (query plus options).
        headers: Request headers; must include ``X-API-KEY``.
        timeout: Per-request timeout in seconds. Without it, a stalled
            connection would hang forever and the ``requests.Timeout``
            retry branch above could never trigger. Backward-compatible
            keyword with a generous default.

    Returns:
        The successful ``requests.Response``.

    Raises:
        requests.HTTPError: If the final attempt returns a non-2xx status.
        requests.ConnectionError | requests.Timeout: If all retries fail.
    """
    response = requests.post(
        f"{SERPER_BASE_URL}/search", json=payload, headers=headers, timeout=timeout
    )
    response.raise_for_status()
    return response
56+
57+
58+
def _is_huggingface_dataset_or_space_url(url):
59+
"""
60+
Check if the URL is a HuggingFace dataset or space URL.
61+
:param url: The URL to check
62+
:return: True if it's a HuggingFace dataset or space URL, False otherwise
63+
"""
64+
if not url:
65+
return False
66+
return "huggingface.co/datasets" in url or "huggingface.co/spaces" in url
67+
68+
69+
@mcp.tool()
def google_search(
    q: str,
    gl: str = "us",
    hl: str = "en",
    location: str | None = None,
    num: int | None = None,
    tbs: str | None = None,
    page: int | None = None,
    autocorrect: bool | None = None,
) -> Dict[str, Any]:
    """
    Tool to perform web searches via Serper API and retrieve rich results.

    It is able to retrieve organic search results, people also ask,
    related searches, and knowledge graph.

    Args:
        q: Search query string
        gl: Optional region code for search results in ISO 3166-1 alpha-2 format (e.g., 'us')
        hl: Optional language code for search results in ISO 639-1 format (e.g., 'en')
        location: Optional location for search results (e.g., 'SoHo, New York, United States', 'California, United States')
        num: Number of results to return (default: 10)
        tbs: Time-based search filter ('qdr:h' for past hour, 'qdr:d' for past day, 'qdr:w' for past week,
             'qdr:m' for past month, 'qdr:y' for past year)
        page: Page number of results to return (default: 1)
        autocorrect: Whether to autocorrect spelling in query

    Returns:
        Dictionary containing search results and metadata.
    """
    # Fail fast when the request cannot be authenticated.
    if not SERPER_API_KEY:
        return {
            "success": False,
            "error": "SERPER_API_KEY environment variable not set",
            "results": [],
        }

    # Guard clause: an empty query would be rejected by the API anyway.
    if not q or not q.strip():
        return {
            "success": False,
            "error": "Search query 'q' is required and cannot be empty",
            "results": [],
        }

    try:
        # Required parameters first, then optionals only when supplied.
        payload: dict[str, Any] = {"q": q.strip(), "gl": gl, "hl": hl}
        if location:
            payload["location"] = location
        # Serper's default page size; set explicitly when caller omitted it.
        payload["num"] = 10 if num is None else num
        if tbs:
            payload["tbs"] = tbs
        if page is not None:
            payload["page"] = page
        if autocorrect is not None:
            payload["autocorrect"] = autocorrect

        headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}

        data = make_serper_request(payload, headers).json()

        # Drop organic hits pointing at HuggingFace datasets/spaces while
        # keeping every other field of the response untouched.
        response_data = dict(data)
        response_data["organic"] = [
            item
            for item in data.get("organic", [])
            if not _is_huggingface_dataset_or_space_url(item.get("link", ""))
        ]
        return decode_http_urls_in_dict(response_data)

    except Exception as e:
        return {"success": False, "error": f"Unexpected error: {str(e)}", "results": []}
162+
163+
164+
# Entry point: launch the FastMCP server when executed as a script/module.
if __name__ == "__main__":
    mcp.run()

src/tool/mcp_servers/searching_mcp_server.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import sys
56
import os
67
import json
78
import requests
@@ -17,7 +18,11 @@
1718

1819

1920
SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "")
21+
SERPER_BASE_URL = os.environ.get("SERPER_BASE_URL", "https://google.serper.dev")
2022
JINA_API_KEY = os.environ.get("JINA_API_KEY", "")
23+
JINA_BASE_URL = os.environ.get("JINA_BASE_URL", "https://r.jina.ai")
24+
25+
IS_MIRO_API = True if "miro" in SERPER_BASE_URL or "miro" in JINA_BASE_URL else False
2126

2227
# Google search result filtering environment variables
2328
REMOVE_SNIPPETS = os.environ.get("REMOVE_SNIPPETS", "").lower() in ("true", "1", "yes")
@@ -122,11 +127,18 @@ async def google_search(
122127
arguments["location"] = location
123128
if tbs:
124129
arguments["tbs"] = tbs
125-
server_params = StdioServerParameters(
126-
command="npx",
127-
args=["-y", "serper-search-scrape-mcp-server"],
128-
env={"SERPER_API_KEY": SERPER_API_KEY},
129-
)
130+
if IS_MIRO_API:
131+
server_params = StdioServerParameters(
132+
command=sys.executable,
133+
args=["-m", "src.tool.mcp_servers.miroapi_serper_mcp_server"],
134+
env={"SERPER_API_KEY": SERPER_API_KEY, "SERPER_BASE_URL": SERPER_BASE_URL},
135+
)
136+
else:
137+
server_params = StdioServerParameters(
138+
command="npx",
139+
args=["-y", "serper-search-scrape-mcp-server"],
140+
env={"SERPER_API_KEY": SERPER_API_KEY},
141+
)
130142
result_content = ""
131143
retry_count = 0
132144
max_retries = 5
@@ -348,7 +360,12 @@ async def search_wiki_revision(
348360
content = await smart_request(
349361
url=base_url,
350362
params=params,
351-
env={"SERPER_API_KEY": SERPER_API_KEY, "JINA_API_KEY": JINA_API_KEY},
363+
env={
364+
"SERPER_API_KEY": SERPER_API_KEY,
365+
"JINA_API_KEY": JINA_API_KEY,
366+
"SERPER_BASE_URL": SERPER_BASE_URL,
367+
"JINA_BASE_URL": JINA_BASE_URL,
368+
},
352369
)
353370
data = request_to_json(content)
354371

@@ -527,6 +544,8 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
527544
env={
528545
"SERPER_API_KEY": SERPER_API_KEY,
529546
"JINA_API_KEY": JINA_API_KEY,
547+
"SERPER_BASE_URL": SERPER_BASE_URL,
548+
"JINA_BASE_URL": JINA_BASE_URL,
530549
},
531550
)
532551
data = request_to_json(content)
@@ -585,7 +604,12 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
585604
content = await smart_request(
586605
url=base_url,
587606
params={"url": url},
588-
env={"SERPER_API_KEY": SERPER_API_KEY, "JINA_API_KEY": JINA_API_KEY},
607+
env={
608+
"SERPER_API_KEY": SERPER_API_KEY,
609+
"JINA_API_KEY": JINA_API_KEY,
610+
"SERPER_BASE_URL": SERPER_BASE_URL,
611+
"JINA_BASE_URL": JINA_BASE_URL,
612+
},
589613
)
590614
data = request_to_json(content)
591615
if "archived_snapshots" in data and "closest" in data["archived_snapshots"]:
@@ -664,7 +688,13 @@ async def scrape_website(url: str) -> str:
664688
"""
665689
# TODO: Long Content Handling
666690
return await smart_request(
667-
url, env={"SERPER_API_KEY": SERPER_API_KEY, "JINA_API_KEY": JINA_API_KEY}
691+
url,
692+
env={
693+
"SERPER_API_KEY": SERPER_API_KEY,
694+
"JINA_API_KEY": JINA_API_KEY,
695+
"SERPER_BASE_URL": SERPER_BASE_URL,
696+
"JINA_BASE_URL": JINA_BASE_URL,
697+
},
668698
)
669699

670700

src/tool/mcp_servers/utils/smart_request.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import urllib.parse
1414
from markitdown import MarkItDown
1515
import io
16+
from typing import Optional
17+
import os
1618

1719

1820
def request_to_json(content: str) -> dict:
@@ -30,13 +32,16 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
3032
if env:
3133
JINA_API_KEY = env.get("JINA_API_KEY", "")
3234
SERPER_API_KEY = env.get("SERPER_API_KEY", "")
35+
JINA_BASE_URL = env.get("JINA_BASE_URL", "https://r.jina.ai")
3336
else:
3437
JINA_API_KEY = ""
3538
SERPER_API_KEY = ""
3639

3740
if JINA_API_KEY == "" and SERPER_API_KEY == "":
3841
return "[ERROR]: JINA_API_KEY and SERPER_API_KEY are not set, smart_request is not available."
3942

43+
IS_MIRO_API = True if "miro" in JINA_BASE_URL else False
44+
4045
# Auto-add https:// if no protocol is specified
4146
protocol_hint = ""
4247
if not url.startswith(("http://", "https://")):
@@ -65,21 +70,24 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
6570
):
6671
youtube_hint = "[NOTE]: If you need to get information about its visual or audio content, please use tool 'visual_audio_youtube_analyzing' instead. This tool may not be able to provide visual and audio content of a YouTube Video.\n\n"
6772

68-
content, jina_err = await scrape_jina(url, JINA_API_KEY)
73+
content, jina_err = await scrape_jina(url, JINA_API_KEY, JINA_BASE_URL)
6974
if jina_err:
7075
error_msg += f"Failed to get content from Jina.ai: {jina_err}\n"
7176
elif content is None or content.strip() == "":
7277
error_msg += "No content got from Jina.ai.\n"
7378
else:
7479
return protocol_hint + youtube_hint + content
7580

76-
content, serper_err = await scrape_serper(url, SERPER_API_KEY)
77-
if serper_err:
78-
error_msg += f"Failed to get content from SERPER: {serper_err}\n"
79-
elif content is None or content.strip() == "":
80-
error_msg += "No content got from SERPER.\n"
81-
else:
82-
return protocol_hint + youtube_hint + content
81+
if not IS_MIRO_API:
82+
# Try Serper API for scraping if not using Miro API
83+
# (Miro API does not support caching Serper scraping results)
84+
content, serper_err = await scrape_serper(url, SERPER_API_KEY)
85+
if serper_err:
86+
error_msg += f"Failed to get content from SERPER: {serper_err}\n"
87+
elif content is None or content.strip() == "":
88+
error_msg += "No content got from SERPER.\n"
89+
else:
90+
return protocol_hint + youtube_hint + content
8391

8492
content, request_err = scrape_request(url)
8593
if request_err:
@@ -99,7 +107,9 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
99107
await asyncio.sleep(4**retry_count)
100108

101109

102-
async def scrape_jina(url: str, jina_api_key: str) -> tuple[str, str]:
110+
async def scrape_jina(
111+
url: str, jina_api_key: str, jina_base_url: str
112+
) -> tuple[str, str]:
103113
# Use Jina.ai reader API to convert URL to LLM-friendly text
104114
if jina_api_key == "":
105115
return (
@@ -116,7 +126,7 @@ async def scrape_jina(url: str, jina_api_key: str) -> tuple[str, str]:
116126
"X-With-Shadow-Dom": "true",
117127
}
118128

119-
jina_url = f"https://r.jina.ai/{url}"
129+
jina_url = f"{jina_base_url}/{url}"
120130
try:
121131
response = requests.get(jina_url, headers=jina_headers, timeout=120)
122132
if response.status_code == 422:

0 commit comments

Comments
 (0)