Skip to content

Commit 26afa3d

Browse files
committed
Add engine generator and refresh engine schemas
1 parent 804ce87 commit 26afa3d

File tree

112 files changed

+1416
-990
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+1416
-990
lines changed

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ COPY pyproject.toml /app/
88
COPY README.md /app/
99
COPY src /app/src
1010
COPY engines /app/engines
11+
COPY build-engines.py /app/build-engines.py
1112

1213
RUN uv sync
1314

1415
ENV PATH="/app/.venv/bin:$PATH"
1516

17+
RUN python /app/build-engines.py
18+
1619
EXPOSE 8000
1720

1821
CMD ["python", "src/server.py"]

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ uv sync && uv run src/server.py
102102
# Docker
103103
docker build -t serpapi-mcp . && docker run -p 8000:8000 serpapi-mcp
104104

105+
# Regenerate engine resources (scrapes the SerpApi Playground; requires network access)
106+
python build-engines.py
107+
105108
# Testing with MCP Inspector
106109
npx @modelcontextprotocol/inspector
107110
# Configure: URL mcp.serpapi.com/YOUR_KEY/mcp, Transport "Streamable HTTP transport"

build-engines.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env python3
2+
"""Build SerpApi engine parameter data for MCP usage."""
3+
4+
from __future__ import annotations
5+
6+
import html
7+
import json
8+
from pathlib import Path
9+
from urllib.request import Request, urlopen
10+
11+
from bs4 import BeautifulSoup
12+
from markdownify import markdownify
13+
14+
# Page whose embedded React props describe every engine's parameters.
PLAYGROUND_URL = "https://serpapi.com/playground"
# Engines present in the playground data for which no file is generated.
EXCLUDED_ENGINES = {
    "google_scholar_profiles",
    "google_light_fast",
    "google_lens_image_sources",
}
# Per-parameter keys copied from the playground payload; everything else is dropped.
PARAM_KEEP_KEYS = {"html", "type", "options", "required"}
# Anchor the output next to this script so the result does not depend on the
# directory the script happens to be launched from.
OUTPUT_DIR = Path(__file__).resolve().parent / "engines"
# Network timeout for the playground request, in seconds.
TIMEOUT_SECONDS = 30
USER_AGENT = "Mozilla/5.0"
24+
25+
26+
def html_to_markdown(value: str) -> str:
    """Convert an HTML fragment to single-line markdown.

    HTML entities are decoded first, ``<a>`` tags are stripped by markdownify,
    and every run of whitespace (newlines included) is collapsed to a single
    space.

    Underscore escaping is disabled: the descriptions mention parameter names
    such as ``no_cache`` in plain text, and markdownify's default escaping
    would emit them as ``no\\_cache``, which no longer matches the real
    parameter name.

    Args:
        value: HTML fragment to convert.

    Returns:
        The markdown rendering of ``value`` on one line.
    """
    md = markdownify(
        html.unescape(value),
        strip=["a"],
        escape_underscores=False,
    )
    return " ".join(md.split())
30+
31+
32+
33+
def _is_numeric_code(value: object) -> bool:
    """Return True when *value* is a number or a string made only of digits."""
    return isinstance(value, (int, float)) or (
        isinstance(value, str) and value.isdigit()
    )


def normalize_options(options: list[object]) -> list[object]:
    """Normalize option values, simplifying ``[value, label]`` pairs where possible.

    Each non-empty list option is treated as ``[value, label, ...]``:

    * when the value is numeric (a number or an all-digit string) and the
      label says something different from the value, the whole option is kept
      so the label stays available;
    * otherwise the option is reduced to its bare value.

    Scalars and empty lists are passed through unchanged.

    Args:
        options: Raw option entries.

    Returns:
        A new list with one normalized entry per input option, in order.
    """
    normalized: list[object] = []
    for option in options:
        if not isinstance(option, list) or not option:
            normalized.append(option)
            continue

        value = option[0]
        label = option[1] if len(option) > 1 else None
        # A label is redundant when it equals the value, either directly or
        # once both are rendered as text (e.g. value 1 with label "1").
        label_adds_info = (
            label is not None and value != label and str(value) != str(label)
        )
        if label_adds_info and _is_numeric_code(value):
            normalized.append(option)
        else:
            normalized.append(value)
    return normalized
47+
48+
49+
def fetch_props(url: str) -> dict[str, object]:
    """Fetch the playground HTML and return its embedded React props.

    The page is downloaded with the module's user agent and timeout, and the
    first element carrying a ``data-react-props`` attribute is located. That
    attribute's value is decoded as JSON.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded props object.

    Raises:
        RuntimeError: If no ``data-react-props`` attribute is found, or its
            value is not valid JSON, or the JSON is not an object.
    """
    req = Request(url, headers={"User-Agent": USER_AGENT})
    with urlopen(req, timeout=TIMEOUT_SECONDS) as resp:
        page_html = resp.read().decode("utf-8", errors="ignore")

    soup = BeautifulSoup(page_html, "html.parser")
    node = soup.find(attrs={"data-react-props": True})
    if not node:
        raise RuntimeError("Failed to locate data-react-props in playground HTML.")

    # NOTE(review): the HTML parser presumably already decodes entities in
    # attribute values, so this second unescape pass looks redundant - confirm
    # against the live page before removing it.
    raw_props = html.unescape(node["data-react-props"])
    try:
        props = json.loads(raw_props)
    except json.JSONDecodeError as err:
        raise RuntimeError(
            "data-react-props in playground HTML is not valid JSON."
        ) from err
    # Callers use dict methods on the result, so reject any other JSON shape
    # here with a clear message instead of failing later.
    if not isinstance(props, dict):
        raise RuntimeError("data-react-props in playground HTML is not a JSON object.")
    return props
59+
60+
61+
def normalize_engine(engine: str, payload: dict[str, object]) -> dict[str, object]:
    """Reduce one engine's playground payload to the parameter metadata we keep.

    Every group in ``payload`` that is a dict with a dict under ``"parameters"``
    is walked. For each dict-valued parameter only the keys in
    ``PARAM_KEEP_KEYS`` survive; a list under ``"options"`` is normalized, and a
    string under ``"html"`` is replaced by a markdown ``"description"``.
    Parameters left with no data are dropped; the rest are tagged with their
    group name.

    Args:
        engine: Engine identifier, echoed back in the result.
        payload: Mapping of group name to group data for this engine.

    Returns:
        A dict with ``"engine"``, ``"params"`` (engine-specific parameters) and
        ``"common_params"`` (parameters from the ``serpapi_parameters`` group).
    """
    engine_specific: dict[str, dict[str, object]] = {}
    shared: dict[str, dict[str, object]] = {}

    groups = payload.items() if isinstance(payload, dict) else ()
    for group_name, group in groups:
        raw_params = group.get("parameters") if isinstance(group, dict) else None
        if not isinstance(raw_params, dict):
            continue

        # The whole group lands in one bucket, so pick it once per group.
        bucket = shared if group_name == "serpapi_parameters" else engine_specific

        for param_name, spec in raw_params.items():
            if not isinstance(spec, dict):
                continue

            entry = {key: spec[key] for key in spec if key in PARAM_KEEP_KEYS}

            choices = entry.get("options")
            if isinstance(choices, list):
                entry["options"] = normalize_options(choices)

            # "html" never reaches the output: it is either converted into a
            # markdown description or discarded when it is not a string.
            raw_html = entry.pop("html", None)
            if isinstance(raw_html, str):
                entry["description"] = html_to_markdown(raw_html)

            if not entry:
                continue
            entry["group"] = group_name
            bucket[param_name] = entry

    return {"engine": engine, "params": engine_specific, "common_params": shared}
87+
88+
89+
def main() -> int:
    """Fetch playground data and write one JSON file per engine.

    Engines listed in ``EXCLUDED_ENGINES`` and entries whose payload is not a
    dict are skipped. Engine names come from the downloaded page and are used
    as file names, so any name containing characters other than letters,
    digits, ``_`` or ``-`` is skipped as well; this keeps a hostile or broken
    page from writing outside ``OUTPUT_DIR``.

    Returns:
        ``0`` on success, for use as the process exit status.

    Raises:
        RuntimeError: If the props have no ``"parameters"`` mapping.
    """
    props = fetch_props(PLAYGROUND_URL)
    if not isinstance(params := props.get("parameters"), dict):
        raise RuntimeError("Playground props missing 'parameters' map.")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    written = 0

    for engine, payload in sorted(params.items()):
        if not isinstance(engine, str) or engine in EXCLUDED_ENGINES:
            continue
        if not isinstance(payload, dict):
            continue
        # Reject empty names and anything with path separators, dots, etc.
        if not engine or not all(ch.isalnum() or ch in "_-" for ch in engine):
            print(f"Skipping engine with unsafe name: {engine!r}")
            continue

        document = json.dumps(normalize_engine(engine, payload), indent=2)
        (OUTPUT_DIR / f"{engine}.json").write_text(document, encoding="utf-8")
        written += 1

    print(f"Wrote {written} engine files to {OUTPUT_DIR}")
    return 0
110+
111+
112+
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
113+
    raise SystemExit(main())

engines/amazon.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
"amazon.com|es_US",
8989
"amazon.com.mx|pt_MX"
9090
],
91-
"description": "Parameter defines the language to use for the Amazon search. It's a locale name represented as _. (e.g., on amazon.com `en_US` for English, `es_US` for Spanish, or on amazon.co.jp `ja_JP` for Japanese). Head to Amazon languages for a full list of supported Amazon languages.",
91+
"description": "Parameter defines the language to use for the Amazon search. It's a locale name represented as \\_. (e.g., on amazon.com `en_US` for English, `es_US` for Spanish, or on amazon.co.jp `ja_JP` for Japanese). Head to Amazon languages for a full list of supported Amazon languages.",
9292
"group": "localization"
9393
},
9494
"delivery_zip": {
@@ -358,7 +358,7 @@
358358
"group": "advanced_filters"
359359
},
360360
"rh": {
361-
"description": "Parameter defines items filtering based on their attributes. The structure is a list of `key:value` pairs separated by `,`. For example `n:16318031,p_n_cpf_eligible:21512497011,p_72:1248897011` to filter for products in Coffee department (`n:16318031`) that are Climate Pledge Friendly (`p_n_cpf_eligible:21512497011`) and rated 4 Stars & Up (`p_72:1248897011`).",
361+
"description": "Parameter defines items filtering based on their attributes. The structure is a list of `key:value` pairs separated by `,`. For example `n:16318031,p_n_cpf_eligible:21512497011,p_72:1248897011` to filter for products in **Coffee** department (`n:16318031`) that are **Climate Pledge Friendly** (`p_n_cpf_eligible:21512497011`) and rated **4 Stars & Up** (`p_72:1248897011`).",
362362
"group": "advanced_filters"
363363
},
364364
"dc": {
@@ -390,11 +390,11 @@
390390
},
391391
"no_cache": {
392392
"type": "checkbox",
393-
"description": "Parameter will force SerpApi to fetch the Amazon results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no_cache and async parameters should not be used together.",
393+
"description": "Parameter will force SerpApi to fetch the Amazon results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no\\_cache and async parameters should not be used together.",
394394
"group": "serpapi_parameters"
395395
},
396396
"async": {
397-
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
397+
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no\\_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
398398
"group": "serpapi_parameters"
399399
},
400400
"zero_trace": {

engines/amazon_product.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
"amazon.com|es_US",
8989
"amazon.com.mx|pt_MX"
9090
],
91-
"description": "Parameter defines the language to use for the Amazon search. It's a locale name represented as _. (e.g., on amazon.com `en_US` for English, `es_US` for Spanish, or on amazon.co.jp `ja_JP` for Japanese). Head to Amazon languages for a full list of supported Amazon languages.",
91+
"description": "Parameter defines the language to use for the Amazon search. It's a locale name represented as \\_. (e.g., on amazon.com `en_US` for English, `es_US` for Spanish, or on amazon.co.jp `ja_JP` for Japanese). Head to Amazon languages for a full list of supported Amazon languages.",
9292
"group": "localization"
9393
},
9494
"delivery_zip": {
@@ -364,11 +364,11 @@
364364
},
365365
"no_cache": {
366366
"type": "checkbox",
367-
"description": "Parameter will force SerpApi to fetch the Amazon Product results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no_cache and async parameters should not be used together.",
367+
"description": "Parameter will force SerpApi to fetch the Amazon Product results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no\\_cache and async parameters should not be used together.",
368368
"group": "serpapi_parameters"
369369
},
370370
"async": {
371-
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
371+
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no\\_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
372372
"group": "serpapi_parameters"
373373
},
374374
"zero_trace": {

engines/apple_app_store.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@
406406
"Weather - 6001"
407407
]
408408
],
409-
"description": "Parameter allows to only show app results for a specific category, or genre. E.g. category_id=`6014` will only return apps that have \"Games\" as at least one of their categories, or genres. Head to the Apple Categories for a full list of supported Apple Categories/Genres.",
409+
"description": "Parameter allows to only show app results for a specific category, or genre. E.g. category\\_id=`6014` will only return apps that have \"Games\" as at least one of their categories, or genres. Head to the Apple Categories for a full list of supported Apple Categories/Genres.",
410410
"group": "advanced_parameters"
411411
}
412412
},
@@ -428,11 +428,11 @@
428428
},
429429
"no_cache": {
430430
"type": "checkbox",
431-
"description": "Parameter will force SerpApi to fetch the Apple App Store results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no_cache and async parameters should not be used together.",
431+
"description": "Parameter will force SerpApi to fetch the Apple App Store results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no\\_cache and async parameters should not be used together.",
432432
"group": "serpapi_parameters"
433433
},
434434
"async": {
435-
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
435+
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no\\_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
436436
"group": "serpapi_parameters"
437437
},
438438
"zero_trace": {

engines/apple_product.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,11 @@
187187
},
188188
"no_cache": {
189189
"type": "checkbox",
190-
"description": "Parameter will force SerpApi to fetch the Apple Product results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no_cache and async parameters should not be used together.",
190+
"description": "Parameter will force SerpApi to fetch the Apple Product results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no\\_cache and async parameters should not be used together.",
191191
"group": "serpapi_parameters"
192192
},
193193
"async": {
194-
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
194+
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no\\_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
195195
"group": "serpapi_parameters"
196196
},
197197
"zero_trace": {

engines/apple_reviews.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@
184184
"mostfavorable",
185185
"mostcritical"
186186
],
187-
"description": "Parameter is used for sorting reviews for the iOS App Store (iPhone and iPad). It can be set to: `mostrecent`: Most recent (default), `mosthelpful`: Most helpful, `mostfavorable`: Most favorable, `mostcritical`: Most critical This parameter has no effect on the macOS App Store. Reviews from the macOS App Store will always be sorted from most recent to least recent.",
187+
"description": "Parameter is used for sorting reviews for the iOS App Store (iPhone and iPad). It can be set to:",
188188
"group": "advanced_apple_reviews_parameters"
189189
}
190190
},
@@ -196,11 +196,11 @@
196196
},
197197
"no_cache": {
198198
"type": "checkbox",
199-
"description": "Parameter will force SerpApi to fetch the Apple Reviews results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no_cache and async parameters should not be used together.",
199+
"description": "Parameter will force SerpApi to fetch the Apple Reviews results even if a cached version is already present. A cache is served only if the query and all parameters are exactly the same. Cache expires after 1h. Cached searches are free, and are not counted towards your searches per month. It can be set to `false` (default) to allow results from the cache, or `true` to disallow results from the cache. no\\_cache and async parameters should not be used together.",
200200
"group": "serpapi_parameters"
201201
},
202202
"async": {
203-
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
203+
"description": "Parameter defines the way you want to submit your search to SerpApi. It can be set to `false` (default) to open an HTTP connection and keep it open until you got your search results, or `true` to just submit your search to SerpApi and retrieve them later. In this case, you'll need to use our Searches Archive API to retrieve your results. async and no\\_cache parameters should not be used together. async should not be used on accounts with Ludicrous Speed enabled.",
204204
"group": "serpapi_parameters"
205205
},
206206
"zero_trace": {

0 commit comments

Comments
 (0)