Skip to content

Commit d955134

Browse files
AadityaBhoota and Aaditya Bhoota authored
feat: Adding Facet Filtering to search_documentation (#1883)
* Initial commit to filter by facets * updating test cases * Simplifying prompt * fixing typo * Updating prompt to be more accurate * Minor prompt improvements * updating tests and edge cases * updating tests * addressing PR comments * addressing PR comments * Minor test case update to be more robust --------- Co-authored-by: Aaditya Bhoota <[email protected]>
1 parent d3656c2 commit d955134

File tree

9 files changed

+433
-113
lines changed

9 files changed

+433
-113
lines changed

src/aws-documentation-mcp-server/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ read_documentation(url: str) -> str
171171
Searches AWS documentation using the official AWS Documentation Search API.
172172

173173
```python
174-
search_documentation(search_phrase: str, limit: int) -> list[dict]
174+
search_documentation(ctx: Context, search_phrase: str, limit: int, product_types: Optional[List[str]], guide_types: Optional[List[str]]) -> SearchResponse
175175
```
176176

177177
### recommend (global only)

src/aws-documentation-mcp-server/awslabs/aws_documentation_mcp_server/models.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"""Data models for AWS Documentation MCP Server."""
1515

1616
from pydantic import BaseModel
17-
from typing import Optional
17+
from typing import Dict, List, Optional
1818

1919

2020
class SearchResult(BaseModel):
@@ -23,10 +23,17 @@ class SearchResult(BaseModel):
2323
rank_order: int
2424
url: str
2525
title: str
26-
query_id: str
2726
context: Optional[str] = None
2827

2928

29+
class SearchResponse(BaseModel):
30+
"""Complete search response including results and facets."""
31+
32+
search_results: List[SearchResult]
33+
facets: Optional[Dict[str, List[str]]] = None
34+
query_id: str
35+
36+
3037
class RecommendationResult(BaseModel):
3138
"""Recommendation result from AWS documentation."""
3239

src/aws-documentation-mcp-server/awslabs/aws_documentation_mcp_server/server_aws.py

Lines changed: 73 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# Import models
2222
from awslabs.aws_documentation_mcp_server.models import (
2323
RecommendationResult,
24+
SearchResponse,
2425
SearchResult,
2526
)
2627
from awslabs.aws_documentation_mcp_server.server_utils import (
@@ -36,7 +37,7 @@
3637
from loguru import logger
3738
from mcp.server.fastmcp import Context, FastMCP
3839
from pydantic import Field
39-
from typing import List
40+
from typing import List, Optional
4041

4142

4243
SEARCH_API_URL = 'https://proxy.search.docs.aws.amazon.com/search'
@@ -170,7 +171,15 @@ async def search_documentation(
170171
ge=1,
171172
le=50,
172173
),
173-
) -> List[SearchResult]:
174+
product_types: Optional[List[str]] = Field(
175+
default=None,
176+
description='Filter results by AWS product/service (e.g., ["Amazon Simple Storage Service"])',
177+
),
178+
guide_types: Optional[List[str]] = Field(
179+
default=None,
180+
description='Filter results by guide type (e.g., ["User Guide", "API Reference", "Developer Guide"])',
181+
),
182+
) -> SearchResponse:
174183
"""Search AWS documentation using the official AWS Documentation Search API.
175184
176185
## Usage
@@ -184,22 +193,34 @@ async def search_documentation(
184193
- Include service names to narrow results (e.g., "S3 bucket versioning" instead of just "versioning")
185194
- Use quotes for exact phrase matching (e.g., "AWS Lambda function URLs")
186195
- Include abbreviations and alternative terms to improve results
196+
- Use the guide_types and product_types filters, with values taken from a SearchResponse's "facets" property:
197+
- Filter only for broad search queries with patterns:
198+
- "What is [service]?" -> product_types: ["Amazon Simple Storage Service"]
199+
- "How to use <service 1> with <service 2>?" -> product_types: [<service 1>, <service 2>]
200+
- "[service] getting started" -> product_types: [<service>] + guide_types: ["User Guide", "Developer Guide"]
201+
- "API reference for [service]" -> product_types: [<service>] + guide_types: ["API Reference"]
187202
188203
## Result Interpretation
189204
190-
Each result includes:
191-
- rank_order: The relevance ranking (lower is more relevant)
192-
- url: The documentation page URL
193-
- title: The page title
194-
- context: A brief excerpt or summary (if available)
205+
Each SearchResponse includes:
206+
- search_results: List of documentation pages, each with:
207+
- rank_order: The relevance ranking (lower is more relevant)
208+
- url: The documentation page URL
209+
- title: The page title
210+
- context: A brief excerpt or summary (if available)
211+
- facets: Available filters (product_types, guide_types) for refining searches
212+
- query_id: Unique identifier for this search session
213+
195214
196215
Args:
197216
ctx: MCP context for logging and error handling
198217
search_phrase: Search phrase to use
199218
limit: Maximum number of results to return
219+
product_types: Filter by AWS product/service
220+
guide_types: Filter by guide type
200221
201222
Returns:
202-
List of search results with URLs, titles, query ID, and context snippets
223+
SearchResponse containing the list of search results (URLs, titles, context snippets), the query ID, and facets for filtering
203224
"""
204225
logger.debug(f'Searching AWS documentation for: {search_phrase}')
205226

@@ -215,6 +236,18 @@ async def search_documentation(
215236
if any(term in search_phrase.lower() for term in modifier['terms']):
216237
request_body['contextAttributes'].extend(modifier['domains'])
217238

239+
# Add product and guide filters if provided
240+
if product_types:
241+
for product in product_types:
242+
request_body['contextAttributes'].append(
243+
{'key': 'aws-docs-search-product', 'value': product}
244+
)
245+
if guide_types:
246+
for guide in guide_types:
247+
request_body['contextAttributes'].append(
248+
{'key': 'aws-docs-search-guide', 'value': guide}
249+
)
250+
218251
search_url_with_session = f'{SEARCH_API_URL}?session={SESSION_UUID}'
219252

220253
async with httpx.AsyncClient() as client:
@@ -233,38 +266,45 @@ async def search_documentation(
233266
error_msg = f'Error searching AWS docs: {str(e)}'
234267
logger.error(error_msg)
235268
await ctx.error(error_msg)
236-
return [SearchResult(rank_order=1, url='', title=error_msg, query_id='', context=None)]
269+
return SearchResponse(
270+
search_results=[SearchResult(rank_order=1, url='', title=error_msg, context=None)],
271+
facets=None,
272+
query_id='',
273+
)
237274

238275
if response.status_code >= 400:
239276
error_msg = f'Error searching AWS docs - status code {response.status_code}'
240277
logger.error(error_msg)
241278
await ctx.error(error_msg)
242-
return [
243-
SearchResult(
244-
rank_order=1,
245-
url='',
246-
title=error_msg,
247-
query_id='',
248-
context=None,
249-
)
250-
]
279+
return SearchResponse(
280+
search_results=[SearchResult(rank_order=1, url='', title=error_msg, context=None)],
281+
facets=None,
282+
query_id='',
283+
)
251284

252285
try:
253286
data = response.json()
254-
query_id = data.get('queryId')
287+
query_id = data.get('queryId', '')
288+
raw_facets = data.get('facets', {})
289+
290+
# Parse facets to rename keys
291+
facets = {}
292+
if raw_facets:
293+
for key, value in raw_facets.items():
294+
if key == 'aws-docs-search-product':
295+
facets['product_types'] = value
296+
elif key == 'aws-docs-search-guide':
297+
facets['guide_types'] = value
298+
255299
except json.JSONDecodeError as e:
256300
error_msg = f'Error parsing search results: {str(e)}'
257301
logger.error(error_msg)
258302
await ctx.error(error_msg)
259-
return [
260-
SearchResult(
261-
rank_order=1,
262-
url='',
263-
title=error_msg,
264-
query_id='',
265-
context=None,
266-
)
267-
]
303+
return SearchResponse(
304+
search_results=[SearchResult(rank_order=1, url='', title=error_msg, context=None)],
305+
facets=None,
306+
query_id='',
307+
)
268308

269309
results = []
270310
if 'suggestions' in data:
@@ -290,15 +330,17 @@ async def search_documentation(
290330
rank_order=i + 1,
291331
url=text_suggestion.get('link', ''),
292332
title=text_suggestion.get('title', ''),
293-
query_id=query_id,
294333
context=context,
295334
)
296335
)
297336

298337
logger.debug(f'Found {len(results)} search results for: {search_phrase}')
299338
logger.debug(f'Search query ID: {query_id}')
300-
add_search_result_cache_item(results)
301-
return results
339+
final_search_response = SearchResponse(
340+
search_results=results, facets=facets if facets else None, query_id=query_id
341+
)
342+
add_search_result_cache_item(final_search_response)
343+
return final_search_response
302344

303345

304346
@mcp.tool()

src/aws-documentation-mcp-server/awslabs/aws_documentation_mcp_server/server_utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
import httpx
1515
import os
16-
from awslabs.aws_documentation_mcp_server.models import SearchResult
16+
from awslabs.aws_documentation_mcp_server.models import SearchResponse
1717
from awslabs.aws_documentation_mcp_server.util import (
1818
extract_content_from_html,
1919
format_documentation_result,
@@ -105,20 +105,20 @@ async def read_documentation_impl(
105105
SEARCH_RESULT_CACHE = deque(maxlen=3)
106106

107107

108-
def add_search_result_cache_item(search_results: list[SearchResult]) -> None:
108+
def add_search_result_cache_item(search_response: SearchResponse) -> None:
109109
"""Adds a SearchResponse (its search results and query ID) to the cache.
110110
111111
Add search results to the front of the cache, to ensure that
112112
the most recent query ID is ahead for duplicate URLs.
113113
114114
Args:
115-
search_results: List returned by the search_documentation tool
115+
search_response: SearchResponse object returned by the search_documentation tool
116116
117117
Returns:
118118
None; updates the global SEARCH_RESULT_CACHE
119119
120120
"""
121-
SEARCH_RESULT_CACHE.appendleft(search_results)
121+
SEARCH_RESULT_CACHE.appendleft(search_response)
122122

123123

124124
def get_query_id_from_cache(url: str) -> Optional[str]:
@@ -134,11 +134,11 @@ def get_query_id_from_cache(url: str) -> Optional[str]:
134134
Query ID of URL, or None
135135
136136
"""
137-
for _, search_results in enumerate(SEARCH_RESULT_CACHE):
138-
for search_result in search_results:
137+
for _, search_response in enumerate(SEARCH_RESULT_CACHE):
138+
for search_result in search_response.search_results:
139139
if search_result.url == url:
140140
# Sanitization of query_id just in case
141-
query_id = quote(search_result.query_id)
141+
query_id = quote(search_response.query_id)
142142
return query_id
143143

144144
return None

0 commit comments

Comments
 (0)