Skip to content

Commit ff29989

Browse files
john-walkoe and claude committed
feat: bulk trial lookup + server-side sort for documents
search_trials_minimal: - trial_number now accepts list[str] for bulk lookup (OR semantics) - Auto-chunks lists > 100 into sequential API calls (USPTO hard cap) and merges results transparently — caller passes one list, gets one response regardless of size - query_info reports bulk_lookup, input_count, matched_count, chunks_used, and truncated flag when count < input_count - FilterBuilder.add_if updated to accept str | list[str] ptab_get_documents (trials): - Server-side sort now active: POST body includes sort: [{field: documentData.documentFilingDate, order: asc|desc}] - sort_order='asc' + offset=0 now reliably returns oldest documents (Petition, POPR, Institution Decision) for any proceeding size - Offset and limit remain server-side via POST pagination Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent cf6b2c0 commit ff29989

File tree

3 files changed

+112
-49
lines changed

3 files changed

+112
-49
lines changed

src/ptab_mcp/api/ptab_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,8 @@ async def search_trial_documents(
451451
try:
452452
body = {
453453
"filters": [{"name": "trialNumber", "value": [trial_number]}],
454-
"pagination": {"offset": offset, "limit": limit}
455-
# sort omitted until field name is confirmed against live API;
456-
# sort_order is applied client-side in ptab_get_documents
454+
"pagination": {"offset": offset, "limit": limit},
455+
"sort": [{"field": "documentData.documentFilingDate", "order": sort_order}]
457456
}
458457
logger.debug(f"Document search request body: {json.dumps(body, indent=2)}")
459458
return await self._make_request(

src/ptab_mcp/main.py

Lines changed: 96 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
from .shared.safe_logger import get_safe_logger
4141
import json
4242
import logging
43-
from typing import Optional, List
43+
from typing import Optional, List, Union
4444
from datetime import datetime
4545
from pathlib import Path
4646
import os
@@ -188,7 +188,7 @@ def get_local_proxy_port() -> int:
188188

189189
@mcp.tool()
190190
async def search_trials_minimal(
191-
trial_number: Optional[str] = None,
191+
trial_number: Optional[Union[str, List[str]]] = None,
192192
patent_number: Optional[str] = None,
193193
petitioner_name: Optional[str] = None,
194194
patent_owner_name: Optional[str] = None,
@@ -245,7 +245,9 @@ async def search_trials_minimal(
245245
- For cost optimization: ptab_get_guidance(section='cost')
246246
247247
Args:
248-
trial_number: Trial number (IPR2024-00123, PGR2025-00045, CBM2023-00001)
248+
trial_number: Single trial number (IPR2024-00123) OR list for bulk lookup
249+
(["IPR2024-00123", "IPR2024-00965", ...] up to 200).
250+
Bulk list uses OR semantics: one API call for up to 100 entries; longer lists are split into sequential 100-entry calls and merged.
249251
patent_number: Patent number (8524787, US8524787, etc.)
250252
petitioner_name: Petitioner party name (e.g., "Apple Inc")
251253
patent_owner_name: Patent owner name (e.g., "Samsung Electronics")
@@ -259,7 +261,8 @@ async def search_trials_minimal(
259261
Examples: ["trialNumber", "trialMetaData.trialStatusCategory"]
260262
If not provided, uses predefined "trials_minimal" field set.
261263
NOTE: documentBag fields are forbidden (use ptab_get_documents instead)
262-
limit: Maximum results (default 50, max 100)
264+
limit: Maximum results (default 50, max 100). Ignored for bulk lookup:
265+
when a list of 2+ trial numbers is passed, each API call uses limit=100.
263266
264267
Returns:
265268
JSON string with filtered trial data (minimal or custom field set)
@@ -276,8 +279,15 @@ async def search_trials_minimal(
276279
api_client = get_api_client()
277280

278281
# Validate inputs
282+
bulk_lookup = False
279283
if trial_number:
280-
trial_number = validate_trial_number(trial_number)
284+
if isinstance(trial_number, list):
285+
if len(trial_number) > 200:
286+
raise ValueError("trial_number list exceeds maximum of 200 entries")
287+
trial_number = [validate_trial_number(tn) for tn in trial_number]
288+
bulk_lookup = len(trial_number) > 1
289+
else:
290+
trial_number = validate_trial_number(trial_number)
281291

282292
if patent_number:
283293
patent_number = validate_patent_number(patent_number)
@@ -294,7 +304,13 @@ async def search_trials_minimal(
294304
if trial_type:
295305
trial_type = validate_trial_type(trial_type)
296306

297-
limit = validate_limit(limit, max_limit=100)
307+
# For bulk lookups, auto-chunking handles lists > 100 transparently.
308+
# The per-chunk limit is always 100 (USPTO API hard cap).
309+
# For single-value queries, enforce the normal 100 ceiling.
310+
if bulk_lookup:
311+
limit = 100 # per-call page size; total results = sum of matches across all chunks
312+
else:
313+
limit = validate_limit(limit, max_limit=100)
298314

299315
# Build filters using FilterBuilder pattern
300316
from .util.filter_builder import FilterBuilder
@@ -322,39 +338,81 @@ async def search_trials_minimal(
322338
field_list = field_manager.get_fields("trials_minimal")
323339
field_set_name = "trials_minimal"
324340

325-
# Make API call
326-
raw_response = await api_client.search_trials(
327-
filters=filters if filters else None,
328-
range_filters=range_filters if range_filters else None,
329-
pagination={"offset": 0, "limit": limit},
330-
fields=field_list
331-
)
341+
# Make API call — auto-chunk when list exceeds USPTO's 100-row hard limit.
342+
# Chunks are sequential (USPTO burst=1); results are merged transparently.
343+
API_CHUNK_SIZE = 100
344+
chunks_used = 1
345+
346+
if bulk_lookup and len(trial_number) > API_CHUNK_SIZE:
347+
chunks = [
348+
trial_number[i:i + API_CHUNK_SIZE]
349+
for i in range(0, len(trial_number), API_CHUNK_SIZE)
350+
]
351+
merged_bag = []
352+
merged_count = 0
353+
354+
for chunk in chunks:
355+
chunk_filters, _ = (FilterBuilder()
356+
.add_if(Fields.TRIAL_NUMBER, chunk)
357+
.add_if(Fields.PATENT_NUMBER, patent_number)
358+
.add_if(Fields.PETITIONER_NAME, petitioner_name)
359+
.add_if(Fields.PATENT_OWNER_NAME, patent_owner_name)
360+
.add_if(Fields.TRIAL_TYPE, trial_type)
361+
.add_if(Fields.TRIAL_STATUS, trial_status)
362+
.add_if(Fields.TECH_CENTER, tech_center)
363+
.build())
364+
365+
chunk_resp = await api_client.search_trials(
366+
filters=chunk_filters if chunk_filters else None,
367+
range_filters=range_filters if range_filters else None,
368+
pagination={"offset": 0, "limit": API_CHUNK_SIZE},
369+
fields=field_list
370+
)
371+
372+
if chunk_resp.get("error"):
373+
return json.dumps(chunk_resp, indent=2)
374+
375+
merged_bag.extend(chunk_resp.get("patentTrialProceedingDataBag", []))
376+
merged_count += chunk_resp.get("count", 0)
377+
378+
raw_response = {"patentTrialProceedingDataBag": merged_bag, "count": merged_count}
379+
chunks_used = len(chunks)
380+
else:
381+
raw_response = await api_client.search_trials(
382+
filters=filters if filters else None,
383+
range_filters=range_filters if range_filters else None,
384+
pagination={"offset": 0, "limit": limit},
385+
fields=field_list
386+
)
332387

333388
# Check for API error
334389
if raw_response.get("error"):
335390
return json.dumps(raw_response, indent=2)
336391

337392
# Filter response (custom fields vs predefined set)
338393
if fields:
339-
# Custom fields - use filter_response_custom()
340-
filtered_response = field_manager.filter_response_custom(
341-
raw_response,
342-
fields
343-
)
394+
filtered_response = field_manager.filter_response_custom(raw_response, fields)
344395
else:
345-
# Predefined tier - use standard filtering
346-
filtered_response = field_manager.filter_response(
347-
raw_response,
348-
field_set_name
349-
)
396+
filtered_response = field_manager.filter_response(raw_response, field_set_name)
350397

351398
# Format for output
399+
extra_query_info = {}
400+
if bulk_lookup:
401+
input_count = len(trial_number)
402+
matched_count = filtered_response.get("count", 0)
403+
extra_query_info["bulk_lookup"] = True
404+
extra_query_info["input_count"] = input_count
405+
extra_query_info["matched_count"] = matched_count
406+
extra_query_info["chunks_used"] = chunks_used
407+
if matched_count < input_count:
408+
extra_query_info["truncated"] = True
352409
formatted = format_trial_response(
353410
trials=filtered_response.get("patentTrialProceedingDataBag", []),
354411
query_info=create_query_info(
355412
filters=filters,
356413
range_filters=range_filters,
357-
pagination={"offset": 0, "limit": limit}
414+
pagination={"offset": 0, "limit": limit},
415+
**extra_query_info
358416
),
359417
field_set=field_set_name,
360418
context_info=filtered_response.get("context_info"),
@@ -768,16 +826,17 @@ async def ptab_get_documents(
768826
769827
**limit** - Max documents to return (default: 50, max: 200). Applied AFTER filtering.
770828
771-
**offset** - Skip the first N documents after sorting (default: 0, client-side).
772-
Applied after sort_order so results are consistent.
773-
Example: sort_order='asc', offset=5, limit=10 → documents 6-15 oldest-first.
829+
**offset** - Skip the first N documents (default: 0).
830+
For trials: server-side — sent directly to the POST search endpoint.
831+
Example: sort_order='asc', offset=25, limit=25 → documents 26-50 oldest-first.
774832
775-
**sort_order** - Sort direction applied client-side to the API response (default: "desc"):
776-
- "desc": Newest first (default — same as previous behavior)
833+
**sort_order** - Sort direction (default: "desc"):
834+
- "desc": Newest first (default)
777835
- "asc": Oldest first — surfaces the Petition, POPR, Institution Decision,
778-
and early exhibits which the API returns last in default order.
779-
NOTE: The USPTO documents endpoint does not support server-side sort/pagination
780-
query params. sort_order and offset operate on whatever the API returns (~25 docs).
836+
and early exhibits filed at the beginning of the proceeding.
837+
For trials: sort is server-side (documentData.documentFilingDate), so offset=0
838+
with sort_order='asc' reliably returns the oldest documents (Petition, etc.).
839+
For appeals/interferences: sort is client-side on whatever the GET endpoint returns.
781840
782841
RETRIEVING EARLY DOCUMENTS (Petition, POPR, Institution Decision):
783842
# Oldest documents first — Petition, POPR, early exhibits
@@ -895,9 +954,9 @@ async def ptab_get_documents(
895954
identifier: Trial number (IPR2024-00123), appeal number (2024-001234), or interference number
896955
identifier_type: Type of proceeding - "trial" (default), "appeal", or "interference"
897956
limit: Max documents to return (default: 50, max: 200)
898-
offset: Skip first N documents after sorting (client-side, default: 0).
899-
sort_order: Client-side sort direction - "desc" (newest first, default) or "asc" (oldest first).
900-
Use "asc" to surface the Petition and earliest filings first.
957+
offset: Skip first N documents (default: 0). Server-side for trials, client-side for appeals/interferences.
958+
sort_order: Sort direction - "desc" (newest first, default) or "asc" (oldest first).
959+
Server-side for trials (by documentFilingDate); client-side for appeals/interferences.
901960
document_title: Case-insensitive substring match on documentTypeDescriptionText.
902961
Use to target specific document types, e.g. 'Final Written Decision',
903962
'Institution Decision', 'Petition for Inter Partes', 'Patent Owner Response'.
@@ -1077,8 +1136,8 @@ async def ptab_get_documents(
10771136
]
10781137
filters_applied["outcome_category"] = outcome_category
10791138

1080-
# Sort client-side (server-side sort omitted until field name is confirmed).
1081-
# For trials: offset/limit are server-side; sort is client-side on returned page.
1139+
# Sort: trials use server-side sort (documentData.documentFilingDate in POST body).
1140+
# Client-side sort here serves as a tiebreaker/fallback and handles appeals/interferences.
10821141
# For appeals/interferences: offset/limit/sort are all client-side.
10831142
def _sort_key(doc):
10841143
return doc.get("documentFilingDate") or doc.get("lastModifiedDateTime") or ""

src/ptab_mcp/util/filter_builder.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
Implements the Builder Pattern with method chaining for clean, readable code.
66
"""
77

8-
from typing import List, Dict, Any, Optional
8+
from typing import List, Dict, Any, Optional, Union
99

1010

1111
class FilterBuilder:
@@ -48,28 +48,33 @@ def __init__(self):
4848
self._filters: List[Dict[str, Any]] = []
4949
self._range_filters: List[Dict[str, Any]] = []
5050

51-
def add_if(self, field_name: str, value: Optional[str]) -> 'FilterBuilder':
51+
def add_if(self, field_name: str, value: Optional[Union[str, List[str]]]) -> 'FilterBuilder':
5252
"""
5353
Add exact-match filter only if value is not None (fluent interface).
54+
Accepts a single string or a list of strings (OR semantics in the API).
5455
5556
Args:
5657
field_name: API field name (e.g., "trialNumber", "patentOwnerData.patentNumber")
57-
value: Filter value (only added if not None or empty string)
58+
value: Filter value — string or list of strings (only added if not None/empty)
5859
5960
Returns:
6061
Self for method chaining
6162
6263
Example:
6364
>>> builder = FilterBuilder()
64-
>>> builder.add_if("trialNumber", "IPR2024-00123") # Added
65+
>>> builder.add_if("trialNumber", "IPR2024-00123") # Single value
66+
>>> builder.add_if("trialNumber", ["IPR2024-00123", "IPR2024-00965"]) # Bulk OR
6567
>>> builder.add_if("patentNumber", None) # Skipped
6668
>>> builder.add_if("petitioner", "") # Skipped
6769
"""
68-
if value is not None and value != "":
69-
self._filters.append({
70-
"name": field_name,
71-
"value": [value]
72-
})
70+
if value is None or value == "" or value == []:
71+
return self
72+
if isinstance(value, list):
73+
values = [v for v in value if v]
74+
if values:
75+
self._filters.append({"name": field_name, "value": values})
76+
else:
77+
self._filters.append({"name": field_name, "value": [value]})
7378
return self
7479

7580
def add_range_if(

0 commit comments

Comments
 (0)