Skip to content

Commit 8a3d218

Browse files
committed
fix: handle nested response structure in DocSplitterClient status polling
- Extract status from nested 'data' structure in wait_for_completion
- Support both uppercase and lowercase status values
- Add comprehensive test for nested response format
- Fix infinite polling issue with real doc-splitter API
1 parent efa4076 commit 8a3d218

File tree

2 files changed

+90
-5
lines changed

2 files changed

+90
-5
lines changed

src/apihub_client/doc_splitter.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,16 @@ def upload(
6767
self.logger.debug("Request Headers Sent: %s", response.request.headers)
6868
self.logger.debug("Request URL: %s", response.request.url)
6969

70-
if response.status_code != 200:
70+
if response.status_code not in [200, 202]:
7171
self.logger.error("Upload failed: %s", response.text)
7272
raise ApiHubClientException(response.text, response.status_code)
7373

7474
data = response.json()
75-
job_id = data.get("job_id")
75+
# Extract job_id from the nested data structure
76+
if "data" in data and isinstance(data["data"], dict):
77+
job_id = data["data"].get("job_id")
78+
else:
79+
job_id = data.get("job_id")
7680
self.logger.info("Upload completed successfully. Job ID: %s", job_id)
7781

7882
# If wait_for_completion is True, poll for status and return final result
@@ -171,13 +175,17 @@ def wait_for_completion(
171175

172176
while time.time() - start_time < timeout:
173177
status_result = self.get_job_status(job_id)
174-
status = status_result.get("status")
178+
# Extract status from nested data structure
179+
if "data" in status_result and isinstance(status_result["data"], dict):
180+
status = status_result["data"].get("status")
181+
else:
182+
status = status_result.get("status")
175183
self.logger.info("Current status: %s", status)
176184

177-
if status == "COMPLETED":
185+
if status and status.upper() == "COMPLETED":
178186
self.logger.info("Processing completed")
179187
return status_result
180-
elif status == "FAILED":
188+
elif status and status.upper() == "FAILED":
181189
self.logger.error("Processing failed")
182190
raise ApiHubClientException(
183191
f"Processing failed for job_id: {job_id}",

test/test_doc_splitter.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,42 @@ def test_upload_success(self, client, mock_file_content):
5555
)
5656
assert request.headers["apikey"] == "test_api_key"
5757

58+
def test_upload_success_202_nested_response(self, client, mock_file_content):
59+
"""Test successful file upload with 202 status and nested response."""
60+
with requests_mock.Mocker() as m:
61+
# Mock successful upload response with 202 and nested data structure
62+
m.post(
63+
"http://localhost:8005/api/v1/doc-splitter/documents/upload",
64+
json={
65+
"data": {
66+
"filename": "test.pdf",
67+
"job_id": "93bbb7ab-3291-429c-8923-3b179e7ae5bf",
68+
"pages": 2,
69+
"size_bytes": 63580,
70+
"status": "queued",
71+
"upload_timestamp": "2025-08-23T04:28:32.424535Z",
72+
"user_limits": {
73+
"current_jobs": 1,
74+
"jobs_today": 3,
75+
"max_jobs_per_day": 5000,
76+
"max_parallel_jobs": 5,
77+
},
78+
},
79+
"request_id": "eb75ac1e-a224-4624-a4f2-6c813ddc2b3c",
80+
"success": True,
81+
"timestamp": "2025-08-23T04:28:32.424612",
82+
},
83+
status_code=202,
84+
)
85+
86+
with patch("builtins.open", mock_open(read_data=mock_file_content)):
87+
result = client.upload(file_path="/test/document.pdf")
88+
89+
# Verify we got the nested response structure
90+
assert result["data"]["job_id"] == "93bbb7ab-3291-429c-8923-3b179e7ae5bf"
91+
assert result["data"]["status"] == "queued"
92+
assert result["success"] is True
93+
5894
def test_upload_file_not_found(self, client):
5995
"""Test upload with non-existent file."""
6096
with patch("builtins.open", side_effect=FileNotFoundError("File not found")):
@@ -317,6 +353,47 @@ def test_wait_for_completion_failed_status(self, client):
317353

318354
assert "Processing failed for job_id: test-job" in exc_info.value.message
319355

356+
def test_wait_for_completion_nested_response(self, client):
357+
"""Test wait_for_completion with nested response structure."""
358+
with requests_mock.Mocker() as m:
359+
# Mock status responses with nested structure (processing, then completed)
360+
m.get(
361+
"http://localhost:8005/api/v1/doc-splitter/jobs/status?job_id=test-nested",
362+
[
363+
{
364+
"json": {
365+
"data": {
366+
"status": "processing",
367+
"job_id": "test-nested",
368+
"current_step": "page_image_gen",
369+
},
370+
"success": True,
371+
},
372+
"status_code": 200,
373+
},
374+
{
375+
"json": {
376+
"data": {
377+
"status": "completed",
378+
"job_id": "test-nested",
379+
"finished_at": "2025-08-23T04:40:00.000000Z",
380+
},
381+
"success": True,
382+
},
383+
"status_code": 200,
384+
},
385+
],
386+
)
387+
388+
with patch("time.sleep") as mock_sleep:
389+
result = client.wait_for_completion("test-nested", polling_interval=1)
390+
391+
# Should successfully complete and return the nested structure
392+
assert result["data"]["status"] == "completed"
393+
assert result["data"]["job_id"] == "test-nested"
394+
assert result["success"] is True
395+
mock_sleep.assert_called_with(1)
396+
320397
def test_logging_output(self, client, caplog, mock_file_content):
321398
"""Test that appropriate logging messages are generated."""
322399
with requests_mock.Mocker() as m:

0 commit comments

Comments
 (0)