Skip to content

Commit 26acb09

Browse files
committed
Add test for non-seekable content
1 parent 8978272 commit 26acb09

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

tests/test_pdfparser.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,56 @@ async def mock_poller_result():
253253
assert captured_bodies[0].bytes_source == b"pdf content bytes"
254254

255255

256+
@pytest.mark.asyncio
async def test_parse_with_non_seekable_stream(monkeypatch):
    """Parse content supplied by a stream object that offers no seek support.

    The stream double below exposes only ``name`` and ``read()``. The test
    checks that the parser still yields the single analyzed page and that the
    request handed to ``begin_analyze_document`` carries the stream's bytes in
    ``bytes_source``.
    """
    mock_poller = MagicMock()
    # Every ``body`` keyword passed to the patched client call is recorded here.
    captured_bodies: list[AnalyzeDocumentRequest] = []

    async def mock_begin_analyze_document(self, model_id, **kwargs):
        # Stand-in for the client call: remember the request body and hand
        # back the mock poller instead of contacting a service.
        captured_bodies.append(kwargs["body"])
        return mock_poller

    async def mock_poller_result():
        # One page whose single span covers the 12 characters of "Page content".
        return AnalyzeResult(
            content="Page content",
            pages=[DocumentPage(page_number=1, spans=[DocumentSpan(offset=0, length=12)])],
            tables=[],
            figures=[],
        )

    monkeypatch.setattr(DocumentIntelligenceClient, "begin_analyze_document", mock_begin_analyze_document)
    monkeypatch.setattr(mock_poller, "result", mock_poller_result)

    class NonSeekableStream:
        """Minimal stream double: a ``name`` and ``read()``, with no seek/tell."""

        def __init__(self, data: bytes, name: str):
            self._data = data
            self._name = name

        @property
        def name(self) -> str:
            return self._name

        def read(self) -> bytes:
            # Always returns the full payload; no read position is tracked.
            return self._data

    parser = DocumentAnalysisParser(
        endpoint="https://example.com",
        credential=MockAzureCredential(),
    )

    stream = NonSeekableStream(b"pdf content bytes", "nonseekable.pdf")
    pages = [page async for page in parser.parse(stream)]

    assert len(pages) == 1
    assert pages[0].page_num == 0
    assert pages[0].offset == 0
    assert pages[0].text == "Page content"
    assert len(captured_bodies) == 1
    assert isinstance(captured_bodies[0], AnalyzeDocumentRequest)
    assert captured_bodies[0].bytes_source == b"pdf content bytes"
304+
305+
256306
@pytest.mark.asyncio
257307
async def test_parse_doc_with_tables(monkeypatch):
258308
mock_poller = MagicMock()

0 commit comments

Comments
 (0)