Skip to content

Commit dfa9f46

Browse files
committed
Update tests
1 parent 65e4341 commit dfa9f46

File tree

3 files changed

+133
-1
lines changed

3 files changed

+133
-1
lines changed

.github/workflows/ci-checks.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,5 @@ jobs:
6060
working-directory: image_processing
6161

6262
- name: Run PyTest
63-
run: uv run pytest --cov=image_processing
63+
run: uv run pytest --cov=. --cov-config=.coveragerc
6464
working-directory: image_processing

image_processing/tests/image_processing/test_layout_analysis.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,29 @@ def __init__(self, content):
3737
self.content = content
3838

3939

40+
class DummyPoller:
    """Test double for the poller object returned by ``begin_analyze_document``.

    Holds a canned result and exposes the operation id through ``details``.
    """

    def __init__(self, result, operation_id):
        # ``details`` is a plain dict keyed by "operation_id".
        self.details = {"operation_id": operation_id}
        self._result = result

    async def result(self):
        """Return the canned result that was passed to the constructor."""
        return self._result
47+
48+
49+
class DummyDocIntelligenceClient:
    """Async-context-manager test double for the Document Intelligence client."""

    async def __aenter__(self):
        # Entering the context yields the client itself.
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # Nothing to release; returning None lets any exception propagate.
        return None

    async def begin_analyze_document(self, **kwargs):
        """Return a poller wrapping a canned "HelloWorld" result; kwargs are ignored."""
        # A single dummy page spanning the first 5 characters.
        pages = [DummyPage(0, 5, 1)]
        canned_result = DummyResult("HelloWorld", pages=pages, figures=[])
        return DummyPoller(canned_result, "dummy_op")
61+
62+
4063
class DummyFigure:
4164
def __init__(self, id, offset, length, page_number, caption_content):
4265
self.id = id # note: process_figures_from_extracted_content checks "if figure.id is None"
@@ -361,3 +384,54 @@ async def test_process_layout_analysis_missing_source():
361384
assert result["data"] is None
362385
assert result["errors"] is not None
363386
assert "Pass a valid source" in result["errors"][0]["message"]
387+
388+
389+
@pytest.mark.asyncio
async def test_analyse_document_success(monkeypatch, tmp_path):
    """Check that analyse_document leaves the dummy result and operation id on the instance."""
    # Temporary on-disk file with dummy content to hand to analyse_document.
    local_path = tmp_path / "dummy.txt"
    local_path.write_bytes(b"dummy content")

    analyser = LayoutAnalysis(
        record_id=999,
        source="https://dummyaccount.blob.core.windows.net/container/path/to/dummy.txt",
    )

    # Replace the client factory with a coroutine that returns the test double.
    async def _client_factory():
        return DummyDocIntelligenceClient()

    monkeypatch.setattr(
        analyser, "get_document_intelligence_client", _client_factory
    )

    await analyser.analyse_document(str(local_path))

    assert analyser.result is not None
    assert analyser.operation_id == "dummy_op"
    # The stored result should carry the dummy client's canned content.
    assert analyser.result.content == "HelloWorld"
414+
415+
416+
def test_create_page_wise_content():
    """Check create_page_wise_content against a dummy result holding one page."""
    analyser = LayoutAnalysis(record_id=100, source="dummy")

    # Bare attribute holder standing in for the analysis result.
    class DummyResultContent:
        pass

    # Content "HelloWorld" plus one page whose span starts at index 0 with length 5.
    stub_result = DummyResultContent()
    stub_result.content = "HelloWorld"
    stub_result.pages = [DummyPage(0, 5, 1)]
    analyser.result = stub_result

    layouts = analyser.create_page_wise_content()
    assert isinstance(layouts, list)
    assert len(layouts) == 1

    first_layout = layouts[0]
    # The page content should be the substring "Hello".
    assert first_layout.content == "Hello"
    assert first_layout.page_number == 1
    assert first_layout.page_offsets == 0

image_processing/tests/image_processing/test_mark_up_cleaner.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,61 @@ async def test_clean(cleaner, sample_text, figures):
127127
assert "chunk_figures" in result["data"]
128128
assert len(result["data"]["chunk_figures"]) == 1
129129
assert result["data"]["chunk_figures"][0]["FigureId"] == "12345"
130+
131+
132+
def test_get_sections_empty_text(cleaner):
    """With no text provided, get_sections should report no sections."""
    assert cleaner.get_sections("") == []
136+
137+
138+
def test_get_figure_ids_no_figures(cleaner):
    """Text without any figure tags should produce an empty list of figure ids."""
    plain_text = "This text does not include any figures."
    found_ids = cleaner.get_figure_ids(plain_text)
    assert found_ids == []
142+
143+
144+
def test_remove_markdown_tags_unknown_tag(cleaner):
    """A tag pattern that matches nothing should leave the text unchanged."""
    original_text = "This is a basic text without markdown."
    cleaned_text = cleaner.remove_markdown_tags(
        original_text, {"nonexistent": r"(pattern)"}
    )
    assert cleaned_text == original_text
150+
151+
152+
def test_clean_text_and_extract_metadata_empty_text(cleaner, figures):
    """Empty input text is expected to come back as an empty string."""
    outcome = cleaner.clean_text_and_extract_metadata("", figures)
    assert outcome == ""
156+
157+
158+
@pytest.mark.asyncio
async def test_clean_missing_chunk(cleaner):
    """A record whose data lacks the "chunk" key should yield an error response."""
    payload = {"recordId": "3", "data": {"figures": []}}
    response = await cleaner.clean(payload)

    # The record id is echoed back; data is absent and an error is reported.
    assert response["recordId"] == "3"
    assert response["data"] is None
    assert response["errors"] is not None
    first_error = response["errors"][0]
    assert "Failed to cleanup data" in first_error["message"]
170+
171+
172+
@pytest.mark.asyncio
async def test_clean_with_invalid_figures_structure(cleaner):
    """Figure dicts lacking the expected structure should yield an error response."""
    # The single figure entry is missing the keys required to build a FigureHolder.
    malformed_figures = [{"invalid_key": "no_fig_id"}]
    payload = {
        "recordId": "4",
        "data": {
            "chunk": "Some text with # Header",
            "figures": malformed_figures,
        },
    }

    response = await cleaner.clean(payload)

    assert response["recordId"] == "4"
    assert response["data"] is None
    assert response["errors"] is not None

0 commit comments

Comments
 (0)