|
11 | 11 |
|
12 | 12 | logger = logging.getLogger(__name__) |
13 | 13 |
|
| 14 | +# Shared tolerances and thresholds used by the test assertions below |
| 15 | +COORDINATE_TOLERANCE = 2 |
| 16 | +PERCENTAGE_TOLERANCE = 0.05 |
| 17 | +PAGE_HEIGHT_TOLERANCE = 5 |
| 18 | +OCR_SIMILARITY_THRESHOLD = 0.90 |
| 19 | + |
14 | 20 |
|
15 | 21 | def test_get_usage_info(client_v2: LLMWhispererClientV2) -> None: |
16 | 22 | usage_info = client_v2.get_usage_info() |
@@ -103,12 +109,12 @@ def test_highlight(client_v2: LLMWhispererClientV2, data_dir: str, input_file: s |
103 | 109 |
|
104 | 110 | # Assert line 2 data |
105 | 111 | line2 = highlight_data["2"] |
106 | | - assert line2["base_y"] == pytest.approx(155, abs=2) |
107 | | - assert line2["base_y_percent"] == pytest.approx(4.8927, abs=0.05) # Using approx for float comparison |
108 | | - assert line2["height"] == pytest.approx(51, abs=2) |
109 | | - assert line2["height_percent"] == pytest.approx(1.6098, abs=0.05) # Using approx for float comparison |
| 112 | + assert line2["base_y"] == pytest.approx(155, abs=COORDINATE_TOLERANCE) |
| 113 | + assert line2["base_y_percent"] == pytest.approx(4.8927, abs=PERCENTAGE_TOLERANCE) |
| 114 | + assert line2["height"] == pytest.approx(51, abs=COORDINATE_TOLERANCE) |
| 115 | + assert line2["height_percent"] == pytest.approx(1.6098, abs=PERCENTAGE_TOLERANCE) |
110 | 116 | assert line2["page"] == 0 |
111 | | - assert line2["page_height"] == pytest.approx(3168, abs=5) |
| 117 | + assert line2["page_height"] == pytest.approx(3168, abs=PAGE_HEIGHT_TOLERANCE) |
112 | 118 |
|
113 | 119 |
|
114 | 120 | @pytest.mark.parametrize( |
@@ -237,13 +243,13 @@ def assert_extracted_text(file_path: str, whisper_result: dict, mode: str, outpu |
237 | 243 | assert whisper_result["status_code"] == 200 |
238 | 244 |
|
239 | 245 | # For OCR-based processing |
240 | | - threshold = 0.90 |
| 246 | + threshold = OCR_SIMILARITY_THRESHOLD |
241 | 247 |
|
242 | 248 | # For text-based processing |
243 | 249 | if mode == "native_text" and output_mode == "text": |
244 | 250 | threshold = 0.99 |
245 | 251 | elif mode == "low_cost": |
246 | | - threshold = 0.90 |
| 252 | + threshold = OCR_SIMILARITY_THRESHOLD |
247 | 253 | extracted_text = whisper_result["extraction"]["result_text"] |
248 | 254 | similarity = SequenceMatcher(None, extracted_text, exp).ratio() |
249 | 255 |
|
|
0 commit comments