Skip to content

Commit 60333f9

Browse files
author
Jason Scheirer
authored
fix: Change strategy back to auto from fast (#152)
* Change strategy back to auto
1 parent 58284e7 commit 60333f9

File tree

3 files changed

+14
-7
lines changed

3 files changed

+14
-7
lines changed

pipeline-notebooks/pipeline-general.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@
772 772   " # since fast api might sent the wrong one.\n",
773 773   " file_content_type = \"application/x-ole-storage\"\n",
774 774   " \n",
775     - " strategy = (m_strategy[0] if len(m_strategy) else 'fast').lower()\n",
    775 + " strategy = (m_strategy[0] if len(m_strategy) else 'auto').lower()\n",
776 776   " strategies = ['fast', 'hi_res', 'auto', 'ocr_only']\n",
777 777   " if strategy not in strategies:\n",
778 778   " raise HTTPException(\n",

prepline_general/api/general.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def pipeline_api(
236 236   # since fast api might sent the wrong one.
237 237   file_content_type = "application/x-ole-storage"
238 238
239     - strategy = (m_strategy[0] if len(m_strategy) else "fast").lower()
    239 + strategy = (m_strategy[0] if len(m_strategy) else "auto").lower()
240 240   strategies = ["fast", "hi_res", "auto", "ocr_only"]
241 241   if strategy not in strategies:
242 242   raise HTTPException(

scripts/smoketest.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
15 15   def send_document(
16 16   filename,
17 17   content_type,
18    - strategy="fast",
   18 + strategy="auto",
19 19   output_format="application/json",
20 20   pdf_infer_table_structure="false",
21 21   ):
@@ -110,14 +110,21 @@ def test_strategy_performance():
110 110   performance_ratio = 4
111 111   test_file = Path("sample-docs") / "layout-parser-paper.pdf"
112 112
113     - start_time = time.time()
    113 + start_time = time.monotonic()
114 114   response = send_document(test_file, content_type="application/pdf", strategy="hi_res")
115     - hi_res_time = time.time() - start_time
    115 + hi_res_time = time.monotonic() - start_time
116 116   assert response.status_code == 200
117 117
118     - start_time = time.time()
    118 + start_time = time.monotonic()
    119 + response = send_document(test_file, content_type="application/pdf", strategy="auto")
    120 + auto_time = time.monotonic() - start_time
    121 + assert response.status_code == 200
    122 +
    123 + assert hi_res_time > performance_ratio * auto_time
    124 +
    125 + start_time = time.monotonic()
119 126   response = send_document(test_file, content_type="application/pdf", strategy="fast")
120     - fast_time = time.time() - start_time
    127 + fast_time = time.monotonic() - start_time
121 128   assert response.status_code == 200
122 129
123 130   assert hi_res_time > performance_ratio * fast_time

0 commit comments

Comments
 (0)