Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions tests/client_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os
import unittest
from difflib import SequenceMatcher, unified_diff
from pathlib import Path

import pytest
Expand All @@ -23,9 +24,7 @@ def test_get_usage_info(client):
"subscription_plan",
"today_page_count",
]
assert set(usage_info.keys()) == set(
expected_keys
), f"usage_info {usage_info} does not contain the expected keys"
assert set(usage_info.keys()) == set(expected_keys), f"usage_info {usage_info} does not contain the expected keys"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -56,7 +55,21 @@ def test_whisper(client, data_dir, processing_mode, output_mode, input_file):

assert isinstance(response, dict)
assert response["status_code"] == 200
assert response["extracted_text"] == exp

# For text based processing, perform a strict match
if processing_mode == "text" and output_mode == "text":
assert response["extracted_text"] == exp
# For OCR based processing, perform a fuzzy match
else:
extracted_text = response["extracted_text"]
similarity = SequenceMatcher(None, extracted_text, exp).ratio()
threshold = 0.97

if similarity < threshold:
diff = "\n".join(
unified_diff(exp.splitlines(), extracted_text.splitlines(), fromfile="Expected", tofile="Extracted")
)
pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. Diff:\n{diff}")


# TODO: Review and port to pytest based tests
Expand All @@ -78,9 +91,7 @@ def test_whisper(self):
# @unittest.skip("Skipping test_whisper")
def test_whisper_stream(self):
client = LLMWhispererClient()
download_url = (
"https://storage.googleapis.com/pandora-static/samples/bill.jpg.pdf"
)
download_url = "https://storage.googleapis.com/pandora-static/samples/bill.jpg.pdf"
# Create a stream of download_url and pass it to whisper
response_download = requests.get(download_url, stream=True)
response_download.raise_for_status()
Expand All @@ -95,18 +106,14 @@ def test_whisper_stream(self):
@unittest.skip("Skipping test_whisper_status")
def test_whisper_status(self):
client = LLMWhispererClient()
response = client.whisper_status(
whisper_hash="7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a"
)
response = client.whisper_status(whisper_hash="7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a")
logger.info(response)
self.assertIsInstance(response, dict)

@unittest.skip("Skipping test_whisper_retrieve")
def test_whisper_retrieve(self):
client = LLMWhispererClient()
response = client.whisper_retrieve(
whisper_hash="7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a"
)
response = client.whisper_retrieve(whisper_hash="7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a")
logger.info(response)
self.assertIsInstance(response, dict)

Expand Down
223 changes: 111 additions & 112 deletions tests/test_data/expected/credit_card.ocr.line-printer.txt

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ How did you hear about us?
[ ] This mailer
[X] Other (Explain) SAW THE SIGN WHEN THE LOCATION

WAS BEING BUILT
WAS BEING BUILT

By signing, I agree to receive all communications from acme, inc.

Expand Down
12 changes: 10 additions & 2 deletions tests/test_data/expected/handwritten-form.text.text.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@ Name
STEPHEN YOUNG
Address
123 MAIN ST.
Are you: :unselected: Married :selected: Single
How did you hear about us? :unselected: Search Ad :unselected: Facebook :unselected: :selected: X (formerly Twitter) :unselected: This mailer :selected: Other (Explain) ; SAW THE SIGN WHEN THE LOCATION WAS BEING BUILT
Are you:
[ ] Married
[X] Single
How did you hear about us?
[ ] Search Ad
[ ] Facebook
[ ] [X] X (formerly Twitter)
[ ] This mailer
[X] Other (Explain) i SAW THE SIGN WHEN THE LOCATION WAS BEING BUILT
By signing, I agree to receive all communications from acme, inc.
Signature
Date
10/15/23



<<<
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,27 @@

Type : Table

Table Number: 2
Table Number : 2

Bill No .: T2 -- 126653

Date:2023-05-31 23:16:50
Kots:63

Item Qty Amt

Jack The

Ripper 1 400.00
Plain Fries +
Coke 300 ML 1 130.00
Plain Fries +
Coke 300 ML 1 130.00

Total Qty: 2
SubTotal : 530.00
SubTotal: 530.00

GST@5% 26.50

CGST @2.5% 13.25
CGST @2.5%. 13.25
SGST @2.5% 13.25

Round Off: 0.50
Round Off : 0.50

Total Invoice Value: 557

Expand Down
15 changes: 9 additions & 6 deletions tests/test_data/expected/restaurant_invoice_photo.ocr.text.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
BURGER SEIGNEUR No. 35, 80 feet road, HAL 3rd Stage, Indiranagar, Bangalore GST: 29AAHFL9534H1ZV
Order Number : T2- 57 Type : Table Table Number: 2
Order Number : T2- 57 Type : Table Table Number : 2
Bill No .: T2 -- 126653 Date:2023-05-31 23:16:50 Kots:63
Item
Qty
Expand All @@ -14,20 +14,23 @@ Coke 300 ML
130.00
Total Qty:
2
SubTotal :
SubTotal:
530.00
GST@5%
26.50
CGST @2.5%
CGST @2.5%.
13.25
SGST @2.5%
13.25
Round Off:
Round Off :
0.50
Total Invoice Value:
Total Invoice
Value:
557
PAY: 557 Thank you, visit again! :selected:
PAY: 557
Thank you, visit again!
Powered by - POSIST



<<<
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,27 @@

Type : Table

Table Number: 2
Table Number : 2

Bill No .: T2 -- 126653

Date:2023-05-31 23:16:50
Kots:63

Item Qty Amt

Jack The

Ripper 1 400.00
Plain Fries +
Coke 300 ML 1 130.00
Plain Fries +
Coke 300 ML 1 130.00

Total Qty: 2
SubTotal : 530.00
SubTotal: 530.00

GST@5% 26.50

CGST @2.5% 13.25
CGST @2.5%. 13.25
SGST @2.5% 13.25

Round Off: 0.50
Round Off : 0.50

Total Invoice Value: 557

Expand Down