Skip to content

Commit a1399a6

Browse files
fix: Added retries for whisper-status API in LLMW v2 (#126)
* Added retries for whisper-status API in LLMW v2 * Corrected mentions of LLMWhisperer * Corrected mentions of LLMWhisperer - 2 * Reverted LLMW error message key back to * Fixed imports, used MimeType constant and minor error handling changes * Bumped version to 0.54.0rc4 --------- Signed-off-by: Chandrasekharan M <[email protected]>
1 parent 265d5b9 commit a1399a6

File tree

19 files changed

+128
-98
lines changed

19 files changed

+128
-98
lines changed

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.54.0rc3"
1+
__version__ = "0.54.0rc4"
22

33

44
def get_sdk_version():

src/unstract/sdk/adapters/utils.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
from pathlib import Path
23

34
import filetype
@@ -6,8 +7,11 @@
67
from requests.exceptions import RequestException
78

89
from unstract.sdk.adapters.constants import Common
10+
from unstract.sdk.constants import MimeType
911
from unstract.sdk.file_storage import FileStorage, FileStorageProvider
1012

13+
logger = logging.getLogger(__name__)
14+
1115

1216
class AdapterUtils:
1317
@staticmethod
@@ -25,17 +29,38 @@ def get_msg_from_request_exc(
2529
Returns:
2630
str: Error message returned by the server
2731
"""
28-
if hasattr(err, "response"):
29-
err_response: Response = err.response # type: ignore
30-
if err_response.headers["Content-Type"] == "application/json":
31-
err_json = err_response.json()
32-
if message_key in err_json:
33-
return str(err_json[message_key])
34-
elif err_response.headers["Content-Type"] == "text/plain":
35-
return err_response.text # type: ignore
32+
if not hasattr(err, "response"):
33+
return default_err
34+
35+
err_response: Response = err.response # type: ignore
36+
err_content_type = err_response.headers.get("Content-Type")
37+
38+
if not err_content_type:
39+
logger.warning(
40+
f"Content-Type header not found in {err_response}, "
41+
f"returning {default_err}"
42+
)
43+
return default_err
44+
45+
if err_content_type == MimeType.JSON:
46+
err_json = err_response.json()
47+
if message_key in err_json:
48+
return str(err_json[message_key])
49+
else:
50+
logger.warning(
51+
f"Unable to parse error with key '{message_key}' for "
52+
f"'{err_json}', returning '{default_err}' instead."
53+
)
54+
elif err_content_type == MimeType.TEXT:
55+
return err_response.text # type: ignore
56+
else:
57+
logger.warning(
58+
f"Unhandled err_response type '{err_content_type}' "
59+
f"for {err_response}, returning {default_err}"
60+
)
3661
return default_err
3762

38-
# ToDo: get_file_mime_type() to be removed once migrated to FileStorage
63+
# TODO: get_file_mime_type() to be removed once migrated to FileStorage
3964
# FileStorage has mime_type() which could be used instead.
4065
@staticmethod
4166
def get_file_mime_type(

src/unstract/sdk/adapters/x2text/helper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from unstract.sdk.adapters.exceptions import AdapterError
99
from unstract.sdk.adapters.utils import AdapterUtils
1010
from unstract.sdk.adapters.x2text.constants import X2TextConstants
11+
from unstract.sdk.constants import MimeType
1112
from unstract.sdk.file_storage import FileStorage, FileStorageProvider
1213

1314
logger = logging.getLogger(__name__)
@@ -111,7 +112,7 @@ def make_request(
111112
X2TextConstants.PLATFORM_SERVICE_API_KEY
112113
)
113114
headers = {
114-
"accept": "application/json",
115+
"accept": MimeType.JSON,
115116
"Authorization": f"Bearer {platform_service_api_key}",
116117
}
117118
body = {

src/unstract/sdk/adapters/x2text/llm_whisperer/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# Unstract LLM Whisperer X2Text Adapter
1+
# Unstract LLMWhisperer X2Text Adapter
22

33
## Env variables
44

5-
The below env variables are resolved by LLM Whisperer adapter
5+
The below env variables are resolved by LLMWhisperer adapter
66

77
| Variable | Description |
88
| ---------------------------- | -------------------------------------------------------------------------------------------- |

src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class WhispererEndpoint:
3939

4040

4141
class WhispererEnv:
42-
"""Env variables for LLM whisperer.
42+
"""Env variables for LLMWhisperer.
4343
4444
Can be used to alter behaviour at runtime.
4545
@@ -89,7 +89,7 @@ class WhisperStatus:
8989

9090

9191
class WhispererDefaults:
92-
"""Defaults meant for LLM whisperer."""
92+
"""Defaults meant for LLMWhisperer."""
9393

9494
MEDIAN_FILTER_SIZE = 0
9595
GAUSSIAN_BLUR_RADIUS = 0.0
@@ -104,4 +104,3 @@ class WhispererDefaults:
104104
PAGE_SEPARATOR = "<<< >>>"
105105
MARK_VERTICAL_LINES = False
106106
MARK_HORIZONTAL_LINES = False
107-

src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
WhisperStatus,
2828
)
2929
from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter
30+
from unstract.sdk.constants import MimeType
3031
from unstract.sdk.file_storage import FileStorage, FileStorageProvider
3132

3233
logger = logging.getLogger(__name__)
@@ -61,13 +62,13 @@ def get_json_schema() -> str:
6162
return schema
6263

6364
def _get_request_headers(self) -> dict[str, Any]:
64-
"""Obtains the request headers to authenticate with LLM Whisperer.
65+
"""Obtains the request headers to authenticate with LLMWhisperer.
6566
6667
Returns:
6768
dict[str, Any]: Request headers
6869
"""
6970
return {
70-
"accept": "application/json",
71+
"accept": MimeType.JSON,
7172
WhispererHeader.UNSTRACT_KEY: self.config.get(WhispererConfig.UNSTRACT_KEY),
7273
}
7374

@@ -79,11 +80,11 @@ def _make_request(
7980
params: Optional[dict[str, Any]] = None,
8081
data: Optional[Any] = None,
8182
) -> Response:
82-
"""Makes a request to LLM whisperer service.
83+
"""Makes a request to LLMWhisperer service.
8384
8485
Args:
8586
request_method (HTTPMethod): HTTPMethod to call. Can be GET or POST
86-
request_endpoint (str): LLM whisperer endpoint to hit
87+
request_endpoint (str): LLMWhisperer endpoint to hit
8788
headers (Optional[dict[str, Any]], optional): Headers to pass.
8889
Defaults to None.
8990
params (Optional[dict[str, Any]], optional): Query params to pass.
@@ -119,15 +120,15 @@ def _make_request(
119120
except ConnectionError as e:
120121
logger.error(f"Adapter error: {e}")
121122
raise ExtractorError(
122-
"Unable to connect to LLM Whisperer service, please check the URL"
123+
"Unable to connect to LLMWhisperer service, please check the URL"
123124
)
124125
except Timeout as e:
125-
msg = "Request to LLM whisperer has timed out"
126+
msg = "Request to LLMWhisperer has timed out"
126127
logger.error(f"{msg}: {e}")
127128
raise ExtractorError(msg)
128129
except HTTPError as e:
129130
logger.error(f"Adapter error: {e}")
130-
default_err = "Error while calling the LLM Whisperer service"
131+
default_err = "Error while calling the LLMWhisperer service"
131132
msg = AdapterUtils.get_msg_from_request_exc(
132133
err=e, message_key="message", default_err=default_err
133134
)

src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"title": "LLM Whisperer X2Text",
2+
"title": "LLMWhisperer X2Text",
33
"type": "object",
44
"required": [
55
"adapter_name",
@@ -11,14 +11,14 @@
1111
"type": "string",
1212
"title": "Name",
1313
"default": "",
14-
"description": "Provide a unique name for this adapter instance. Example: LLM Whisperer 1"
14+
"description": "Provide a unique name for this adapter instance. Example: LLMWhisperer 1"
1515
},
1616
"url": {
1717
"type": "string",
1818
"title": "URL",
1919
"format": "uri",
2020
"default": "https://llmwhisperer-api.unstract.com",
21-
"description": "Provide the URL of the LLM Whisperer service. Please note that this version of LLM Whisperer is deprecated."
21+
"description": "Provide the URL of the LLMWhisperer service. Please note that this version of LLMWhisperer is deprecated."
2222
},
2323
"unstract_key": {
2424
"type": "string",

src/unstract/sdk/adapters/x2text/llm_whisperer_v2/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# Unstract LLM Whisperer v2 X2Text Adapter
1+
# Unstract LLMWhisperer v2 X2Text Adapter
22

33
## Env variables
44

5-
The below env variables are resolved by LLM Whisperer adapter
5+
The below env variables are resolved by LLMWhisperer adapter
66

77
| Variable | Description |
88
| ---------------------------- | -------------------------------------------------------------------------------------------- |

src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class WhispererEndpoint:
3333

3434

3535
class WhispererEnv:
36-
"""Env variables for LLM whisperer.
36+
"""Env variables for LLMWhisperer.
3737
3838
Can be used to alter behaviour at runtime.
3939
@@ -42,10 +42,13 @@ class WhispererEnv:
4242
LLMWhisperer's status API. Defaults to 30s
4343
MAX_POLLS: Total number of times to poll the status API.
4444
Set to -1 to poll indefinitely. Defaults to 30
45+
STATUS_RETRIES: Number of times to retry calling LLMWhisperer's status API
46+
on failure during polling. Defaults to 5.
4547
"""
4648

4749
POLL_INTERVAL = "ADAPTER_LLMW_POLL_INTERVAL"
4850
MAX_POLLS = "ADAPTER_LLMW_MAX_POLLS"
51+
STATUS_RETRIES = "ADAPTER_LLMW_STATUS_RETRIES"
4952

5053

5154
class WhispererConfig:
@@ -84,7 +87,7 @@ class WhisperStatus:
8487

8588

8689
class WhispererDefaults:
87-
"""Defaults meant for LLM whisperer."""
90+
"""Defaults meant for LLMWhisperer."""
8891

8992
MEDIAN_FILTER_SIZE = 0
9093
GAUSSIAN_BLUR_RADIUS = 0.0
@@ -94,6 +97,7 @@ class WhispererDefaults:
9497
HORIZONTAL_STRETCH_FACTOR = 1.0
9598
POLL_INTERVAL = int(os.getenv(WhispererEnv.POLL_INTERVAL, 30))
9699
MAX_POLLS = int(os.getenv(WhispererEnv.MAX_POLLS, 30))
100+
STATUS_RETRIES = int(os.getenv(WhispererEnv.STATUS_RETRIES, 5))
97101
PAGES_TO_EXTRACT = ""
98102
PAGE_SEPARATOR = "<<<"
99103
MARK_VERTICAL_LINES = False

src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,22 @@
2020
WhispererHeader,
2121
WhisperStatus,
2222
)
23-
from unstract.sdk.file_storage.fs_impl import FileStorage
24-
from unstract.sdk.file_storage.fs_provider import FileStorageProvider
23+
from unstract.sdk.constants import MimeType
24+
from unstract.sdk.file_storage import FileStorage, FileStorageProvider
2525

2626
logger = logging.getLogger(__name__)
2727

2828

2929
class LLMWhispererHelper:
30-
3130
@staticmethod
3231
def get_request_headers(config: dict[str, Any]) -> dict[str, Any]:
33-
"""Obtains the request headers to authenticate with LLM Whisperer.
32+
"""Obtains the request headers to authenticate with LLMWhisperer.
3433
3534
Returns:
3635
dict[str, Any]: Request headers
3736
"""
3837
return {
39-
"accept": "application/json",
38+
"accept": MimeType.JSON,
4039
WhispererHeader.UNSTRACT_KEY: config.get(WhispererConfig.UNSTRACT_KEY),
4140
}
4241

@@ -49,11 +48,11 @@ def make_request(
4948
params: Optional[dict[str, Any]] = None,
5049
data: Optional[Any] = None,
5150
) -> Response:
52-
"""Makes a request to LLM whisperer service.
51+
"""Makes a request to LLMWhisperer service.
5352
5453
Args:
5554
request_method (HTTPMethod): HTTPMethod to call. Can be GET or POST
56-
request_endpoint (str): LLM whisperer endpoint to hit
55+
request_endpoint (str): LLMWhisperer endpoint to hit
5756
headers (Optional[dict[str, Any]], optional): Headers to pass.
5857
Defaults to None.
5958
params (Optional[dict[str, Any]], optional): Query params to pass.
@@ -89,15 +88,15 @@ def make_request(
8988
except ConnectionError as e:
9089
logger.error(f"Adapter error: {e}")
9190
raise ExtractorError(
92-
"Unable to connect to LLM Whisperer service, please check the URL"
91+
"Unable to connect to LLMWhisperer service, please check the URL"
9392
)
9493
except Timeout as e:
95-
msg = "Request to LLM whisperer has timed out"
94+
msg = "Request to LLMWhisperer has timed out"
9695
logger.error(f"{msg}: {e}")
9796
raise ExtractorError(msg)
9897
except HTTPError as e:
9998
logger.error(f"Adapter error: {e}")
100-
default_err = "Error while calling the LLM Whisperer service"
99+
default_err = "Error while calling the LLMWhisperer service"
101100
msg = AdapterUtils.get_msg_from_request_exc(
102101
err=e, message_key="message", default_err=default_err
103102
)
@@ -197,14 +196,16 @@ def check_status_until_ready(
197196
"""
198197
POLL_INTERVAL = WhispererDefaults.POLL_INTERVAL
199198
MAX_POLLS = WhispererDefaults.MAX_POLLS
199+
STATUS_RETRY_THRESHOLD = WhispererDefaults.STATUS_RETRIES
200+
status_retry_count = 0
200201
request_count = 0
201202

202203
# Check status at fixed intervals, up to the max poll count.
203204
while True:
204205
request_count += 1
205206
logger.info(
206-
f"Checking status with interval: {POLL_INTERVAL}s"
207-
f", request count: {request_count} [max: {MAX_POLLS}]"
207+
f"Checking status for whisper-hash '{whisper_hash}' with interval: "
208+
f"{POLL_INTERVAL}s, request count: {request_count} [max: {MAX_POLLS}]"
208209
)
209210
status_response = LLMWhispererHelper.make_request(
210211
config=config,
@@ -216,19 +217,28 @@ def check_status_until_ready(
216217
if status_response.status_code == 200:
217218
status_data = status_response.json()
218219
status = status_data.get(WhisperStatus.STATUS, WhisperStatus.UNKNOWN)
219-
logger.info(f"Whisper status for {whisper_hash}: {status}")
220+
logger.info(f"Whisper status for '{whisper_hash}': {status}")
220221
if status in [WhisperStatus.PROCESSED, WhisperStatus.DELIVERED]:
221222
break
222223
else:
223-
raise ExtractorError(
224-
"Error checking LLMWhisperer status: "
225-
f"{status_response.status_code} - {status_response.text}"
226-
)
224+
if status_retry_count >= STATUS_RETRY_THRESHOLD:
225+
raise ExtractorError(
226+
f"Error checking LLMWhisperer status for whisper-hash "
227+
f"'{whisper_hash}': {status_response.text}"
228+
)
229+
else:
230+
status_retry_count += 1
231+
logger.warning(
232+
f"Whisper status for '{whisper_hash}' failed "
233+
f"{status_retry_count} time(s), retrying... "
234+
f"[threshold: {STATUS_RETRY_THRESHOLD}]: {status_response.text}"
235+
)
227236

228237
# Exit with error if max poll count is reached
229238
if request_count >= MAX_POLLS:
230239
raise ExtractorError(
231-
"Unable to extract text after attempting" f" {request_count} times"
240+
f"Unable to extract text for whisper-hash '{whisper_hash}' "
241+
f"after attempting {request_count} times"
232242
)
233243
time.sleep(POLL_INTERVAL)
234244

0 commit comments

Comments
 (0)