Zipstack
diff --git a/‎src/unstract/sdk/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎src/unstract/sdk/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/unstract/sdk/adapters/utils.py‎
Lines changed: 34 additions & 9 deletions b/‎src/unstract/sdk/adapters/utils.py‎
Lines changed: 34 additions & 9 deletions
diff --git a/‎src/unstract/sdk/adapters/x2text/helper.py‎
Lines changed: 2 additions & 1 deletion b/‎src/unstract/sdk/adapters/x2text/helper.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/unstract/sdk/adapters/x2text/llm_whisperer/README.md‎
Lines changed: 2 additions & 2 deletions b/‎src/unstract/sdk/adapters/x2text/llm_whisperer/README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py‎
Lines changed: 2 additions & 3 deletions b/‎src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py‎
Lines changed: 8 additions & 7 deletions b/‎src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json‎
Lines changed: 3 additions & 3 deletions b/‎src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/unstract/sdk/adapters/x2text/llm_whisperer_v2/README.md‎
Lines changed: 2 additions & 2 deletions b/‎src/unstract/sdk/adapters/x2text/llm_whisperer_v2/README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py‎
Lines changed: 6 additions & 2 deletions b/‎src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py‎
Lines changed: 28 additions & 18 deletions b/‎src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py‎
Lines changed: 28 additions & 18 deletions
@@ -1,4 +1,4 @@
-__version__ = "0.54.0rc3"
+__version__ = "0.54.0rc4"
 
 
 def get_sdk_version():
 
@@ -1,3 +1,4 @@
+import logging
 from pathlib import Path
 
 import filetype
@@ -6,8 +7,11 @@
 from requests.exceptions import RequestException
 
 from unstract.sdk.adapters.constants import Common
+from unstract.sdk.constants import MimeType
 from unstract.sdk.file_storage import FileStorage, FileStorageProvider
 
+logger = logging.getLogger(__name__)
+
 
 class AdapterUtils:
     @staticmethod
@@ -25,17 +29,38 @@ def get_msg_from_request_exc(
         Returns:
             str: Error message returned by the server
         """
-        if hasattr(err, "response"):
-            err_response: Response = err.response  # type: ignore
-            if err_response.headers["Content-Type"] == "application/json":
-                err_json = err_response.json()
-                if message_key in err_json:
-                    return str(err_json[message_key])
-            elif err_response.headers["Content-Type"] == "text/plain":
-                return err_response.text  # type: ignore
+        if not hasattr(err, "response"):
+            return default_err
+
+        err_response: Response = err.response  # type: ignore
+        err_content_type = err_response.headers.get("Content-Type")
+
+        if not err_content_type:
+            logger.warning(
+                f"Content-Type header not found in {err_response}, "
+                f"returning {default_err}"
+            )
+            return default_err
+
+        if err_content_type == MimeType.JSON:
+            err_json = err_response.json()
+            if message_key in err_json:
+                return str(err_json[message_key])
+            else:
+                logger.warning(
+                    f"Unable to parse error with key '{message_key}' for "
+                    f"'{err_json}', returning '{default_err}' instead."
+                )
+        elif err_content_type == MimeType.TEXT:
+            return err_response.text  # type: ignore
+        else:
+            logger.warning(
+                f"Unhandled err_response type '{err_content_type}' "
+                f"for {err_response}, returning {default_err}"
+            )
         return default_err
 
-    # ToDo: get_file_mime_type() to be removed once migrated to FileStorage
+    # TODO: get_file_mime_type() to be removed once migrated to FileStorage
     # FileStorage has mime_type() which could be used instead.
     @staticmethod
     def get_file_mime_type(
 
@@ -8,6 +8,7 @@
 from unstract.sdk.adapters.exceptions import AdapterError
 from unstract.sdk.adapters.utils import AdapterUtils
 from unstract.sdk.adapters.x2text.constants import X2TextConstants
+from unstract.sdk.constants import MimeType
 from unstract.sdk.file_storage import FileStorage, FileStorageProvider
 
 logger = logging.getLogger(__name__)
@@ -111,7 +112,7 @@ def make_request(
             X2TextConstants.PLATFORM_SERVICE_API_KEY
         )
         headers = {
-            "accept": "application/json",
+            "accept": MimeType.JSON,
             "Authorization": f"Bearer {platform_service_api_key}",
         }
         body = {
 
@@ -1,8 +1,8 @@
-# Unstract LLM Whisperer X2Text Adapter
+# Unstract LLMWhisperer X2Text Adapter
 
 ## Env variables
 
-The below env variables are resolved by LLM Whisperer adapter
+The below env variables are resolved by LLMWhisperer adapter
 
 | Variable                     | Description                                                                                  |
 | ---------------------------- | -------------------------------------------------------------------------------------------- |
 
@@ -39,7 +39,7 @@ class WhispererEndpoint:
 
 
 class WhispererEnv:
-    """Env variables for LLM whisperer.
+    """Env variables for LLMWhisperer.
 
     Can be used to alter behaviour at runtime.
 
@@ -89,7 +89,7 @@ class WhisperStatus:
 
 
 class WhispererDefaults:
-    """Defaults meant for LLM whisperer."""
+    """Defaults meant for LLMWhisperer."""
 
     MEDIAN_FILTER_SIZE = 0
     GAUSSIAN_BLUR_RADIUS = 0.0
@@ -104,4 +104,3 @@ class WhispererDefaults:
     PAGE_SEPARATOR = "<<< >>>"
     MARK_VERTICAL_LINES = False
     MARK_HORIZONTAL_LINES = False
-
@@ -27,6 +27,7 @@
     WhisperStatus,
 )
 from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter
+from unstract.sdk.constants import MimeType
 from unstract.sdk.file_storage import FileStorage, FileStorageProvider
 
 logger = logging.getLogger(__name__)
@@ -61,13 +62,13 @@ def get_json_schema() -> str:
         return schema
 
     def _get_request_headers(self) -> dict[str, Any]:
-        """Obtains the request headers to authenticate with LLM Whisperer.
+        """Obtains the request headers to authenticate with LLMWhisperer.
 
         Returns:
             dict[str, Any]: Request headers
         """
         return {
-            "accept": "application/json",
+            "accept": MimeType.JSON,
             WhispererHeader.UNSTRACT_KEY: self.config.get(WhispererConfig.UNSTRACT_KEY),
         }
 
@@ -79,11 +80,11 @@ def _make_request(
         params: Optional[dict[str, Any]] = None,
         data: Optional[Any] = None,
     ) -> Response:
-        """Makes a request to LLM whisperer service.
+        """Makes a request to LLMWhisperer service.
 
         Args:
             request_method (HTTPMethod): HTTPMethod to call. Can be GET or POST
-            request_endpoint (str): LLM whisperer endpoint to hit
+            request_endpoint (str): LLMWhisperer endpoint to hit
             headers (Optional[dict[str, Any]], optional): Headers to pass.
                 Defaults to None.
             params (Optional[dict[str, Any]], optional): Query params to pass.
@@ -119,15 +120,15 @@ def _make_request(
         except ConnectionError as e:
             logger.error(f"Adapter error: {e}")
             raise ExtractorError(
-                "Unable to connect to LLM Whisperer service, please check the URL"
+                "Unable to connect to LLMWhisperer service, please check the URL"
             )
         except Timeout as e:
-            msg = "Request to LLM whisperer has timed out"
+            msg = "Request to LLMWhisperer has timed out"
             logger.error(f"{msg}: {e}")
             raise ExtractorError(msg)
         except HTTPError as e:
             logger.error(f"Adapter error: {e}")
-            default_err = "Error while calling the LLM Whisperer service"
+            default_err = "Error while calling the LLMWhisperer service"
             msg = AdapterUtils.get_msg_from_request_exc(
                 err=e, message_key="message", default_err=default_err
             )
 
@@ -1,5 +1,5 @@
 {
-  "title": "LLM Whisperer X2Text",
+  "title": "LLMWhisperer X2Text",
   "type": "object",
   "required": [
     "adapter_name",
@@ -11,14 +11,14 @@
       "type": "string",
       "title": "Name",
       "default": "",
-      "description": "Provide a unique name for this adapter instance. Example: LLM Whisperer 1"
+      "description": "Provide a unique name for this adapter instance. Example: LLMWhisperer 1"
     },
     "url": {
       "type": "string",
       "title": "URL",
       "format": "uri",
       "default": "https://llmwhisperer-api.unstract.com",
-      "description": "Provide the URL of the LLM Whisperer service. Please note that this version of LLM Whisperer is deprecated."
+      "description": "Provide the URL of the LLMWhisperer service. Please note that this version of LLMWhisperer is deprecated."
     },
     "unstract_key": {
       "type": "string",
 
@@ -1,8 +1,8 @@
-# Unstract LLM Whisperer v2 X2Text Adapter
+# Unstract LLMWhisperer v2 X2Text Adapter
 
 ## Env variables
 
-The below env variables are resolved by LLM Whisperer adapter
+The below env variables are resolved by LLMWhisperer adapter
 
 | Variable                     | Description                                                                                  |
 | ---------------------------- | -------------------------------------------------------------------------------------------- |
 
@@ -33,7 +33,7 @@ class WhispererEndpoint:
 
 
 class WhispererEnv:
-    """Env variables for LLM whisperer.
+    """Env variables for LLMWhisperer.
 
     Can be used to alter behaviour at runtime.
 
@@ -42,10 +42,13 @@ class WhispererEnv:
             LLMWhisperer's status API. Defaults to 30s
         MAX_POLLS: Total number of times to poll the status API.
             Set to -1 to poll indefinitely. Defaults to 30
+        STATUS_RETRIES: Number of times to retry calling LLMWhisperer's status API
+            on failure during polling. Defaults to 5.
     """
 
     POLL_INTERVAL = "ADAPTER_LLMW_POLL_INTERVAL"
     MAX_POLLS = "ADAPTER_LLMW_MAX_POLLS"
+    STATUS_RETRIES = "ADAPTER_LLMW_STATUS_RETRIES"
 
 
 class WhispererConfig:
@@ -84,7 +87,7 @@ class WhisperStatus:
 
 
 class WhispererDefaults:
-    """Defaults meant for LLM whisperer."""
+    """Defaults meant for LLMWhisperer."""
 
     MEDIAN_FILTER_SIZE = 0
     GAUSSIAN_BLUR_RADIUS = 0.0
@@ -94,6 +97,7 @@ class WhispererDefaults:
     HORIZONTAL_STRETCH_FACTOR = 1.0
     POLL_INTERVAL = int(os.getenv(WhispererEnv.POLL_INTERVAL, 30))
     MAX_POLLS = int(os.getenv(WhispererEnv.MAX_POLLS, 30))
+    STATUS_RETRIES = int(os.getenv(WhispererEnv.STATUS_RETRIES, 5))
     PAGES_TO_EXTRACT = ""
     PAGE_SEPARATOR = "<<<"
     MARK_VERTICAL_LINES = False
 
@@ -20,23 +20,22 @@
     WhispererHeader,
     WhisperStatus,
 )
-from unstract.sdk.file_storage.fs_impl import FileStorage
-from unstract.sdk.file_storage.fs_provider import FileStorageProvider
+from unstract.sdk.constants import MimeType
+from unstract.sdk.file_storage import FileStorage, FileStorageProvider
 
 logger = logging.getLogger(__name__)
 
 
 class LLMWhispererHelper:
-
     @staticmethod
     def get_request_headers(config: dict[str, Any]) -> dict[str, Any]:
-        """Obtains the request headers to authenticate with LLM Whisperer.
+        """Obtains the request headers to authenticate with LLMWhisperer.
 
         Returns:
             dict[str, Any]: Request headers
         """
         return {
-            "accept": "application/json",
+            "accept": MimeType.JSON,
             WhispererHeader.UNSTRACT_KEY: config.get(WhispererConfig.UNSTRACT_KEY),
         }
 
@@ -49,11 +48,11 @@ def make_request(
         params: Optional[dict[str, Any]] = None,
         data: Optional[Any] = None,
     ) -> Response:
-        """Makes a request to LLM whisperer service.
+        """Makes a request to LLMWhisperer service.
 
         Args:
             request_method (HTTPMethod): HTTPMethod to call. Can be GET or POST
-            request_endpoint (str): LLM whisperer endpoint to hit
+            request_endpoint (str): LLMWhisperer endpoint to hit
             headers (Optional[dict[str, Any]], optional): Headers to pass.
                 Defaults to None.
             params (Optional[dict[str, Any]], optional): Query params to pass.
@@ -89,15 +88,15 @@ def make_request(
         except ConnectionError as e:
             logger.error(f"Adapter error: {e}")
             raise ExtractorError(
-                "Unable to connect to LLM Whisperer service, please check the URL"
+                "Unable to connect to LLMWhisperer service, please check the URL"
             )
         except Timeout as e:
-            msg = "Request to LLM whisperer has timed out"
+            msg = "Request to LLMWhisperer has timed out"
             logger.error(f"{msg}: {e}")
             raise ExtractorError(msg)
         except HTTPError as e:
             logger.error(f"Adapter error: {e}")
-            default_err = "Error while calling the LLM Whisperer service"
+            default_err = "Error while calling the LLMWhisperer service"
             msg = AdapterUtils.get_msg_from_request_exc(
                 err=e, message_key="message", default_err=default_err
             )
@@ -197,14 +196,16 @@ def check_status_until_ready(
         """
         POLL_INTERVAL = WhispererDefaults.POLL_INTERVAL
         MAX_POLLS = WhispererDefaults.MAX_POLLS
+        STATUS_RETRY_THRESHOLD = WhispererDefaults.STATUS_RETRIES
+        status_retry_count = 0
         request_count = 0
 
         # Check status in fixed intervals up to max poll count.
         while True:
             request_count += 1
             logger.info(
-                f"Checking status with interval: {POLL_INTERVAL}s"
-                f", request count: {request_count} [max: {MAX_POLLS}]"
+                f"Checking status for whisper-hash '{whisper_hash}' with interval: "
+                f"{POLL_INTERVAL}s, request count: {request_count} [max: {MAX_POLLS}]"
             )
             status_response = LLMWhispererHelper.make_request(
                 config=config,
@@ -216,19 +217,28 @@ def check_status_until_ready(
             if status_response.status_code == 200:
                 status_data = status_response.json()
                 status = status_data.get(WhisperStatus.STATUS, WhisperStatus.UNKNOWN)
-                logger.info(f"Whisper status for {whisper_hash}: {status}")
+                logger.info(f"Whisper status for '{whisper_hash}': {status}")
                 if status in [WhisperStatus.PROCESSED, WhisperStatus.DELIVERED]:
                     break
             else:
-                raise ExtractorError(
-                    "Error checking LLMWhisperer status: "
-                    f"{status_response.status_code} - {status_response.text}"
-                )
+                if status_retry_count >= STATUS_RETRY_THRESHOLD:
+                    raise ExtractorError(
+                        f"Error checking LLMWhisperer status for whisper-hash "
+                        f"'{whisper_hash}': {status_response.text}"
+                    )
+                else:
+                    status_retry_count += 1
+                    logger.warning(
+                        f"Whisper status for '{whisper_hash}' failed "
+                        f"{status_retry_count} time(s), retrying... "
+                        f"[threshold: {STATUS_RETRY_THRESHOLD}]: {status_response.text}"
+                    )
 
             # Exit with error if max poll count is reached
             if request_count >= MAX_POLLS:
                 raise ExtractorError(
-                    "Unable to extract text after attempting" f" {request_count} times"
+                    f"Unable to extract text for whisper-hash '{whisper_hash}' "
+                    f"after attempting {request_count} times"
                 )
             time.sleep(POLL_INTERVAL)
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = "0.54.0rc3"`
	`1`	`+__version__ = "0.54.0rc4"`
`2`	`2`
`3`	`3`
`4`	`4`	`def get_sdk_version():`