2222import logging
2323import os
2424import time
25- from typing import IO
25+ from collections .abc import Generator
26+ from typing import IO , Any
2627
2728import requests
2829
@@ -41,13 +42,25 @@ class LLMWhispererClientException(Exception):
4142 status_code (int, optional): HTTP status code returned by the LLMWhisperer API. Defaults to None.
4243 """
4344
44- def __init__ (self , value ):
45+ def __init__ (self , value : str , status_code : int | None = None ) -> None :
46+ """Initialize the LLMWhispererClientException.
47+
48+ Args:
49+ value: The error message or value.
50+ status_code: The HTTP status code returned by the LLMWhisperer API.
51+ """
4552 self .value = value
53+ self .status_code = status_code
4654
47- def __str__ (self ):
55+ def __str__ (self ) -> str :
56+ """Return string representation of the exception.
57+
58+ Returns:
59+ String representation of the error value.
60+ """
4861 return repr (self .value )
4962
50- def error_message (self ):
63+ def error_message (self ) -> str :
5164 return self .value
5265
5366
@@ -67,17 +80,17 @@ class LLMWhispererClientV2:
6780 log_stream_handler .setFormatter (formatter )
6881 logger .addHandler (log_stream_handler )
6982
70- api_key = ""
71- base_url = ""
72- api_timeout = 120
83+ api_key : str
84+ base_url : str
85+ api_timeout : int
7386
7487 def __init__ (
7588 self ,
7689 base_url : str = "" ,
7790 api_key : str = "" ,
7891 logging_level : str = "" ,
7992 custom_headers : dict [str , str ] | None = None ,
80- ):
93+ ) -> None :
8194 """Initializes the LLMWhispererClient with the given parameters.
8295
8396 Args:
@@ -136,15 +149,15 @@ def __init__(
136149 # "Start-Date": "2024-07-09",
137150 # }
138151
139- def get_usage_info (self ) -> dict :
152+ def get_usage_info (self ) -> Any :
140153 """Retrieves the usage information of the LLMWhisperer API.
141154
142155 This method sends a GET request to the '/get-usage-info' endpoint of the LLMWhisperer API.
143156 The response is a JSON object containing the usage information.
144157 Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_usage_api
145158
146159 Returns:
147- dict : A dictionary containing the usage information.
160+ Dict[Any, Any] : A dictionary containing the usage information.
148161
149162 Raises:
150163 LLMWhispererClientException: If the API request fails, it raises an exception with
@@ -163,7 +176,7 @@ def get_usage_info(self) -> dict:
163176 raise LLMWhispererClientException (err )
164177 return json .loads (response .text )
165178
166- def get_highlight_data (self , whisper_hash : str , lines : str , extract_all_lines : bool = False ) -> dict :
179+ def get_highlight_data (self , whisper_hash : str , lines : str , extract_all_lines : bool = False ) -> Any :
167180 """Retrieves the highlight information of the LLMWhisperer API.
168181
169182 This method sends a GET request to the '/highlights' endpoint of the LLMWhisperer API.
@@ -178,7 +191,7 @@ def get_highlight_data(self, whisper_hash: str, lines: str, extract_all_lines: b
178191 till the last line meta data.
179192
180193 Returns:
181- dict : A dictionary containing the highlight information.
194+ Dict[Any, Any] : A dictionary containing the highlight information.
182195
183196 Raises:
184197 LLMWhispererClientException: If the API request fails, it raises an exception with
@@ -205,7 +218,7 @@ def get_highlight_data(self, whisper_hash: str, lines: str, extract_all_lines: b
205218 def whisper (
206219 self ,
207220 file_path : str = "" ,
208- stream : IO [bytes ] = None ,
221+ stream : IO [bytes ] | None = None ,
209222 url : str = "" ,
210223 mode : str = "form" ,
211224 output_mode : str = "layout_preserving" ,
@@ -219,17 +232,17 @@ def whisper(
219232 mark_horizontal_lines : bool = False ,
220233 line_spitter_strategy : str = "left-priority" ,
221234 add_line_nos : bool = False ,
222- lang = "eng" ,
223- tag = "default" ,
224- filename = "" ,
225- webhook_metadata = "" ,
226- use_webhook = "" ,
227- wait_for_completion = False ,
228- wait_timeout = 180 ,
235+ lang : str = "eng" ,
236+ tag : str = "default" ,
237+ filename : str = "" ,
238+ webhook_metadata : str = "" ,
239+ use_webhook : str = "" ,
240+ wait_for_completion : bool = False ,
241+ wait_timeout : int = 180 ,
229242 encoding : str = "utf-8" ,
230- ) -> dict :
243+ ) -> Any :
231244 """Sends a request to the LLMWhisperer API to process a document.
232- Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_text_extraction_api
245+ Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_text_extraction_api.
233246
234247 Args:
235248 file_path (str, optional): The path to the file to be processed. Defaults to "".
@@ -264,7 +277,7 @@ def whisper(
264277 encoding (str): The character encoding to use for processing the text. Defaults to "utf-8".
265278
266279 Returns:
267- dict : The response from the API as a dictionary.
280+ Dict[Any, Any] : The response from the API as a dictionary.
268281
269282 Raises:
270283 LLMWhispererClientException: If the API request fails, it raises an exception with
@@ -296,28 +309,22 @@ def whisper(
296309 self .logger .debug ("params: %s" , params )
297310
298311 if use_webhook != "" and wait_for_completion :
299- raise LLMWhispererClientException (
300- {
301- "status_code" : - 1 ,
302- "message" : "Cannot wait for completion when using webhook" ,
303- }
304- )
312+ raise LLMWhispererClientException ("Cannot wait for completion when using webhook" , 1 )
305313
306314 if url == "" and file_path == "" and stream is None :
307315 raise LLMWhispererClientException (
308- {
309- "status_code" : - 1 ,
310- "message" : "Either url, stream or file_path must be provided" ,
311- }
316+ "Either url, stream or file_path must be provided" ,
317+ 1 ,
312318 )
313319
314320 should_stream = False
315321 if url == "" :
316322 if stream is not None :
317323 should_stream = True
318324
319- def generate ():
320- yield from stream
325+ def generate () -> Generator [bytes , None , None ]:
326+ if stream is not None : # Add explicit type check
327+ yield from stream
321328
322329 req = requests .Request (
323330 "POST" ,
@@ -410,7 +417,7 @@ def generate():
410417 message ["status_code" ] = response .status_code
411418 return message
412419
413- def whisper_status (self , whisper_hash : str ) -> dict :
420+ def whisper_status (self , whisper_hash : str ) -> Any :
414421 """Retrieves the status of the whisper operation from the LLMWhisperer
415422 API.
416423
@@ -446,7 +453,7 @@ def whisper_status(self, whisper_hash: str) -> dict:
446453 message ["status_code" ] = response .status_code
447454 return message
448455
449- def whisper_retrieve (self , whisper_hash : str , encoding : str = "utf-8" ) -> dict :
456+ def whisper_retrieve (self , whisper_hash : str , encoding : str = "utf-8" ) -> Any :
450457 """Retrieves the result of the whisper operation from the LLMWhisperer
451458 API.
452459
@@ -485,7 +492,7 @@ def whisper_retrieve(self, whisper_hash: str, encoding: str = "utf-8") -> dict:
485492 "extraction" : json .loads (response .text ),
486493 }
487494
488- def register_webhook (self , url : str , auth_token : str , webhook_name : str ) -> dict :
495+ def register_webhook (self , url : str , auth_token : str , webhook_name : str ) -> Any :
489496 """Registers a webhook with the LLMWhisperer API.
490497
491498 This method sends a POST request to the '/whisper-manage-callback' endpoint of the LLMWhisperer API.
@@ -499,7 +506,7 @@ def register_webhook(self, url: str, auth_token: str, webhook_name: str) -> dict
499506 webhook_name (str): The name of the webhook.
500507
501508 Returns:
502- dict : A dictionary containing the status code and the response from the API.
509+ Any : A dictionary containing the status code and the response from the API.
503510
504511 Raises:
505512 LLMWhispererClientException: If the API request fails, it raises an exception with
@@ -521,7 +528,7 @@ def register_webhook(self, url: str, auth_token: str, webhook_name: str) -> dict
521528 raise LLMWhispererClientException (err )
522529 return json .loads (response .text )
523530
524- def update_webhook_details (self , webhook_name : str , url : str , auth_token : str ) -> dict :
531+ def update_webhook_details (self , webhook_name : str , url : str , auth_token : str ) -> Any :
525532 """Updates the details of a webhook from the LLMWhisperer API.
526533
527534 This method sends a PUT request to the '/whisper-manage-callback' endpoint of the LLMWhisperer API.
@@ -557,7 +564,7 @@ def update_webhook_details(self, webhook_name: str, url: str, auth_token: str) -
557564 raise LLMWhispererClientException (err )
558565 return json .loads (response .text )
559566
560- def get_webhook_details (self , webhook_name : str ) -> dict :
567+ def get_webhook_details (self , webhook_name : str ) -> Any :
561568 """Retrieves the details of a webhook from the LLMWhisperer API.
562569
563570 This method sends a GET request to the '/whisper-manage-callback' endpoint of the LLMWhisperer API.
@@ -587,7 +594,7 @@ def get_webhook_details(self, webhook_name: str) -> dict:
587594 raise LLMWhispererClientException (err )
588595 return json .loads (response .text )
589596
590- def delete_webhook (self , webhook_name : str ) -> dict :
597+ def delete_webhook (self , webhook_name : str ) -> Any :
591598 """Deletes a webhook from the LLMWhisperer API.
592599
593600 This method sends a DELETE request to the '/whisper-manage-callback' endpoint of the LLMWhisperer API.
@@ -624,7 +631,7 @@ def get_highlight_rect(
624631 target_height : int ,
625632 ) -> tuple [int , int , int , int , int ]:
626633 """Given the line metadata and the line number, this function returns
627- the bounding box of the line in the format (page,x1,y1,x2,y2)
634+ the bounding box of the line in the format (page,x1,y1,x2,y2).
628635
629636 Args:
630637 line_metadata (list[int]): The line metadata returned by the LLMWhisperer API.
0 commit comments