@@ -169,6 +169,7 @@ def whisper(
169169 ocr_provider : str = "advanced" ,
170170 line_splitter_tolerance : float = 0.4 ,
171171 horizontal_stretch_factor : float = 1.0 ,
172+ encoding : str = "utf-8"
172173 ) -> dict :
173174 """
174175 Sends a request to the LLMWhisperer API to process a document.
@@ -190,6 +191,7 @@ def whisper(
190191 ocr_provider (str, optional): The OCR provider. Can be "advanced" or "basic". Defaults to "advanced".
191192 line_splitter_tolerance (float, optional): The line splitter tolerance. Defaults to 0.4.
192193 horizontal_stretch_factor (float, optional): The horizontal stretch factor. Defaults to 1.0.
194+ encoding (str, optional): The character encoding used to decode the text of the API response. Defaults to "utf-8".
193195
194196 Returns:
195197 dict: The response from the API as a dictionary.
@@ -268,6 +270,7 @@ def generate():
268270 prepared = req .prepare ()
269271 s = requests .Session ()
270272 response = s .send (prepared , timeout = self .api_timeout , stream = should_stream )
273+ response .encoding = encoding
271274 if response .status_code != 200 and response .status_code != 202 :
272275 message = json .loads (response .text )
273276 message ["status_code" ] = response .status_code
@@ -318,7 +321,7 @@ def whisper_status(self, whisper_hash: str) -> dict:
318321 message ["status_code" ] = response .status_code
319322 return message
320323
321- def whisper_retrieve (self , whisper_hash : str ) -> dict :
324+ def whisper_retrieve (self , whisper_hash : str , encoding : str = "utf-8" ) -> dict :
322325 """Retrieves the result of the whisper operation from the LLMWhisperer
323326 API.
324327
@@ -329,6 +332,7 @@ def whisper_retrieve(self, whisper_hash: str) -> dict:
329332
330333 Args:
331334 whisper_hash (str): The hash of the whisper operation.
335+ encoding (str, optional): The character encoding used to decode the text of the API response. Defaults to "utf-8".
332336
333337 Returns:
334338 dict: A dictionary containing the status code and the extracted text from the whisper operation.
@@ -345,6 +349,7 @@ def whisper_retrieve(self, whisper_hash: str) -> dict:
345349 prepared = req .prepare ()
346350 s = requests .Session ()
347351 response = s .send (prepared , timeout = self .api_timeout )
352+ response .encoding = encoding
348353 if response .status_code != 200 :
349354 err = json .loads (response .text )
350355 err ["status_code" ] = response .status_code
0 commit comments