Skip to content

Commit 585dc52

Browse files
[FIX] RC3 - Adding support to NoOp, LLMW-v2 and Llama parse (#131)
* Exception handling for Prompt Service
* Remote storage: support for NoOp and LLMW-v2
* Fixing mkdir method signature
* Passing param with arg
* Adding method params
* Version bump and passing extra_info param
1 parent 171470c commit 585dc52

File tree

5 files changed

+38
-10
lines changed

5 files changed

+38
-10
lines changed

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.54.0rc2"
1+
__version__ = "0.54.0rc3"
22

33

44
def get_sdk_version():

src/unstract/sdk/adapters/x2text/llama_parse/src/llama_parse.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,12 +69,23 @@ def _call_parser(
6969
input_file_path = ".".join(
7070
(input_file_path_copy, input_file_extension)
7171
)
72+
text_content = fs.read(
73+
path=input_file_path_copy, mode="rb", encoding="utf-8"
74+
)
75+
fs.write(
76+
path=input_file_path,
77+
data=text_content,
78+
mode="w",
79+
encoding="utf-8",
80+
)
7281
except OSError as os_err:
7382
logger.error("Exception raised while handling input file.")
7483
raise AdapterError(str(os_err))
7584

7685
file_bytes = fs.read(path=input_file_path, mode="rb")
77-
documents = parser.load_data(file_bytes)
86+
documents = parser.load_data(
87+
file_bytes, extra_info={"file_name": input_file_path}
88+
)
7889

7990
except ConnectError as connec_err:
8091
logger.error(f"Invalid Base URL given. : {connec_err}")

src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,7 @@ def extract_text_from_response(
306306
output_file_path: Optional[str],
307307
response_dict: dict[str, Any],
308308
response: Response,
309+
fs: FileStorage = FileStorage(provider=FileStorageProvider.LOCAL),
309310
) -> str:
310311
output_json = {}
311312
if response.status_code == 200:
@@ -321,11 +322,16 @@ def extract_text_from_response(
321322
LLMWhispererHelper.write_output_to_file(
322323
output_json=output_json,
323324
output_file_path=Path(output_file_path),
325+
fs=fs,
324326
)
325327
return output_json.get("result_text", "")
326328

327329
@staticmethod
328-
def write_output_to_file(output_json: dict, output_file_path: Path) -> None:
330+
def write_output_to_file(
331+
output_json: dict,
332+
output_file_path: Path,
333+
fs: FileStorage = FileStorage(provider=FileStorageProvider.LOCAL),
334+
) -> None:
329335
"""Writes the extracted text and metadata to the specified output file
330336
and metadata file.
331337
@@ -341,7 +347,9 @@ def write_output_to_file(output_json: dict, output_file_path: Path) -> None:
341347
try:
342348
text_output = output_json.get("result_text", "")
343349
logger.info(f"Writing output to {output_file_path}")
344-
output_file_path.write_text(text_output, encoding="utf-8")
350+
fs.write(
351+
path=output_file_path, mode="w", data=text_output, encoding="utf-8"
352+
)
345353
except Exception as e:
346354
logger.error(f"Error while writing {output_file_path}: {e}")
347355
raise ExtractorError(str(e))
@@ -352,13 +360,15 @@ def write_output_to_file(output_json: dict, output_file_path: Path) -> None:
352360
metadata_file_name = output_file_path.with_suffix(".json").name
353361
metadata_file_path = metadata_dir / metadata_file_name
354362
# Ensure the metadata directory exists
355-
metadata_dir.mkdir(parents=True, exist_ok=True)
363+
fs.mkdir(create_parents=True, path=metadata_dir)
356364
# Remove the "result_text" key from the metadata
357365
metadata = {
358366
key: value for key, value in output_json.items() if key != "result_text"
359367
}
360368
metadata_json = json.dumps(metadata, ensure_ascii=False, indent=4)
361369
logger.info(f"Writing metadata to {metadata_file_path}")
362-
metadata_file_path.write_text(metadata_json, encoding="utf-8")
370+
fs.write(
371+
path=metadata_file_path, mode="w", data=metadata_json, encoding="utf-8"
372+
)
363373
except Exception as e:
364374
logger.warn(f"Error while writing metadata to {metadata_file_path}: {e}")

src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
)
1717
from unstract.sdk.adapters.x2text.llm_whisperer_v2.src.helper import LLMWhispererHelper
1818
from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter
19+
from unstract.sdk.file_storage.fs_impl import FileStorage
20+
from unstract.sdk.file_storage.fs_provider import FileStorageProvider
1921

2022
logger = logging.getLogger(__name__)
2123

@@ -60,6 +62,7 @@ def process(
6062
self,
6163
input_file_path: str,
6264
output_file_path: Optional[str] = None,
65+
fs: FileStorage = FileStorage(provider=FileStorageProvider.LOCAL),
6366
**kwargs: dict[Any, Any],
6467
) -> TextExtractionResult:
6568
"""Used to extract text from documents.
@@ -75,7 +78,7 @@ def process(
7578
"""
7679

7780
response: requests.Response = LLMWhispererHelper.send_whisper_request(
78-
input_file_path, self.config
81+
input_file_path, self.config, fs=fs
7982
)
8083
response_text = response.text
8184
reponse_dict = json.loads(response_text)
@@ -85,7 +88,7 @@ def process(
8588

8689
return TextExtractionResult(
8790
extracted_text=LLMWhispererHelper.extract_text_from_response(
88-
self.config, output_file_path, reponse_dict, response
91+
self.config, output_file_path, reponse_dict, response, fs=fs
8992
),
9093
extraction_metadata=metadata,
9194
)

src/unstract/sdk/adapters/x2text/no_op/src/no_op_x2text.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55

66
from unstract.sdk.adapters.x2text.dto import TextExtractionResult
77
from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter
8+
from unstract.sdk.file_storage.fs_impl import FileStorage
9+
from unstract.sdk.file_storage.fs_provider import FileStorageProvider
810

911
logger = logging.getLogger(__name__)
1012

@@ -41,6 +43,7 @@ def process(
4143
self,
4244
input_file_path: str,
4345
output_file_path: Optional[str] = None,
46+
fs: FileStorage = FileStorage(provider=FileStorageProvider.LOCAL),
4447
**kwargs: dict[Any, Any],
4548
) -> TextExtractionResult:
4649
extracted_text: str = (
@@ -49,8 +52,9 @@ def process(
4952
)
5053
time.sleep(self.config.get("wait_time"))
5154
if output_file_path:
52-
with open(output_file_path, "w", encoding="utf-8") as f:
53-
f.write(extracted_text)
55+
fs.write(
56+
path=output_file_path, mode="w", data=extracted_text, encoding="utf-8"
57+
)
5458
return TextExtractionResult(extracted_text=extracted_text)
5559

5660
def test_connection(self) -> bool:

0 commit comments

Comments
 (0)