Skip to content

Commit 2675e2b

Browse files
[FIX] Fixes for Prompt studio Indexing and tool runs (#143)
* Refactoring changed file names * Roll version * Update src/unstract/sdk/utils/tool_utils.py Co-authored-by: Chandrasekharan M <[email protected]> Signed-off-by: Gayathri <[email protected]> * Update tests/test_fs_permanent.py Co-authored-by: Chandrasekharan M <[email protected]> Signed-off-by: Gayathri <[email protected]> * Update tests/test_fs_permanent.py Co-authored-by: Chandrasekharan M <[email protected]> Signed-off-by: Gayathri <[email protected]> * Update src/unstract/sdk/utils/tool_utils.py Co-authored-by: Chandrasekharan M <[email protected]> Signed-off-by: Gayathri <[email protected]> * Address review comments * Add support for passing length to mime_type * Add recursive and fix mypy issue * Change test case with new behavior to return FileNotFound in read() * Remove typing kwargs. * Resolve mypy issues * Resolve mypy issues * Remove unwanted conditionals/vars * Remove pandoc and tesseract. * Details of provider added to error message * fixed enum conditional matching value * Include EnvHelper in __init__ * Rename error handler * Upgrade version * Expose StorageType outside * Resolve circular dependency issue * Resolve circular dependency issue * Indexing fixes + clean up * Add deprecation warnings * Add deprecation warnings --------- Signed-off-by: Gayathri <[email protected]> Co-authored-by: Chandrasekharan M <[email protected]>
1 parent eaccd55 commit 2675e2b

File tree

15 files changed

+91
-45
lines changed

15 files changed

+91
-45
lines changed

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.54.0rc11"
1+
__version__ = "0.54.0rc12"
22

33

44
def get_sdk_version():

src/unstract/sdk/adapters/utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import warnings
23
from pathlib import Path
34

45
import filetype
@@ -76,6 +77,13 @@ def get_file_mime_type(
7677
Returns:
7778
str: MIME type of the file
7879
"""
80+
# Adding the following DeprecationWarning manually as the package "deprecated"
81+
# does not support deprecation on static methods.
82+
warnings.warn(
83+
"`get_file_mime_type` is deprecated. "
84+
"Use `FileStorage mime_type()` instead.",
85+
DeprecationWarning,
86+
)
7987
sample_contents = fs.read(path=input_file, mode="rb", length=100)
8088
input_file_mime = magic.from_buffer(sample_contents, mime=True)
8189
return input_file_mime
@@ -93,6 +101,13 @@ def guess_extention(
93101
Returns:
94102
str: File extension
95103
"""
104+
# Adding the following DeprecationWarning manually as the package "deprecated"
105+
# does not support deprecation on static methods.
106+
warnings.warn(
107+
"`guess_extention` is deprecated. "
108+
"Use `FileStorage guess_extension()` instead.",
109+
DeprecationWarning,
110+
)
96111
input_file_extention = ""
97112
sample_contents = fs.read(path=input_file_path, mode="rb", length=100)
98113
if sample_contents:

src/unstract/sdk/adapters/x2text/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def process_document(
7373
if not local_storage.exists(input_file_path):
7474
fs.download(from_path=input_file_path, to_path=input_file_path)
7575
with open(input_file_path, "rb") as input_f:
76-
mime_type = AdapterUtils.get_file_mime_type(input_file=input_file_path)
76+
mime_type = local_storage.mime_type(input_file=input_file_path)
7777
files = {"file": (input_file_path, input_f, mime_type)}
7878
response = UnstructuredHelper.make_request(
7979
unstructured_adapter_config=unstructured_adapter_config,

src/unstract/sdk/adapters/x2text/llama_parse/src/llama_parse.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from llama_parse import LlamaParse
88

99
from unstract.sdk.adapters.exceptions import AdapterError
10-
from unstract.sdk.adapters.utils import AdapterUtils
1110
from unstract.sdk.adapters.x2text.dto import TextExtractionResult
1211
from unstract.sdk.adapters.x2text.llama_parse.src.constants import LlamaParseConfig
1312
from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter
@@ -62,9 +61,7 @@ def _call_parser(
6261
file_extension = pathlib.Path(input_file_path).suffix
6362
if not file_extension:
6463
try:
65-
input_file_extension = AdapterUtils.guess_extention(
66-
input_file_path, fs
67-
)
64+
input_file_extension = fs.guess_extension(input_file_path)
6865
input_file_path_copy = input_file_path
6966
input_file_path = ".".join(
7067
(input_file_path_copy, input_file_extension)

src/unstract/sdk/constants.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ class ToolEnv:
2222
DATA_DIR = "TOOL_DATA_DIR"
2323
EXECUTION_BY_TOOL = "EXECUTION_BY_TOOL"
2424
EXECUTION_DATA_DIR = "EXECUTION_DATA_DIR"
25-
WORKFLOW_EXECUTION_FS_PROVIDER = "WORKFLOW_EXECUTION_FS_PROVIDER"
26-
WORKFLOW_EXECUTION_FS_CREDENTIAL = "WORKFLOW_EXECUTION_FS_CREDENTIAL"
25+
WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS = (
26+
"WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS"
27+
)
2728

2829

2930
class ConnectorKeys:

src/unstract/sdk/file_storage/constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
class FileOperationParams:
55
READ_ENTIRE_LENGTH = -1
66
MIME_TYPE_DEFAULT_READ_LENGTH = 100
7+
EXTENSION_DEFAULT_READ_LENGTH = 100
78
DEFAULT_ENCODING = "utf-8"
89

910

@@ -15,7 +16,7 @@ class FileSeekPosition:
1516

1617
class StorageType(Enum):
1718
PERMANENT = "permanent"
18-
TEMPORARY = "temporary"
19+
SHARED_TEMPORARY = "shared_temporary"
1920

2021

2122
class CredentialKeyword:

src/unstract/sdk/file_storage/env_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def get_storage(storage_type: StorageType, env_name: str) -> FileStorage:
3939
credentials = file_storage_creds.get(CredentialKeyword.CREDENTIALS, "{}")
4040
if storage_type == StorageType.PERMANENT:
4141
file_storage = PermanentFileStorage(provider=provider, **credentials)
42-
elif storage_type == StorageType.TEMPORARY:
42+
elif storage_type == StorageType.SHARED_TEMPORARY:
4343
file_storage = SharedTemporaryFileStorage(
4444
provider=provider, **credentials
4545
)

src/unstract/sdk/file_storage/impl.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from hashlib import sha256
55
from typing import Any, Union
66

7+
import filetype
78
import fsspec
89
import magic
910
import yaml
@@ -361,3 +362,24 @@ def yaml_load(
361362
with self.fs.open(path=path) as f:
362363
data: dict[str, Any] = yaml.safe_load(f)
363364
return data
365+
366+
@skip_local_cache
def guess_extension(self, path: str) -> str:
    """Guess the extension of a file from its leading bytes.

    Reads the first ``FileOperationParams.EXTENSION_DEFAULT_READ_LENGTH``
    bytes of the file in binary mode and hands them to ``filetype.guess``.

    Args:
        path (str): String holding the path

    Returns:
        str: File extension, or an empty string when the file is empty or
            its type cannot be determined from the sampled bytes.
    """
    sample_contents = self.read(
        path=path,
        mode="rb",
        length=FileOperationParams.EXTENSION_DEFAULT_READ_LENGTH,
    )
    if not sample_contents:
        return ""
    # `filetype.guess` returns None when no known signature matches
    # (e.g. plain-text files); without this guard the attribute access
    # below would raise AttributeError instead of reporting "unknown".
    file_type = filetype.guess(sample_contents)
    if file_type is None:
        return ""
    return file_type.EXTENSION

src/unstract/sdk/file_storage/interface.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,7 @@ def yaml_load(
121121
path: str,
122122
) -> dict[Any, Any]:
123123
pass
124+
125+
@abstractmethod
def guess_extension(self, path: str) -> str:
    """Return the extension of the file at the given path.

    Args:
        path (str): String holding the path

    Returns:
        str: File extension
    """

src/unstract/sdk/file_storage/shared_temporary.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __init__(
1717
):
1818
if provider.value not in self.SUPPORTED_FILE_STORAGE_TYPES:
1919
raise FileStorageError(
20-
f"File storage provider is not supported in Permanent mode. "
20+
f"File storage provider is not supported in Shared Temporary mode. "
2121
f"Supported providers: {self.SUPPORTED_FILE_STORAGE_TYPES}"
2222
)
2323
if provider == FileStorageProvider.MINIO:

0 commit comments

Comments
 (0)