Skip to content

Commit 265d5b9

Browse files
gaya3-zipstackchandrasekharan-zipstackmuhammad-ali-e
authored
Handle file not found using cache invalidation and retry again (#130)
* contextSizeChanges * contextSizeChanges * Version roll and test folder check in * Fix enum values * Fix test cases, address review comments * Address review comments * Update pyproject.toml Co-authored-by: Chandrasekharan M <[email protected]> Signed-off-by: Gayathri <[email protected]> * Address mypy issues * Change class design and implementation * Remove unused definitions * Add cp() and function refactoring * Check-in sample env * Default value of dict changed to None * Add size() * Refactor for using FileStorage * Refactor to use FileStorage * Fix issues * Add mime_type, download functions * change comments * Refactor get_hash_from_file * Add return types * Remove permanent file storage from sdk * Fix SDK functional issues * Support minio * Test cases for Minio * Bring file variants back to sdk * Fix copy_on_write * Add new test cases for upload/download * Add new functions to support platform-service * Change modification_time return type to datetime * Refactor env pick-up logic * Sample env * contextSizeChanges * Remove commented code and some improvisations * contextSizeChanges * Add right JSON formatted string * Update src/unstract/sdk/file_storage/fs_permanent.py Co-authored-by: Chandrasekharan M <[email protected]> Signed-off-by: Gayathri <[email protected]> * Address review comments * Address review comments * Update src/unstract/sdk/file_storage/fs_shared_temporary.py Co-authored-by: ali <[email protected]> Signed-off-by: Gayathri <[email protected]> * Refactor for change in enum value * Add return type * Support glob * Add function to interface * Update env format * Support legacy storage and get_hash_from_file * Change sample path * Update test env * Add yaml_dump function * add more functions * Type the args * Add file not found exception * Optimise checks * Setup python version * Handle file not found using cache invalidation and retry * Revert a change * Renaming * Add env helper for * Add sample env * Update 
src/unstract/sdk/file_storage/env_helper.py Co-authored-by: Chandrasekharan M <[email protected]> Signed-off-by: Gayathri <[email protected]> * Review comments - Address --------- Signed-off-by: Gayathri <[email protected]> Co-authored-by: Chandrasekharan M <[email protected]> Co-authored-by: ali <[email protected]>
1 parent 585dc52 commit 265d5b9

File tree

11 files changed

+175
-114
lines changed

11 files changed

+175
-114
lines changed

src/unstract/sdk/file_storage/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
"SharedTemporaryFileStorage",
77
]
88

9-
from unstract.sdk.file_storage.fs_impl import FileStorage
10-
from unstract.sdk.file_storage.fs_permanent import PermanentFileStorage
11-
from unstract.sdk.file_storage.fs_provider import FileStorageProvider
12-
from unstract.sdk.file_storage.fs_shared_temporary import SharedTemporaryFileStorage
139
from unstract.sdk.file_storage.helper import FileStorageHelper
10+
from unstract.sdk.file_storage.impl import FileStorage
11+
from unstract.sdk.file_storage.permanent import PermanentFileStorage
12+
from unstract.sdk.file_storage.provider import FileStorageProvider
13+
from unstract.sdk.file_storage.shared_temporary import SharedTemporaryFileStorage

src/unstract/sdk/file_storage/constants.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from enum import Enum
2+
3+
14
class FileOperationParams:
25
READ_ENTIRE_LENGTH = -1
36
DEFAULT_ENCODING = "utf-8"
@@ -7,3 +10,13 @@ class FileSeekPosition:
710
START = 0
811
CURRENT = 1
912
END = 2
13+
14+
15+
class StorageType(Enum):
    """Kinds of file storage that can be requested."""

    # Each member's value is the lowercase string form of its name.
    PERMANENT = "permanent"
    TEMPORARY = "temporary"
18+
19+
20+
class CredentialKeyword:
    """String keys used to look up entries in a storage credential mapping."""

    # Key holding the storage provider identifier.
    PROVIDER = "provider"
    # Key holding the provider-specific credential mapping.
    CREDENTIALS = "credentials"
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import json
2+
import logging
3+
import os
4+
5+
from unstract.sdk.exceptions import FileStorageError
6+
from unstract.sdk.file_storage.constants import CredentialKeyword, StorageType
7+
from unstract.sdk.file_storage.impl import FileStorage
8+
from unstract.sdk.file_storage.permanent import PermanentFileStorage
9+
from unstract.sdk.file_storage.provider import FileStorageProvider
10+
from unstract.sdk.file_storage.shared_temporary import SharedTemporaryFileStorage
11+
12+
logger = logging.getLogger(__name__)
13+
14+
15+
class EnvHelper:
    """Build file-storage objects from JSON configuration held in env variables."""

    @staticmethod
    def get_storage(storage_type: StorageType, env_name: str) -> FileStorage:
        """Create the file storage described by the environment variable.

        The variable named ``env_name`` must contain a JSON object with a
        ``provider`` entry and, optionally, a ``credentials`` object. The
        credentials are forwarded as keyword arguments to the storage
        constructor.

        Args:
            storage_type: The kind of storage to create. A ``StorageType``
                member is expected; the raw member value (e.g.
                ``"permanent"``) is also accepted for backward compatibility.
            env_name: Name of the environment variable holding the JSON
                configuration.

        Returns:
            A ``PermanentFileStorage`` or ``SharedTemporaryFileStorage``
            instance, depending on ``storage_type``.

        Raises:
            NotImplementedError: If ``storage_type`` is not a supported
                storage type.
            FileStorageError: If the environment variable is not set. Any
                ``FileStorageError`` raised while building the storage
                propagates unchanged.
            KeyError: If the configuration has no ``provider`` entry.
            json.JSONDecodeError: If the variable does not contain valid JSON.
        """
        storage_classes = {
            StorageType.PERMANENT: PermanentFileStorage,
            StorageType.TEMPORARY: SharedTemporaryFileStorage,
        }
        try:
            # Normalise to an enum member first. A plain Enum member never
            # compares equal to its raw value, so comparing the argument
            # against ``.value`` would reject every real StorageType member.
            storage_class = storage_classes[StorageType(storage_type)]
        except (ValueError, KeyError) as e:
            raise NotImplementedError(
                f"Unsupported storage type: {storage_type!r}"
            ) from e

        raw_config = os.environ.get(env_name)
        if raw_config is None:
            # Fail with a clear message instead of the TypeError that
            # json.loads(None) would raise.
            message = f"Environment variable '{env_name}' is not set"
            logger.error(message)
            raise FileStorageError(message)

        try:
            file_storage_creds = json.loads(raw_config)
            provider = FileStorageProvider(
                file_storage_creds[CredentialKeyword.PROVIDER]
            )
            credentials = file_storage_creds.get(CredentialKeyword.CREDENTIALS, {})
            return storage_class(provider=provider, **credentials)
        except KeyError as e:
            logger.error(f"Required credentials is missing in the env: {str(e)}")
            raise

src/unstract/sdk/file_storage/helper.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import fsspec
55
from fsspec import AbstractFileSystem
66

7-
from unstract.sdk.exceptions import FileStorageError
8-
from unstract.sdk.file_storage.fs_provider import FileStorageProvider
7+
from unstract.sdk.exceptions import FileOperationError, FileStorageError
8+
from unstract.sdk.file_storage.provider import FileStorageProvider
99

1010
logger = logging.getLogger(__name__)
1111

@@ -67,3 +67,24 @@ def local_file_system_init() -> AbstractFileSystem:
6767
f" file system {e}"
6868
)
6969
raise FileStorageError(str(e)) from e
70+
71+
72+
def skip_local_cache(func):
    """Decorate a storage method so a stale listing cache triggers one retry.

    The decorated callable must receive, as its first positional argument,
    an object exposing ``fs.invalidate_cache()`` (typically ``self``).

    Behaviour of the returned wrapper:
      * If ``func`` succeeds, its result is returned.
      * If ``func`` raises ``FileNotFoundError``, the cache is invalidated
        and ``func`` is called once more. A second ``FileNotFoundError`` is
        re-raised unchanged; any other error during the retry is wrapped in
        ``FileOperationError``.
      * Any other exception from the first call is wrapped in
        ``FileOperationError``.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapping function, carrying ``func``'s name and docstring.
    """
    # Imported locally so this block does not depend on the file's import list.
    from functools import wraps

    @wraps(func)  # keep the wrapped method's __name__/__doc__ for debugging
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except FileNotFoundError:
            try:
                # FileNotFound could have been caused by stale cache.
                # Hence invalidate cache and retry again
                args[0].fs.invalidate_cache()
                return func(*args, **kwargs)
            except FileNotFoundError:
                # The file is genuinely missing; let callers see that.
                raise
            except Exception as e:
                raise FileOperationError(str(e)) from e
        except Exception as e:
            raise FileOperationError(str(e)) from e

    return wrapper

0 commit comments

Comments
 (0)