Skip to content

Commit d3c413f

Browse files
[FIX] Support walk() in file storage (#152)
* Fix arg name in mime_type() * Add new APIs for walk * Add a comment for invalidating cache
1 parent f089f4e commit d3c413f

File tree

4 files changed

+94
-1
lines changed

4 files changed

+94
-1
lines changed

src/unstract/sdk/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
__version__ = "0.56.0rc1"
1+
__version__ = "0.56.0rc2"
2+
23

34
def get_sdk_version():
45
"""Returns the SDK version."""

src/unstract/sdk/file_storage/impl.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,3 +383,22 @@ def guess_extension(self, path: str) -> str:
383383
file_type = filetype.guess(sample_contents)
384384
file_extension = file_type.EXTENSION
385385
return file_extension
386+
387+
def walk(self, path: str, max_depth=None, topdown=True):
    """Walk the directory tree rooted at ``path``.

    Args:
        path (str): Root to recurse into.
        max_depth (int): Maximum recursion depth. None means limitless,
            but not recommended on link-based file-systems. Forwarded to
            the underlying file system as ``maxdepth``.
        topdown (bool): Whether to walk the directory tree from the top
            downwards or from the bottom upwards.

    Returns:
        The iterator produced by ``self.fs.walk``; each item can be
        unpacked as ``(root, dirs, files)``.
    """
    # Invalidating cache explicitly to avoid any stale listing
    self.fs.invalidate_cache(path=path)
    return self.fs.walk(path, maxdepth=max_depth, topdown=topdown)

src/unstract/sdk/file_storage/interface.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,7 @@ def yaml_load(
125125
@abstractmethod
def guess_extension(self, path: str) -> str:
    """Return the guessed file extension for the file at ``path``.

    Abstract hook with no behaviour of its own; concrete storage classes
    supply the implementation.
    """
    return None
128+
129+
@abstractmethod
def walk(self, path: str, max_depth=None, topdown=True):
    """Walk the directory tree rooted at ``path``.

    The signature mirrors the concrete file-storage implementation so
    callers coded against the interface can pass the same arguments.

    Args:
        path (str): Root to recurse into.
        max_depth (int): Maximum recursion depth. None means limitless.
        topdown (bool): Whether to walk from the top downwards or from
            the bottom upwards.

    Returns:
        Iterator whose items can be unpacked as ``(root, dirs, files)``.
    """
    pass

tests/test_file_storage.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,3 +771,72 @@ def test_get_storage(storage_type, env_name, expected):
771771
file_storage = EnvHelper.get_storage(storage_type, env_name)
772772
assert file_storage.provider == expected
773773
print(file_storage)
774+
775+
776+
@pytest.mark.parametrize(
    "storage_type, env_name, path",
    [
        (
            StorageType.PERMANENT,
            "TEST_PERMANENT_STORAGE",
            "fsspec-test",
        ),
        (
            StorageType.SHARED_TEMPORARY,
            "TEST_TEMPORARY_STORAGE",
            "unstract/execution/mock_org/"
            "13484b52-2127-48c2-b1a3-b517365346c3/"
            "39fcdcba-90bb-44ce-9446-67253adcb4d7/COPY_TO_FOLDER",
        ),
    ],
)
def test_dir_walk(storage_type, env_name, path):
    """Check the first level yielded by ``walk()`` for each storage type.

    The permanent storage path is expected to contain files, while the
    shared temporary path is expected to contain none. If the walk yields
    nothing at all, the test ends without asserting.
    """
    file_storage = EnvHelper.get_storage(storage_type, env_name)
    try:
        _root, dirs, files = next(file_storage.walk(path))
    except StopIteration:
        # Bare return: pytest warns when a test function returns a
        # non-None value, so the previous ``return []`` is avoided.
        return
    # Directory names are printed before file names, one per line.
    for entry_name in (*dirs, *files):
        print(entry_name)
    if storage_type == StorageType.PERMANENT:
        assert files, f"expected at least one file under {path!r}"
    elif storage_type == StorageType.SHARED_TEMPORARY:
        assert not files, f"expected no files under {path!r}, got {files!r}"
807+
808+
809+
def list_print_dir(file_storage, path, iter_num):
    """Print the first level of ``path`` as reported by ``walk()``.

    Args:
        file_storage: Object exposing ``walk(path)`` whose items unpack as
            ``(root, dirs, files)``.
        path: Directory to list.
        iter_num: Label printed alongside the listing to tell successive
            calls apart.

    Returns:
        An empty list when the walk yields nothing; otherwise None.
    """
    print(f"PATH: {path}")
    print(f"\nIteration: {iter_num}")
    try:
        _root, dirs, files = next(file_storage.walk(path))
    except StopIteration:
        return []
    # Directory names are printed before file names, one per line.
    for entry_name in (*dirs, *files):
        print(entry_name)
    print(f"Files: {files}")
821+
822+
823+
@pytest.mark.parametrize(
    "storage_type, env_name, path",
    [
        (
            StorageType.SHARED_TEMPORARY,
            "TEST_TEMPORARY_STORAGE",
            "unstract/execution/mock_org/"
            "13484b52-2127-48c2-b1a3-b517365346c3/b"
            "f7b3d81-d0aa-4e9e-883d-25dd0f3a6466/COPY_TO_FOLDER",
        ),
    ],
)
def test_dir_ls(storage_type, env_name, path):
    """Print the directory listing before and after writing a new file.

    Any leftover ``tmp.txt`` is removed first so the two listings differ
    only by the file written in between.
    """
    scratch_file = os.path.join(path, "tmp.txt")
    storage = EnvHelper.get_storage(storage_type, env_name)

    # Start from a clean slate: drop the file if it is already there.
    already_present = storage.exists(scratch_file)
    if already_present:
        storage.rm(scratch_file)

    list_print_dir(storage, path, "1")
    storage.write(scratch_file, "w", data="Hello")
    list_print_dir(storage, path, "2")

0 commit comments

Comments
 (0)