Skip to content

Commit 1eecb72

Browse files
[FEAT] Support for Azure and S3 for permanent file storage (#168)
* Exception handling for Prompt Service * Adding support for Azure and S3 * Adding support for Azure and S3 * Locking pdm * Change Protocol name * Change Protocol name * Change Protocol name
1 parent be328d6 commit 1eecb72

File tree

8 files changed

+650
-180
lines changed

8 files changed

+650
-180
lines changed

pdm.lock

Lines changed: 512 additions & 173 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ scripts = { unstract-tool-gen = "unstract.sdk.scripts.tool_gen:main" }
8282
# Pinning boto3 to 1.34.x for remote storage compatibility.
8383
aws = ["s3fs[boto3]~=2024.10.0", "boto3~=1.34.131"]
8484
gcs = ["gcsfs~=2024.10.0"]
85+
azure = ["adlfs~=2024.7.0"]
8586

8687
[tool.pdm.dev-dependencies]
8788
docs = [ "lazydocs~=0.4.8" ]
@@ -90,7 +91,8 @@ test = [
9091
"pytest==8.3.3",
9192
"pytest-mock==3.14.0",
9293
"gcsfs==2024.10.0",
93-
"s3fs==2024.10.0"
94+
"s3fs==2024.10.0",
95+
"adlfs~=2024.7.0"
9496
]
9597
lint = [
9698
"autopep8~=2.0.2",

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.58.0"
1+
__version__ = "0.59.0"
22

33

44
def get_sdk_version():

src/unstract/sdk/file_storage/permanent.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class PermanentFileStorage(FileStorage):
1818
FileStorageProvider.S3.value,
1919
FileStorageProvider.MINIO.value,
2020
FileStorageProvider.LOCAL.value,
21+
FileStorageProvider.AZURE.value,
2122
]
2223

2324
def __init__(
@@ -35,6 +36,8 @@ def __init__(
3536
provider == FileStorageProvider.GCS
3637
or provider == FileStorageProvider.LOCAL
3738
or provider == FileStorageProvider.MINIO
39+
or provider == FileStorageProvider.S3
40+
or provider == FileStorageProvider.AZURE
3841
):
3942
super().__init__(provider, **storage_config)
4043
else:

src/unstract/sdk/file_storage/provider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
class FileStorageProvider(enum.Enum):
5-
AZURE = "azure"
5+
AZURE = "abfs"
66
GCS = "gcs"
77
S3 = "s3"
88
MINIO = "minio"

tests/sample.env

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ TEXT_CONTENT=Writing directly from string as read_file is not passed
2424
FILE_STORAGE_GCS='{"token": "/path/to/google/creds.json"}'
2525
FILE_STORAGE_MINIO='{"endpoint_url": "http://localhost:9000","key": "xxxx", "secret": "xxxx"}'
2626
FILE_STORAGE_LOCAL='{"auto_mkdir": true}'
27-
28-
TEST_PERMANENT_STORAGE='{"provider": "gcs", "credentials": {"token": "/path/to/google/creds.json"}}'
27+
FILE_STORAGE_AZURE = '{"azure_account_name":"xxxx","azure_access_key":"XXX","connection_string":"xxxx","azure_bucket_name":"fsspec-test"}'
28+
FILE_STORAGE_S3 = '{"s3_key":"xxxx","s3_secret":"XXXX","s3_bucket":"fsspec-test","s3_endpoint":"https://s3.ap-south-1.amazonaws.com/","s3_region":"ap-south-1"}'
29+
TEST_PERMANENT_STORAGE_GCS='{"provider": "gcs", "credentials": {"token": "/path/to/google/creds.json"}}'
2930
TEST_TEMPORARY_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "xxxx", "secret": "xxxx"}}'
3031
TEST_LOCAL_STORAGE='{"provider":"local"}'
32+
TEST_PERMANENT_STORAGE_AZURE = '{"provider": "abfs", "credentials": {"azure_account_name":"unstractpocstorage","azure_access_key":"xxxx","azure_bucket_name":"fsspec-test","connection_string":"xxxx"}}'

tests/test_file_storage.py

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ class TEST_CONSTANTS:
3131
FILE_STORAGE_GCS = "FILE_STORAGE_GCS"
3232
FILE_STORAGE_MINIO = "FILE_STORAGE_MINIO"
3333
FILE_STORAGE_LOCAL = "FILE_STORAGE_LOCAL"
34+
FILE_STORAGE_S3 = "FILE_STORAGE_S3"
35+
FILE_STORAGE_AZURE = "FILE_STORAGE_AZURE"
3436

3537

3638
def file_storage(provider: FileStorageProvider):
@@ -41,6 +43,10 @@ def file_storage(provider: FileStorageProvider):
4143
creds = json.loads(os.environ.get(TEST_CONSTANTS.FILE_STORAGE_MINIO, "{}"))
4244
elif provider == FileStorageProvider.LOCAL:
4345
creds = json.loads(os.environ.get(TEST_CONSTANTS.FILE_STORAGE_LOCAL, "{}"))
46+
elif provider == FileStorageProvider.S3:
47+
creds = json.loads(os.environ.get(TEST_CONSTANTS.FILE_STORAGE_S3, "{}"))
48+
elif provider == FileStorageProvider.AZURE:
49+
creds = json.loads(os.environ.get(TEST_CONSTANTS.FILE_STORAGE_AZURE, "{}"))
4450
except JSONDecodeError:
4551
creds = {}
4652
file_storage = FileStorage(provider, **creds)
@@ -86,6 +92,20 @@ def file_storage(provider: FileStorageProvider):
8692
-1,
8793
os.path.getsize(TEST_CONSTANTS.READ_TEXT_FILE),
8894
),
95+
(
96+
file_storage(provider=FileStorageProvider.S3),
97+
TEST_CONSTANTS.READ_TEXT_FILE,
98+
"rb",
99+
0,
100+
os.path.getsize(TEST_CONSTANTS.READ_TEXT_FILE),
101+
),
102+
(
103+
file_storage(provider=FileStorageProvider.AZURE),
104+
TEST_CONSTANTS.READ_TEXT_FILE,
105+
"rb",
106+
0,
107+
os.path.getsize(TEST_CONSTANTS.READ_TEXT_FILE),
108+
),
89109
],
90110
)
91111
def test_file_read(file_storage, path, mode, read_length, expected_read_length):
@@ -233,6 +253,46 @@ def test_file_read_exception(file_storage, path, mode, read_length):
233253
0,
234254
len(TEST_CONSTANTS.TEXT_CONTENT),
235255
),
256+
(
257+
file_storage(provider=FileStorageProvider.AZURE),
258+
TEST_CONSTANTS.READ_PDF_FILE,
259+
"rb",
260+
None,
261+
TEST_CONSTANTS.WRITE_PDF_FILE,
262+
"wb",
263+
-1,
264+
os.path.getsize(TEST_CONSTANTS.READ_PDF_FILE),
265+
),
266+
(
267+
file_storage(provider=FileStorageProvider.AZURE),
268+
TEST_CONSTANTS.READ_TEXT_FILE,
269+
"rb",
270+
None,
271+
TEST_CONSTANTS.WRITE_TEXT_FILE,
272+
"wb",
273+
0,
274+
0,
275+
),
276+
(
277+
file_storage(provider=FileStorageProvider.AZURE),
278+
TEST_CONSTANTS.READ_TEXT_FILE,
279+
"rb",
280+
None,
281+
TEST_CONSTANTS.WRITE_TEXT_FILE,
282+
"wb",
283+
0,
284+
0,
285+
),
286+
(
287+
file_storage(provider=FileStorageProvider.AZURE),
288+
None,
289+
"rb",
290+
TEST_CONSTANTS.TEXT_CONTENT,
291+
TEST_CONSTANTS.WRITE_TEXT_FILE,
292+
"w",
293+
0,
294+
len(TEST_CONSTANTS.TEXT_CONTENT),
295+
),
236296
],
237297
)
238298
def test_file_write(
@@ -284,6 +344,11 @@ def test_file_write(
284344
# as they only support creating buckets. For
285345
# further details please check the implementation of mkdir in S3
286346
),
347+
(
348+
file_storage(provider=FileStorageProvider.AZURE),
349+
TEST_CONSTANTS.TEST_FOLDER,
350+
False,
351+
),
287352
],
288353
)
289354
def test_make_dir(file_storage, folder_path, expected_result):
@@ -316,6 +381,11 @@ def test_make_dir(file_storage, folder_path, expected_result):
316381
TEST_CONSTANTS.GCS_BUCKET,
317382
True,
318383
),
384+
(
385+
file_storage(provider=FileStorageProvider.AZURE),
386+
TEST_CONSTANTS.GCS_BUCKET,
387+
True,
388+
),
319389
],
320390
)
321391
def test_path_exists(file_storage, folder_path, expected_result):
@@ -345,6 +415,11 @@ def test_path_exists(file_storage, folder_path, expected_result):
345415
TEST_CONSTANTS.READ_FOLDER_PATH,
346416
2,
347417
),
418+
(
419+
file_storage(provider=FileStorageProvider.AZURE),
420+
TEST_CONSTANTS.READ_FOLDER_PATH,
421+
2,
422+
),
348423
],
349424
)
350425
def test_ls(file_storage, folder_path, expected_file_count):
@@ -366,6 +441,10 @@ def test_ls(file_storage, folder_path, expected_file_count):
366441
file_storage(provider=FileStorageProvider.MINIO),
367442
TEST_CONSTANTS.WRITE_FOLDER_PATH,
368443
),
444+
(
445+
file_storage(provider=FileStorageProvider.AZURE),
446+
TEST_CONSTANTS.WRITE_FOLDER_PATH,
447+
),
369448
],
370449
)
371450
def test_rm(file_storage, folder_path):
@@ -661,6 +740,11 @@ def test_file_mime_type(file_storage, path, read_length, expected_mime_type):
661740
TEST_CONSTANTS.READ_TEXT_FILE,
662741
TEST_CONSTANTS.TEST_FOLDER + "/3.txt",
663742
),
743+
(
744+
file_storage(provider=FileStorageProvider.AZURE),
745+
TEST_CONSTANTS.READ_TEXT_FILE,
746+
TEST_CONSTANTS.TEST_FOLDER + "/3.txt",
747+
),
664748
],
665749
)
666750
def test_download(file_storage, from_path, to_path):
@@ -690,6 +774,11 @@ def test_download(file_storage, from_path, to_path):
690774
TEST_CONSTANTS.READ_TEXT_FILE,
691775
TEST_CONSTANTS.TEST_FOLDER + "/3.txt",
692776
),
777+
(
778+
file_storage(provider=FileStorageProvider.AZURE),
779+
TEST_CONSTANTS.READ_TEXT_FILE,
780+
TEST_CONSTANTS.TEST_FOLDER + "/3.txt",
781+
),
693782
],
694783
)
695784
def test_upload(file_storage, from_path, to_path):
@@ -719,6 +808,11 @@ def test_upload(file_storage, from_path, to_path):
719808
TEST_CONSTANTS.READ_TEXT_FILE,
720809
"4a08b5721f75657eb883202cae16c74ca62df2c605e4126e50f4bf341d4fd693",
721810
),
811+
(
812+
file_storage(provider=FileStorageProvider.AZURE),
813+
TEST_CONSTANTS.READ_TEXT_FILE,
814+
"4a08b5721f75657eb883202cae16c74ca62df2c605e4126e50f4bf341d4fd693",
815+
),
722816
],
723817
)
724818
def test_get_hash_from_file(file_storage, file_path, expected_result):
@@ -744,6 +838,11 @@ def test_get_hash_from_file(file_storage, file_path, expected_result):
744838
TEST_CONSTANTS.READ_FOLDER_PATH + "/*.pdf",
745839
1,
746840
),
841+
(
842+
file_storage(provider=FileStorageProvider.AZURE),
843+
TEST_CONSTANTS.READ_FOLDER_PATH + "/*.pdf",
844+
1,
845+
),
747846
],
748847
)
749848
def test_glob(file_storage, folder_path, expected_result):
@@ -757,7 +856,7 @@ def test_glob(file_storage, folder_path, expected_result):
757856
[
758857
(
759858
StorageType.PERMANENT,
760-
"TEST_PERMANENT_STORAGE",
859+
"TEST_PERMANENT_STORAGE_GCS",
761860
FileStorageProvider.GCS,
762861
),
763862
(
@@ -770,6 +869,11 @@ def test_glob(file_storage, folder_path, expected_result):
770869
"TEST_LOCAL_STORAGE",
771870
FileStorageProvider.LOCAL,
772871
),
872+
(
873+
StorageType.PERMANENT,
874+
"TEST_PERMANENT_STORAGE_AZURE",
875+
FileStorageProvider.AZURE,
876+
),
773877
],
774878
)
775879
def test_get_storage(storage_type, env_name, expected):
@@ -783,7 +887,7 @@ def test_get_storage(storage_type, env_name, expected):
783887
[
784888
(
785889
StorageType.PERMANENT,
786-
"TEST_PERMANENT_STORAGE",
890+
"TEST_PERMANENT_STORAGE_GCS",
787891
"fsspec-test",
788892
),
789893
(

tests/test_fs_permanent.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ def permanent_file_storage(provider: FileStorageProvider):
5454
"fsspec-test/input/3.txt",
5555
"r",
5656
),
57+
(
58+
permanent_file_storage(provider=FileStorageProvider.AZURE),
59+
"fsspec-test/input/3.txt",
60+
"r",
61+
),
5762
],
5863
)
5964
def test_permanent_fs_copy_on_read(file_storage, file_read_path, read_mode):
@@ -86,6 +91,14 @@ def test_permanent_fs_copy_on_read(file_storage, file_read_path, read_mode):
8691
"fsspec-test/output/copy_on_read_legacy_storage.txt",
8792
"w",
8893
),
94+
(
95+
permanent_file_storage(provider=FileStorageProvider.AZURE),
96+
"fsspec-test/input/3.txt",
97+
"r",
98+
"fsspec-test/legacy_storage/3.txt",
99+
"fsspec-test/output/copy_on_read_legacy_storage.txt",
100+
"w",
101+
),
89102
],
90103
)
91104
def test_permanent_fs_copy_on_read_with_legacy_storage(
@@ -151,6 +164,13 @@ def test_permanent_fs_copy(
151164
"fsspec-test/output/test_write.txt",
152165
"w",
153166
),
167+
(
168+
permanent_file_storage(provider=FileStorageProvider.AZURE),
169+
"fsspec-test/input/3.txt",
170+
"r",
171+
"fsspec-test/output/test_write.txt",
172+
"w",
173+
),
154174
],
155175
)
156176
def test_permanent_fs_download(file_storage, from_path, read_mode, to_path, write_mode):

0 commit comments

Comments
 (0)