Skip to content

Commit 86a57d6

Browse files
Unit tests for list_files (#643)
I was trying to reproduce an issue where `list_files` on a `Dir` returned the parent folder in addition to the files. I was not able to reproduce it, but I am adding some tests to check for it. I also noticed that a variable was not being used in `Dir.from_local`.

Signed-off-by: Yee Hing Tong <wild-endeavor@users.noreply.github.com>
1 parent bf07b80 commit 86a57d6

File tree

3 files changed

+84
-1
lines changed

3 files changed

+84
-1
lines changed

src/flyte/io/_dir.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ async def _lazy_uploader() -> tuple[str | None, str]:
662662

663663
# todo: in the future, mirror File and set the file to_path here
664664
output_path = await storage.put(
665-
from_path=local_path_str, to_path=remote_destination, recursive=True, batch_size=batch_size
665+
from_path=local_path_str, to_path=resolved_remote_path, recursive=True, batch_size=batch_size
666666
)
667667
return cls(path=output_path, name=dirname, hash=dir_cache_key)
668668

tests/flyte/io_types/test_dirs.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,3 +510,46 @@ async def test_dir_lazy_uploader_preserves_hash(tmp_dir_structure):
510510
assert not lv.hash # Empty string or None
511511
finally:
512512
_run_mode_var.set(None)
513+
514+
515+
@pytest.mark.sandbox
@pytest.mark.asyncio
async def test_list_dir_mock_s3(tmp_path, tmp_dir_structure, ctx_with_test_local_s3_stack_raw_data_path):
    """
    Test listing the files of a directory that was uploaded to the sandbox S3 storage.

    The fixture directory is uploaded with Dir.from_local and then listed twice through
    Dir.from_existing_remote: once with a trailing "/" appended to the remote path and
    once with the path exactly as returned by the upload. Both listings are expected to
    contain three entries.
    """
    from flyte.storage import S3

    await flyte.init.aio(storage=S3.for_sandbox())

    # Upload to S3
    uploaded_dir = await Dir.from_local(tmp_dir_structure)
    uploaded_path = uploaded_dir.path + "/"

    # List files with a trailing /
    replica_dir = Dir.from_existing_remote(uploaded_path)
    files = await replica_dir.list_files()
    assert len(files) == 3

    # List without the /
    replica_dir = Dir.from_existing_remote(uploaded_dir.path)
    files = await replica_dir.list_files()
    assert len(files) == 3
539+
540+
541+
@pytest.mark.asyncio
async def test_list_dir_local_fs(tmp_path, tmp_dir_structure, ctx_with_test_raw_data_path):
    """
    List the files of an uploaded directory when storage is the local filesystem (no S3).

    Local counterpart of test_list_dir_mock_s3: the fixture directory is uploaded with
    Dir.from_local, re-wrapped as a Dir whose path carries a trailing "/", and the
    listing is expected to contain three entries.
    """
    flyte.init()

    # Upload to the local "remote"
    pushed_dir = await Dir.from_local(tmp_dir_structure)

    # Wrap the uploaded location (with a trailing slash) and list its files
    path_with_slash = pushed_dir.path + "/"
    listing = await Dir(path=path_with_slash).list_files()
    assert len(listing) == 3

tests/internal/storage/test_storage.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,46 @@ async def test_storage_exists():
139139
assert not await storage.exists("/non-existent/test")
140140

141141

142+
@pytest.mark.sandbox
@pytest.mark.asyncio
async def test_get_underlying_filesystem_upload_download(tmp_path, ctx_with_test_local_s3_stack_raw_data_path):
    """
    Sandbox integration test that uses get_underlying_filesystem with the sandbox S3
    (LocalStack) to upload and download a file.

    Two round trips are exercised, and both verify the downloaded bytes:
      1. upload with storage.put, download with the underlying filesystem's get;
      2. upload with the underlying filesystem's put, download with storage.get.
    """
    from flyte.storage import S3

    await flyte.init.aio(storage=S3.for_sandbox())

    # Create a local file with known content
    original_content = b"hello from sandbox integration test"
    local_file = tmp_path / "upload_me.txt"
    local_file.write_bytes(original_content)

    # Upload the file to sandbox S3 via storage.put
    s3_path = "s3://bucket/tests/default_upload/upload_me.txt"
    await storage.put(str(local_file), s3_path)

    # Use get_underlying_filesystem to verify the file exists on S3
    fs = storage.get_underlying_filesystem(path=s3_path)
    assert fs.exists(s3_path)

    # Download the file back using get_underlying_filesystem
    downloaded_file = tmp_path / "downloaded.txt"
    fs.get(s3_path, str(downloaded_file))

    # Verify the downloaded content matches the original
    assert downloaded_file.read_bytes() == original_content

    # Also upload via the filesystem directly and read back with storage.get
    s3_path_2 = "s3://bucket/tests/default_upload/fs_uploaded.txt"
    fs.put(str(local_file), s3_path_2)

    downloaded_file_2 = tmp_path / "downloaded_2.txt"
    await storage.get(s3_path_2, str(downloaded_file_2))
    assert downloaded_file_2.exists()
    # Existence alone would not catch a truncated or wrong-content download,
    # so compare the bytes just as the first round trip does.
    assert downloaded_file_2.read_bytes() == original_content
180+
181+
142182
@pytest.mark.parametrize(
143183
"path,expected",
144184
[

0 commit comments

Comments
 (0)