Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -195,16 +195,28 @@ public boolean createDirectory(StoragePath path)
public List<StoragePathInfo> listDirectEntries(StoragePath path)
throws IOException
{
FileIterator fileIterator = fileSystem.listFiles(convertToLocation(path));
Location location = convertToLocation(path);
Optional<Boolean> dirExists = fileSystem.directoryExists(location);

// For hierarchical filesystems (HDFS): directoryExists returns true/false
// For non-hierarchical filesystems (S3): returns empty if directory doesn't exist
if (dirExists.isPresent() && !dirExists.get()) {
// Path exists but is not a directory
throw new FileNotFoundException("Path " + path + " does not exist");
}

FileIterator fileIterator = fileSystem.listFiles(location);
Set<StoragePathInfo> entryList = new HashSet<>();
while (fileIterator.hasNext()) {
entryList.add(getDirectEntryPathInfo(path, fileIterator.next()));
}
if (entryList.isEmpty()) {
// Based on the API definition, the `FileNotFoundException` should be thrown here
// so that Hudi logic can catch it and swallow it as needed

// For S3 (non-hierarchical): empty list means directory doesn't exist
// For HDFS (hierarchical): empty list is valid (empty directory)
if (entryList.isEmpty() && dirExists.isEmpty()) {
throw new FileNotFoundException("Path " + path + " does not exist");
}

return ImmutableList.copyOf(entryList);
}

Expand All @@ -224,7 +236,17 @@ public List<StoragePathInfo> listFiles(StoragePath path)
public List<StoragePathInfo> listDirectEntries(StoragePath path, StoragePathFilter filter)
throws IOException
{
FileIterator fileIterator = fileSystem.listFiles(convertToLocation(path));
Location location = convertToLocation(path);
Optional<Boolean> dirExists = fileSystem.directoryExists(location);

// For hierarchical filesystems (HDFS): directoryExists returns true/false
// For non-hierarchical filesystems (S3): returns empty if directory doesn't exist
if (dirExists.isPresent() && !dirExists.get()) {
// Path exists but is not a directory
throw new FileNotFoundException("Path " + path + " does not exist");
}

FileIterator fileIterator = fileSystem.listFiles(location);
ImmutableList.Builder<StoragePathInfo> listBuilder = ImmutableList.builder();
int count = 0;
while (fileIterator.hasNext()) {
Expand All @@ -234,11 +256,13 @@ public List<StoragePathInfo> listDirectEntries(StoragePath path, StoragePathFilt
listBuilder.add(pathInfo);
}
}
if (count == 0) {
// Based on the API definition, the `FileNotFoundException` should be thrown here
// so that Hudi logic can catch it and swallow it as needed

// For S3 (non-hierarchical): empty list means directory doesn't exist
// For HDFS (hierarchical): empty list is valid (empty directory)
if (count == 0 && dirExists.isEmpty()) {
throw new FileNotFoundException("Path " + path + " does not exist");
}

return listBuilder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,44 @@ private static void validatePathInfoList(
}
}

@Test
public void testListEmptyDirectory()
Comment on lines +296 to +297
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consider adding coverage for non-hierarchical filesystems (e.g., S3) when listing empty directories.

Please add a test using a non-hierarchical filesystem (such as S3) to ensure listing an empty directory behaves as expected and does not raise exceptions.

Suggested implementation:

    @Test
    public void testListEmptyDirectory()
            throws IOException
    {
        HoodieStorage storage = getStorage();

        // Create an empty directory
        StoragePath emptyDir = new StoragePath(getTempDir(), "empty_directory");
        assertThat(storage.createDirectory(emptyDir)).isTrue();
        assertThat(storage.exists(emptyDir)).isTrue();

        // List the empty directory - should return empty list, not throw exception
        List<StoragePathInfo> entries = storage.listDirectEntries(emptyDir);
        assertThat(entries).isEmpty();
    }

    @Test
    public void testListEmptyDirectoryOnS3()
            throws IOException
    {
        HoodieStorage s3Storage = getS3Storage();

        // Create an empty "directory" on S3 (non-hierarchical)
        StoragePath emptyS3Dir = new StoragePath(getS3TempDir(), "empty_s3_directory/");
        assertThat(s3Storage.createDirectory(emptyS3Dir)).isTrue();
        assertThat(s3Storage.exists(emptyS3Dir)).isTrue();

        // List the empty S3 "directory" - should return empty list, not throw exception
        List<StoragePathInfo> s3Entries = s3Storage.listDirectEntries(emptyS3Dir);
        assertThat(s3Entries).isEmpty();

You will need to implement or provide the following helper methods for the new test to work:

  • getS3Storage(): Returns a HoodieStorage instance backed by S3 (or a mock S3).
  • getS3TempDir(): Returns a StoragePath representing a temporary S3 directory for testing.

If you already have S3 test infrastructure, use those helpers. Otherwise, you may need to add them, possibly using a mock S3 service (e.g., LocalStack or Minio) for testing.

throws IOException
{
HoodieStorage storage = getStorage();

// Create an empty directory
StoragePath emptyDir = new StoragePath(getTempDir(), "empty_directory");
assertThat(storage.createDirectory(emptyDir)).isTrue();
assertThat(storage.exists(emptyDir)).isTrue();

// List the empty directory - should return empty list, not throw exception
List<StoragePathInfo> entries = storage.listDirectEntries(emptyDir);
assertThat(entries).isEmpty();

List<StoragePathInfo> files = storage.listFiles(emptyDir);
assertThat(files).isEmpty();
}

@Test
public void testListNonExistentDirectory()
throws IOException
{
HoodieStorage storage = getStorage();

// Try to list a non-existent directory - should throw FileNotFoundException
StoragePath nonExistentDir = new StoragePath(getTempDir(), "does_not_exist");
assertThat(storage.exists(nonExistentDir)).isFalse();

assertThatThrownBy(() -> storage.listDirectEntries(nonExistentDir))
.isInstanceOf(FileNotFoundException.class)
.hasMessageContaining("does not exist");

assertThatThrownBy(() -> storage.listDirectEntries(nonExistentDir, path -> true))
.isInstanceOf(FileNotFoundException.class)
.hasMessageContaining("does not exist");
}

private void prepareFilesOnStorage(HoodieStorage storage)
throws IOException
{
Expand Down