From 29641b3a6c2884a82fdf11df181a34a823c240b5 Mon Sep 17 00:00:00 2001 From: Salman Niazi Date: Thu, 9 Oct 2025 15:27:55 +0200 Subject: [PATCH] Fix listing empty directories in TrinoHudiStorage --- .../plugin/hudi/storage/TrinoHudiStorage.java | 40 +++++++++++++++---- .../hudi/storage/TestTrinoHudiStorage.java | 38 ++++++++++++++++++ 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/storage/TrinoHudiStorage.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/storage/TrinoHudiStorage.java index ffdf61881151..7bb0d116bcb1 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/storage/TrinoHudiStorage.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/storage/TrinoHudiStorage.java @@ -195,16 +195,28 @@ public boolean createDirectory(StoragePath path) public List listDirectEntries(StoragePath path) throws IOException { - FileIterator fileIterator = fileSystem.listFiles(convertToLocation(path)); + Location location = convertToLocation(path); + Optional dirExists = fileSystem.directoryExists(location); + + // For hierarchical filesystems (HDFS): directoryExists returns true/false + // For non-hierarchical filesystems (S3): returns empty if directory doesn't exist + if (dirExists.isPresent() && !dirExists.get()) { + // Path exists but is not a directory + throw new FileNotFoundException("Path " + path + " does not exist"); + } + + FileIterator fileIterator = fileSystem.listFiles(location); Set entryList = new HashSet<>(); while (fileIterator.hasNext()) { entryList.add(getDirectEntryPathInfo(path, fileIterator.next())); } - if (entryList.isEmpty()) { - // Based on the API definition, the `FileNotFoundException` should be thrown here - // so that Hudi logic can catch it and swallow it as needed + + // For S3 (non-hierarchical): empty list means directory doesn't exist + // For HDFS (hierarchical): empty list is valid (empty directory) + if (entryList.isEmpty() && dirExists.isEmpty()) { throw new FileNotFoundException("Path " + path + " does not exist"); } + return ImmutableList.copyOf(entryList); } @@ -224,7 +236,17 @@ public List listFiles(StoragePath path) public List listDirectEntries(StoragePath path, StoragePathFilter filter) throws IOException { - FileIterator fileIterator = fileSystem.listFiles(convertToLocation(path)); + Location location = convertToLocation(path); + Optional dirExists = fileSystem.directoryExists(location); + + // For hierarchical filesystems (HDFS): directoryExists returns true/false + // For non-hierarchical filesystems (S3): returns empty if directory doesn't exist + if (dirExists.isPresent() && !dirExists.get()) { + // Path exists but is not a directory + throw new FileNotFoundException("Path " + path + " does not exist"); + } + + FileIterator fileIterator = fileSystem.listFiles(location); ImmutableList.Builder listBuilder = ImmutableList.builder(); int count = 0; while (fileIterator.hasNext()) { @@ -234,11 +256,13 @@ public List listDirectEntries(StoragePath path, StoragePathFilt listBuilder.add(pathInfo); } } - if (count == 0) { - // Based on the API definition, the `FileNotFoundException` should be thrown here - // so that Hudi logic can catch it and swallow it as needed + + // For S3 (non-hierarchical): empty list means directory doesn't exist + // For HDFS (hierarchical): empty list is valid (empty directory) + if (count == 0 && dirExists.isEmpty()) { throw new FileNotFoundException("Path " + path + " does not exist"); } + return listBuilder.build(); } diff --git a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/storage/TestTrinoHudiStorage.java b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/storage/TestTrinoHudiStorage.java index ae5f98bccfa0..2268195b1e57 100644 --- a/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/storage/TestTrinoHudiStorage.java +++ b/plugin/trino-hudi/src/test/java/io/trino/plugin/hudi/storage/TestTrinoHudiStorage.java @@ -293,6 +293,44 @@ private static void validatePathInfoList( } } + @Test + public void testListEmptyDirectory() + throws IOException + { + HoodieStorage storage = getStorage(); + + // Create an empty directory + StoragePath emptyDir = new StoragePath(getTempDir(), "empty_directory"); + assertThat(storage.createDirectory(emptyDir)).isTrue(); + assertThat(storage.exists(emptyDir)).isTrue(); + + // List the empty directory - should return empty list, not throw exception + List entries = storage.listDirectEntries(emptyDir); + assertThat(entries).isEmpty(); + + List files = storage.listFiles(emptyDir); + assertThat(files).isEmpty(); + } + + @Test + public void testListNonExistentDirectory() + throws IOException + { + HoodieStorage storage = getStorage(); + + // Try to list a non-existent directory - should throw FileNotFoundException + StoragePath nonExistentDir = new StoragePath(getTempDir(), "does_not_exist"); + assertThat(storage.exists(nonExistentDir)).isFalse(); + + assertThatThrownBy(() -> storage.listDirectEntries(nonExistentDir)) + .isInstanceOf(FileNotFoundException.class) + .hasMessageContaining("does not exist"); + + assertThatThrownBy(() -> storage.listDirectEntries(nonExistentDir, path -> true)) + .isInstanceOf(FileNotFoundException.class) + .hasMessageContaining("does not exist"); + } + private void prepareFilesOnStorage(HoodieStorage storage) throws IOException {