From df3b8429ba6f5780d41e194c69e5011c19420391 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Mon, 8 Sep 2025 21:51:33 +0530 Subject: [PATCH 1/5] HADOOP-19681: Fix S3A failing to initialize S3 buckets having namespace with dot followed by number --- .../apache/hadoop/fs/AbstractFileSystem.java | 2 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 5 ++++- .../hadoop/fs/s3a/tools/BucketTool.java | 2 +- .../hadoop/fs/s3native/S3xLoginHelper.java | 8 ++++++-- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 18 +++++++++++++++++- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 6 +++++- .../fs/s3a/TestBucketConfiguration.java | 19 +++++++++++++++++++ .../hadoop/fs/s3a/auth/ITestCustomSigner.java | 11 +++++++++++ .../hadoop/fs/s3a/auth/ITestJceksIO.java | 6 +++++- 9 files changed, 69 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index 7988ebb7904dc..7b613a93a773b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -335,7 +335,7 @@ private URI getUri(URI uri, String supportedScheme, int port = uri.getPort(); port = (port == -1 ? defaultPort : port); if (port == -1) { // no port supplied and default port is not specified - return new URI(supportedScheme, authority, "/", null); + return URI.create(supportedScheme + "://" + authority); } return new URI(supportedScheme + "://" + uri.getHost() + ":" + port); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index a45ed720b9ce3..783f03eb1a36a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -573,8 +573,11 @@ private static void addDeprecatedKeys() { */ public void initialize(URI name, Configuration originalConf) throws IOException { - // get the host; this is guaranteed to be non-null, non-empty + // get the host; fallback to authority if getHost() returns null bucket = name.getHost(); + if (bucket == null) { + bucket = name.getAuthority(); + } AuditSpan span = null; // track initialization duration; will only be set after // statistics are set up. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/BucketTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/BucketTool.java index 6bfcbcf776f14..915e9851be3b7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/BucketTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/BucketTool.java @@ -172,7 +172,7 @@ public int run(final String[] args, final PrintStream out) final String bucketPath = parsedArgs.get(0); final Path source = new Path(bucketPath); URI fsURI = source.toUri(); - String bucket = fsURI.getHost(); + String bucket = fsURI.getHost() != null ? fsURI.getHost() : fsURI.getAuthority(); println(out, "Filesystem %s", fsURI); if (!"s3a".equals(fsURI.getScheme())) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java index f10f28d9f7b01..e3d3fd51f74e1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java @@ -56,8 +56,12 @@ public static URI buildFSURI(URI uri) { // look for login secrets and fail if they are present. Objects.requireNonNull(uri, "null uri"); Objects.requireNonNull(uri.getScheme(), "null uri.getScheme()"); - Objects.requireNonNull(uri.getHost(), "null uri host."); - return URI.create(uri.getScheme() + "://" + uri.getHost()); + String host = uri.getHost(); + if (host == null) { + host = uri.getAuthority(); + } + Objects.requireNonNull(host, "null uri host."); + return URI.create(uri.getScheme() + "://" + host); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 7222e923e963b..4f0805308c856 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -48,6 +48,7 @@ import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; @@ -594,13 +595,28 @@ private static T getField(Object target, Class fieldType, public void testConfOptionPropagationToFS() throws Exception { Configuration config = new Configuration(); String testFSName = config.getTrimmed(TEST_FS_S3A_NAME, ""); - String bucket = new URI(testFSName).getHost(); + URI uri = new URI(testFSName); + String bucket = uri.getHost(); + if (bucket == null) { + bucket = uri.getAuthority(); + } setBucketOption(config, bucket, "propagation", "propagated"); fs = S3ATestUtils.createTestFileSystem(config); Configuration updated = fs.getConf(); assertOptionEquals(updated, "fs.s3a.propagation", "propagated"); } + @Test + public void testBucketNameWithDotAndNumber() throws Exception { + Configuration config = new Configuration(); + Path path = new Path("s3a://test-bucket-v1.1"); + try (FileSystem fs = path.getFileSystem(config)) { + assertThat(fs instanceof S3AFileSystem) + .describedAs("FileSystem should be S3AFileSystem instance") + .isTrue(); + } + } + @Test @Timeout(10) public void testS3SpecificSignerOverride() throws Exception { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 07af6e6b1aa41..2795878bf16bd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -883,7 +883,11 @@ public static T roundTrip( public static String getTestBucketName(final Configuration conf) { String bucket = checkNotNull(conf.get(TEST_FS_S3A_NAME), "No test bucket"); - return URI.create(bucket).getHost(); + URI uri = URI.create(bucket); + if (uri.getHost() != null) { + return uri.getHost(); + } + return uri.getAuthority(); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java index 77586d357ca98..d68813f4901d4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java @@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; +import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProviderFactory; @@ -75,6 +76,24 @@ public void setup() throws Exception { S3AFileSystem.initializeClass(); } + @Test + public void testS3xLoginHelperWithDotInBucketName() throws Throwable { + // Test buildFSURI with bucket name containing dot followed by number + URI uri = URI.create("s3a://bucket-v1.1/path"); + URI result = S3xLoginHelper.buildFSURI(uri); + assertEquals("s3a://bucket-v1.1", result.toString()); + + // Test with normal bucket name + URI normalUri = URI.create("s3a://normal-bucket/path"); + URI normalResult = S3xLoginHelper.buildFSURI(normalUri); + assertEquals("s3a://normal-bucket", normalResult.toString()); + + // Test edge case with multiple dots + URI multiDotUri = URI.create("s3a://bucket.v1.2.test/path"); + URI multiDotResult = S3xLoginHelper.buildFSURI(multiDotUri); + assertEquals("s3a://bucket.v1.2.test", multiDotResult.toString()); + } + @Test public void testBucketConfigurationPropagation() throws Throwable { Configuration config = new Configuration(false); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java index 1a4d354d5edc8..667bf0643ecc6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java @@ -283,6 +283,17 @@ public SdkHttpFullRequest sign(SdkHttpFullRequest request, String host = request.host(); String bucketName = parseBucketFromHost(host); + // If host-based parsing fails (path-style requests), extract from path + if (bucketName.equals("s3")) { + String path = request.encodedPath(); + if (path != null && path.startsWith("/") && path.length() > 1) { + String[] pathParts = path.substring(1).split("/", 2); + if (pathParts.length > 0 && !pathParts[0].isEmpty()) { + bucketName = pathParts[0]; + } + } + } + try { lastStoreValue = CustomSignerInitializer .getStoreValue(bucketName, UserGroupInformation.getCurrentUser()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestJceksIO.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestJceksIO.java index d13af064e1a88..c179e0915ed72 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestJceksIO.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestJceksIO.java @@ -187,8 +187,12 @@ private String run(CredentialShell cs, String expected, String... args) */ private String toJceksProvider(Path keystore) { final URI uri = keystore.toUri(); + String bucket = uri.getHost(); + if (bucket == null) { + bucket = uri.getAuthority(); + } return String.format("jceks://%s@%s%s", - uri.getScheme(), uri.getHost(), uri.getPath()); + uri.getScheme(), bucket, uri.getPath()); } } From e64bbd36f1227bd6aa936e91b236644b1305d9d3 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Tue, 9 Sep 2025 11:18:47 +0530 Subject: [PATCH 2/5] Test fix --- .../src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index 7b613a93a773b..e23cc43838e3f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -335,7 +335,7 @@ private URI getUri(URI uri, String supportedScheme, int port = uri.getPort(); port = (port == -1 ? defaultPort : port); if (port == -1) { // no port supplied and default port is not specified - return URI.create(supportedScheme + "://" + authority); + return URI.create(supportedScheme + "://" + authority + "/"); } return new URI(supportedScheme + "://" + uri.getHost() + ":" + port); } From b564de25c63213fe1234b5011d929d5205403995 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Tue, 16 Sep 2025 12:18:50 +0530 Subject: [PATCH 3/5] Test changes --- .../site/markdown/tools/hadoop-aws/index.md | 6 ++++ .../hadoop/fs/s3a/ITestS3AConfiguration.java | 11 ------- .../fs/s3a/TestBucketConfiguration.java | 33 +++++++++++++++++++ 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 1f65caeb5e219..6a1cf1cab303a 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -31,6 +31,12 @@ before 2021. Consult [S3A and Directory Markers](directory_markers.html) for full details. +### S3 Bucket Name Compatibility + +This release adds support for S3 bucket names containing dots followed by numbers +(e.g., `my-bucket-v1.1`, `data-store.v2.3`). Previous versions of the Hadoop S3A +client failed to initialize such buckets due to URI parsing limitations. + ## Documents * [Connecting](./connecting.html) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 4f0805308c856..fcc9dee9c1ba3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -606,17 +606,6 @@ public void testConfOptionPropagationToFS() throws Exception { assertOptionEquals(updated, "fs.s3a.propagation", "propagated"); } - @Test - public void testBucketNameWithDotAndNumber() throws Exception { - Configuration config = new Configuration(); - Path path = new Path("s3a://test-bucket-v1.1"); - try (FileSystem fs = path.getFileSystem(config)) { - assertThat(fs instanceof S3AFileSystem) - .describedAs("FileSystem should be S3AFileSystem instance") - .isTrue(); - } - } - @Test @Timeout(10) public void testS3SpecificSignerOverride() throws Exception { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java index d68813f4901d4..ee1f917085a5c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java @@ -28,6 +28,7 @@ import org.junit.jupiter.api.io.TempDir; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.security.ProviderUtils; @@ -94,6 +95,38 @@ public void testS3xLoginHelperWithDotInBucketName() throws Throwable { assertEquals("s3a://bucket.v1.2.test", multiDotResult.toString()); } + @Test + public void testBucketNameWithDotAndNumber() throws Exception { + Configuration config = new Configuration(); + URI uri = URI.create("s3a://test-bucket-v1.1"); + String bucket = uri.getHost(); + if (bucket == null) { + bucket = uri.getAuthority(); + } + assertThat(bucket) + .describedAs("Bucket name should be extracted correctly") + .isEqualTo("test-bucket-v1.1"); + } + + @Test + public void testFileSystemCacheForBucketWithDotAndNumber() throws Exception { + Configuration config = new Configuration(); + URI uri1 = URI.create("s3a://test-bucket-v1.1"); + URI uri2 = URI.create("s3a://test-bucket-v1.2"); + + FileSystem fs1a = FileSystem.get(uri1, config); + FileSystem fs1b = FileSystem.get(uri1, config); + FileSystem fs2 = FileSystem.get(uri2, config); + + assertThat(fs1a) + .describedAs("FileSystem.get should return same cached instance for same URI") + .isSameAs(fs1b); + + assertThat(fs1a) + .describedAs("FileSystem.get should return different instance for different bucket") + .isNotSameAs(fs2); + } + @Test public void testBucketConfigurationPropagation() throws Throwable { Configuration config = new Configuration(false); From 5014c326331ebcc9e8cb30cf2632774733c6ce56 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Tue, 16 Sep 2025 12:53:39 +0530 Subject: [PATCH 4/5] Test changes --- .../hadoop/fs/s3a/TestBucketConfiguration.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java index ee1f917085a5c..567d10e9cc176 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java @@ -98,14 +98,13 @@ public void testS3xLoginHelperWithDotInBucketName() throws Throwable { @Test public void testBucketNameWithDotAndNumber() throws Exception { Configuration config = new Configuration(); - URI uri = URI.create("s3a://test-bucket-v1.1"); - String bucket = uri.getHost(); - if (bucket == null) { - bucket = uri.getAuthority(); + org.apache.hadoop.fs.Path path = + new org.apache.hadoop.fs.Path("s3a://test-bucket-v1.1"); + try (FileSystem fs = path.getFileSystem(config)) { + assertThat(fs) + .describedAs("FileSystem should be S3AFileSystem instance") + .isInstanceOf(S3AFileSystem.class); } - assertThat(bucket) - .describedAs("Bucket name should be extracted correctly") - .isEqualTo("test-bucket-v1.1"); } @Test @@ -119,11 +118,11 @@ public void testFileSystemCacheForBucketWithDotAndNumber() throws Exception { FileSystem fs2 = FileSystem.get(uri2, config); assertThat(fs1a) - .describedAs("FileSystem.get should return same cached instance for same URI") + .describedAs("The call should return same cached instance for same URI") .isSameAs(fs1b); assertThat(fs1a) - .describedAs("FileSystem.get should return different instance for different bucket") + .describedAs("The call should return different instance for different bucket") .isNotSameAs(fs2); } From f86ae95b09e52b9c71c1d9d937b060aa9409c096 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Tue, 16 Sep 2025 17:42:59 +0530 Subject: [PATCH 5/5] fix checkstyle --- .../java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java | 1 - .../org/apache/hadoop/fs/s3a/TestBucketConfiguration.java | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index fcc9dee9c1ba3..8821d680ea072 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -48,7 +48,6 @@ import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java index 567d10e9cc176..d6471a730a2a7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestBucketConfiguration.java @@ -116,11 +116,11 @@ public void testFileSystemCacheForBucketWithDotAndNumber() throws Exception { FileSystem fs1a = FileSystem.get(uri1, config); FileSystem fs1b = FileSystem.get(uri1, config); FileSystem fs2 = FileSystem.get(uri2, config); - + assertThat(fs1a) .describedAs("The call should return same cached instance for same URI") .isSameAs(fs1b); - + assertThat(fs1a) .describedAs("The call should return different instance for different bucket") .isNotSameAs(fs2);