Skip to content

Commit e292750

Browse files
authored
HADOOP-19660: Add support for custom ClientAssertionProvider in WorkloadIdentityTokenProvider (#7901)
Contributed by Kunal Sevkani. Reviewed by Anmol Asrani
1 parent 29fca14 commit e292750

File tree

6 files changed

+568
-34
lines changed

6 files changed

+568
-34
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider;
5656
import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;
5757
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
58+
import org.apache.hadoop.fs.azurebfs.oauth2.ClientAssertionProvider;
5859
import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider;
5960
import org.apache.hadoop.fs.azurebfs.oauth2.CustomTokenProviderAdapter;
6061
import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider;
@@ -1330,12 +1331,38 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio
13301331
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT);
13311332
String clientId =
13321333
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID);
1333-
String tokenFile =
1334-
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE,
1335-
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE);
1336-
tokenProvider = new WorkloadIdentityTokenProvider(
1337-
authority, tenantGuid, clientId, tokenFile);
1338-
LOG.trace("WorkloadIdentityTokenProvider initialized");
1334+
1335+
// Check if a custom ClientAssertionProvider is configured
1336+
String clientAssertionProviderType =
1337+
getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ASSERTION_PROVIDER_TYPE);
1338+
1339+
if (clientAssertionProviderType != null && !clientAssertionProviderType.trim().isEmpty()) {
1340+
// Use custom ClientAssertionProvider
1341+
try {
1342+
Class<?> providerClass = Class.forName(clientAssertionProviderType.trim());
1343+
ClientAssertionProvider clientAssertionProvider =
1344+
(ClientAssertionProvider) providerClass.getDeclaredConstructor().newInstance();
1345+
1346+
// Initialize the provider with configuration
1347+
clientAssertionProvider.initialize(rawConfig, accountName);
1348+
1349+
tokenProvider = new WorkloadIdentityTokenProvider(
1350+
authority, tenantGuid, clientId, clientAssertionProvider);
1351+
LOG.trace("WorkloadIdentityTokenProvider initialized with custom ClientAssertionProvider: {}",
1352+
clientAssertionProviderType);
1353+
} catch (Exception e) {
1354+
throw new TokenAccessProviderException(
1355+
"Failed to initialize custom ClientAssertionProvider: " + clientAssertionProviderType, e);
1356+
}
1357+
} else {
1358+
// Use file-based approach (backward compatibility)
1359+
String tokenFile =
1360+
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE,
1361+
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE);
1362+
tokenProvider = new WorkloadIdentityTokenProvider(
1363+
authority, tenantGuid, clientId, tokenFile);
1364+
LOG.trace("WorkloadIdentityTokenProvider initialized with file-based token");
1365+
}
13391366
} else {
13401367
throw new IllegalArgumentException("Failed to initialize " + tokenProviderClass);
13411368
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,8 @@ public final class ConfigurationKeys {
339339
public static final String FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT = "fs.azure.account.oauth2.refresh.token.endpoint";
340340
/** Key for oauth AAD workload identity token file path: {@value}. */
341341
public static final String FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE = "fs.azure.account.oauth2.token.file";
342+
/** Key for the custom client assertion provider class used by WorkloadIdentityTokenProvider: {@value}. */
343+
public static final String FS_AZURE_ACCOUNT_OAUTH_CLIENT_ASSERTION_PROVIDER_TYPE = "fs.azure.account.oauth2.client.assertion.provider.type";
342344
/** Key for enabling the tracking of ABFS API latency and sending the latency numbers to the ABFS API service */
343345
public static final String FS_AZURE_ABFS_LATENCY_TRACK = "fs.azure.abfs.latency.track";
344346

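hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/ClientAssertionProvider.java

Lines changed: 72 additions & 0 deletions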
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.azurebfs.oauth2;
20+
21+
import java.io.IOException;
22+
23+
import org.apache.hadoop.classification.InterfaceAudience;
24+
import org.apache.hadoop.classification.InterfaceStability;
25+
import org.apache.hadoop.conf.Configuration;
26+
27+
/**
28+
* Interface for providing client assertions for Azure Workload Identity authentication.
29+
*
30+
* This interface allows custom implementations to provide JWT tokens through various mechanisms:
31+
* - Kubernetes Token Request API
32+
* - HashiCorp Vault
33+
* - Custom token services
34+
* - File-based tokens with custom logic
35+
*
36+
* Implementations should be thread-safe as they may be called concurrently.
37+
*/
38+
@InterfaceAudience.Public
39+
@InterfaceStability.Evolving
40+
public interface ClientAssertionProvider {
41+
42+
/**
43+
* Initializes the provider with the given configuration.
44+
* This method is called once after the provider is instantiated via reflection.
45+
*
46+
* @param configuration Hadoop configuration containing provider-specific settings
47+
* @param accountName Azure storage account name for account-specific configuration
48+
* @throws IOException if initialization fails
49+
*/
50+
void initialize(Configuration configuration, String accountName) throws IOException;
51+
52+
/**
53+
* Retrieves a client assertion (JWT token) for Azure Workload Identity authentication.
54+
*
55+
* The returned string should be a valid JWT token that can be used as a client assertion
56+
* in OAuth 2.0 client credentials flow with JWT bearer assertion.
57+
*
58+
* @return JWT token as a string
59+
* @throws IOException if token retrieval fails
60+
*/
61+
String getClientAssertion() throws IOException;
62+
63+
/**
64+
* Optional: Clean up resources when the provider is no longer needed.
65+
* Default implementation does nothing.
66+
*
67+
* @throws IOException if cleanup fails
68+
*/
69+
default void close() throws IOException {
70+
// Default: no-op
71+
}
72+
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,19 @@
2020

2121
import java.io.File;
2222
import java.io.IOException;
23+
import java.nio.charset.StandardCharsets;
2324

2425
import org.slf4j.Logger;
2526
import org.slf4j.LoggerFactory;
27+
2628
import org.apache.commons.io.FileUtils;
2729
import org.apache.hadoop.classification.VisibleForTesting;
30+
import org.apache.hadoop.conf.Configuration;
2831
import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
2932
import org.apache.hadoop.util.Preconditions;
3033

34+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING;
35+
3136
/**
3237
* Provides tokens based on Azure AD Workload Identity.
3338
*/
@@ -38,11 +43,73 @@ public class WorkloadIdentityTokenProvider extends AccessTokenProvider {
3843
private static final String EMPTY_TOKEN_FILE_ERROR = "Empty token file found at specified path: ";
3944
private static final String TOKEN_FILE_READ_ERROR = "Error reading token file at specified path: ";
4045

46+
/**
47+
* Internal implementation of ClientAssertionProvider for file-based token reading.
48+
* This provides backward compatibility for the file-based constructor.
49+
*/
50+
private static class FileBasedClientAssertionProvider implements ClientAssertionProvider {
51+
private final String tokenFile;
52+
53+
FileBasedClientAssertionProvider(String tokenFile) {
54+
this.tokenFile = tokenFile;
55+
}
56+
57+
@Override
58+
public void initialize(Configuration configuration, String accountName) throws IOException {
59+
// No initialization needed for file-based provider
60+
}
61+
62+
@Override
63+
public String getClientAssertion() throws IOException {
64+
String clientAssertion = EMPTY_STRING;
65+
try {
66+
File file = new File(tokenFile);
67+
clientAssertion = FileUtils.readFileToString(file, StandardCharsets.UTF_8);
68+
} catch (Exception e) {
69+
throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e);
70+
}
71+
clientAssertion = clientAssertion.trim();
72+
if (Strings.isNullOrEmpty(clientAssertion)) {
73+
throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile);
74+
}
75+
return clientAssertion;
76+
}
77+
}
78+
4179
private final String authEndpoint;
4280
private final String clientId;
43-
private final String tokenFile;
81+
private final ClientAssertionProvider clientAssertionProvider;
4482
private long tokenFetchTime = -1;
4583

84+
/**
85+
* Constructor with custom ClientAssertionProvider.
86+
* Use this for custom token retrieval mechanisms like Kubernetes Token Request API.
87+
*
88+
* @param authority OAuth authority URL
89+
* @param tenantId Azure AD tenant ID
90+
* @param clientId Azure AD client ID
91+
* @param clientAssertionProvider Custom provider for client assertions
92+
*/
93+
public WorkloadIdentityTokenProvider(final String authority, final String tenantId,
94+
final String clientId, ClientAssertionProvider clientAssertionProvider) {
95+
Preconditions.checkNotNull(authority, "authority");
96+
Preconditions.checkNotNull(tenantId, "tenantId");
97+
Preconditions.checkNotNull(clientId, "clientId");
98+
Preconditions.checkNotNull(clientAssertionProvider, "clientAssertionProvider");
99+
100+
this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH;
101+
this.clientId = clientId;
102+
this.clientAssertionProvider = clientAssertionProvider;
103+
}
104+
105+
/**
106+
* Constructor with file-based token reading (backward compatibility).
107+
*
108+
* @param authority OAuth authority URL
109+
* @param tenantId Azure AD tenant ID
110+
* @param clientId Azure AD client ID
111+
* @param tokenFile Path to file containing the JWT token
112+
*/
46113
public WorkloadIdentityTokenProvider(final String authority, final String tenantId,
47114
final String clientId, final String tokenFile) {
48115
Preconditions.checkNotNull(authority, "authority");
@@ -52,13 +119,13 @@ public WorkloadIdentityTokenProvider(final String authority, final String tenant
52119

53120
this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH;
54121
this.clientId = clientId;
55-
this.tokenFile = tokenFile;
122+
this.clientAssertionProvider = new FileBasedClientAssertionProvider(tokenFile);
56123
}
57124

58125
@Override
59126
protected AzureADToken refreshToken() throws IOException {
60127
LOG.debug("AADToken: refreshing token from JWT Assertion");
61-
String clientAssertion = getClientAssertion();
128+
String clientAssertion = clientAssertionProvider.getClientAssertion();
62129
AzureADToken token = getTokenUsingJWTAssertion(clientAssertion);
63130
tokenFetchTime = System.currentTimeMillis();
64131
return token;
@@ -90,31 +157,6 @@ protected boolean isTokenAboutToExpire() {
90157
return expiring;
91158
}
92159

93-
/**
94-
* Gets the client assertion from the token file.
95-
* The token file should contain the client assertion in JWT format.
96-
* It should be a String containing Base64Url encoded JSON Web Token (JWT).
97-
* See <a href="https://azure.github.io/azure-workload-identity/docs/faq.html#does-workload-identity-work-in-disconnected-environments">
98-
* Azure Workload Identity FAQ</a>.
99-
*
100-
* @return the client assertion.
101-
* @throws IOException if the token file is empty.
102-
*/
103-
private String getClientAssertion()
104-
throws IOException {
105-
String clientAssertion = "";
106-
try {
107-
File file = new File(tokenFile);
108-
clientAssertion = FileUtils.readFileToString(file, "UTF-8");
109-
} catch (Exception e) {
110-
throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e);
111-
}
112-
if (Strings.isNullOrEmpty(clientAssertion)) {
113-
throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile);
114-
}
115-
return clientAssertion;
116-
}
117-
118160
/**
119161
* Gets the Azure AD token from a client assertion in JWT format.
120162
* This method exists to make unit testing possible.

0 commit comments

Comments
 (0)