Commit 697a8f7

Performance fixes

- HTTP_CLIENT is now static and includes a connection timeout (see the sketch below)
- StackableAuthorizer only constructs a single instance of StackableAccessControlEnforcer
- StackableAccessControlEnforcer no longer serializes to JSON twice
- Spotless formatting applied to the code

1 parent 1bda282 commit 697a8f7
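The first bullet replaces the per-instance `HttpClient.newHttpClient()` field with one shared client. A minimal sketch of the pattern, assuming the 30-second timeout used in this commit (the class name is only for illustration):

import java.net.http.HttpClient;
import java.time.Duration;

public class SharedClientExample {
  // One client per JVM: HttpClient owns its own connection pool and worker
  // threads, so building a client per enforcer instance re-created that
  // machinery needlessly.
  private static final HttpClient HTTP_CLIENT =
      HttpClient.newBuilder()
          // Bound how long an authorization check may block on an unreachable
          // OPA server; without this, connect attempts fall back to the
          // OS-level TCP timeout.
          .connectTimeout(Duration.ofSeconds(30))
          .build();
}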

3 files changed: +225 -168 lines changed

src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java

Lines changed: 194 additions & 139 deletions
@@ -7,6 +7,12 @@
 import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.SerializationFeature;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.time.Duration;
+import java.util.Objects;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
@@ -19,153 +25,202 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.net.URI;
-import java.net.http.HttpClient;
-import java.net.http.HttpRequest;
-import java.net.http.HttpResponse;
-import java.util.Objects;
-
-// As of 2024-02-09 INodeAttributeProvider.AccessControlEnforcer has two functions: The old - deprecated -
-// checkPermission and the new checkPermissionWithContext. HDFS uses reflection to check if the authorizer
-// supports the new API (which we do) and uses that in this case. This is also indicated by the log statement
-// "Use the new authorization provider API" during startup, see https://github.com/apache/hadoop/blob/50d256ef3c2531563bc6ba96dec6b78e154b4697/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java#L245
-// FSPermissionChecker (as a caller of the AccessControlEnforcer interface) has a ThreadLocal operationType, which
-// needs to be set to e.g. "create", "delete" or "rename" prior to calling the FSPermissionChecker.checkPermission
-// function, as it will actually check if operationType is null and will still use the old API in this case! But the old
-// API does not have the information about the operationType, which makes it hard to impossible to authorize the request.
-// As a consequence we only support the new API and will make sure no HDFS code path calls the old API. This required
-// minor patches to HDFS, as it was e.g. missing a call to FSPermissionChecker.setOperationType("create") in
+// As of 2024-02-09 INodeAttributeProvider.AccessControlEnforcer has two functions: The old -
+// deprecated -
+// checkPermission and the new checkPermissionWithContext. HDFS uses reflection to check if the
+// authorizer
+// supports the new API (which we do) and uses that in this case. This is also indicated by the log
+// statement
+// "Use the new authorization provider API" during startup, see
+// https://github.com/apache/hadoop/blob/50d256ef3c2531563bc6ba96dec6b78e154b4697/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java#L245
+// FSPermissionChecker (as a caller of the AccessControlEnforcer interface) has a ThreadLocal
+// operationType, which
+// needs to be set to e.g. "create", "delete" or "rename" prior to calling the
+// FSPermissionChecker.checkPermission
+// function, as it will actually check if operationType is null and will still use the old API in
+// this case! But the old
+// API does not have the information about the operationType, which makes it hard to impossible to
+// authorize the request.
+// As a consequence we only support the new API and will make sure no HDFS code path calls the old
+// API. This required
+// minor patches to HDFS, as it was e.g. missing a call to
+// FSPermissionChecker.setOperationType("create") in
 // FSNamesystem.startFileInt (this claim needs to be validated though).
-public class StackableAccessControlEnforcer implements INodeAttributeProvider.AccessControlEnforcer {
-
-    private static final Logger LOG = LoggerFactory.getLogger(StackableAccessControlEnforcer.class);
-
-    public static final String OPA_POLICY_URL_PROP = "hadoop.security.authorization.opa.policy.url";
-
-    private final HttpClient httpClient = HttpClient.newHttpClient();
-    private final ObjectMapper json;
-    private URI opaUri;
-
-    public StackableAccessControlEnforcer() {
-        LOG.debug("Starting StackableAccessControlEnforcer");
-
-        // Guaranteed to be only called once (Effective Java: Item 3)
-        Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration();
-
-        String opaPolicyUrl = configuration.get(OPA_POLICY_URL_PROP);
-        if (opaPolicyUrl == null) {
-            throw new OpaException.UriMissing(OPA_POLICY_URL_PROP);
-        }
-
-        try {
-            this.opaUri = URI.create(opaPolicyUrl);
-        } catch (Exception e) {
-            throw new OpaException.UriInvalid(opaUri, e);
-        }
-
-        this.json = new ObjectMapper()
-            // OPA server can send other fields, such as `decision_id`` when enabling decision logs
-            // We could add all the fields we *currently* know, but it's more future-proof to ignore any unknown fields
-            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
-            // Previously we were getting
-            // Caused by: com.fasterxml.jackson.databind.exc.InvalidDefinitionException: No serializer found for class org.apache.hadoop.hdfs.util.EnumCounters and no properties discovered to create BeanSerializer (to avoid exception, disable SerializationFeature.FAIL_ON_EMPTY_BEANS) (through reference chain: tech.stackable.HdfsOpaAccessControlEnforcer$ContextWrapper["inodeAttrs"]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory[0]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory["features"]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature[0]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature["spaceConsumed"]->org.apache.hadoop.hdfs.server.namenode.QuotaCounts["typeSpaces"])
-            .configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false)
-            // Only include the needed fields. HDFS has many classes with even more circular reference to remove
-            .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
-            .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.PUBLIC_ONLY)
-            .setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.PUBLIC_ONLY)
-            // We need to remove some circular pointers (e.g. root -> children[0] -> parent -> root)
-            // Otherwise we get com.fasterxml.jackson.databind.JsonMappingException: Infinite recursion (StackOverflowError)
-            .addMixIn(DatanodeDescriptor.class, DatanodeDescriptorMixin.class);
-
-        LOG.debug("Started HdfsOpaAccessControlEnforcer");
+public class StackableAccessControlEnforcer
+    implements INodeAttributeProvider.AccessControlEnforcer {
+
+  private static final Logger LOG = LoggerFactory.getLogger(StackableAccessControlEnforcer.class);
+
+  public static final String OPA_POLICY_URL_PROP = "hadoop.security.authorization.opa.policy.url";
+
+  private static final HttpClient HTTP_CLIENT =
+      HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
+  private final ObjectMapper json;
+  private URI opaUri;
+
+  public StackableAccessControlEnforcer() {
+    LOG.debug("Starting StackableAccessControlEnforcer");
+
+    // Guaranteed to be only called once (Effective Java: Item 3)
+    Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration();
+
+    String opaPolicyUrl = configuration.get(OPA_POLICY_URL_PROP);
+    if (opaPolicyUrl == null) {
+      throw new OpaException.UriMissing(OPA_POLICY_URL_PROP);
+    }
+
+    try {
+      opaUri = URI.create(opaPolicyUrl);
+    } catch (Exception e) {
+      throw new OpaException.UriInvalid(opaUri, e);
     }

-    private static class OpaQueryResult {
-        // Boxed Boolean to detect not-present vs explicitly false
-        public Boolean result;
+    json =
+        new ObjectMapper()
+            // OPA server can send other fields, such as `decision_id`` when enabling decision logs
+            // We could add all the fields we *currently* know, but it's more future-proof to ignore
+            // any unknown fields
+            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
+            // Previously we were getting
+            // Caused by: com.fasterxml.jackson.databind.exc.InvalidDefinitionException: No
+            // serializer found for class org.apache.hadoop.hdfs.util.EnumCounters and no properties
+            // discovered to create BeanSerializer (to avoid exception, disable
+            // SerializationFeature.FAIL_ON_EMPTY_BEANS) (through reference chain:
+            // tech.stackable.HdfsOpaAccessControlEnforcer$ContextWrapper["inodeAttrs"]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory[0]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory["features"]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature[0]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature["spaceConsumed"]->org.apache.hadoop.hdfs.server.namenode.QuotaCounts["typeSpaces"])
+            .configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false)
+            // Only include the needed fields. HDFS has many classes with even more circular
+            // reference to remove
+            .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
+            .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.PUBLIC_ONLY)
+            .setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.PUBLIC_ONLY)
+            // We need to remove some circular pointers (e.g. root -> children[0] -> parent -> root)
+            // Otherwise we get com.fasterxml.jackson.databind.JsonMappingException: Infinite
+            // recursion (StackOverflowError)
+            .addMixIn(DatanodeDescriptor.class, DatanodeDescriptorMixin.class);
+
+    LOG.debug("Started HdfsOpaAccessControlEnforcer");
+  }
+
+  private static class OpaQueryResult {
+    // Boxed Boolean to detect not-present vs explicitly false
+    public Boolean result;
+  }
+
+  @Override
+  public void checkPermission(
+      String fsOwner,
+      String supergroup,
+      UserGroupInformation ugi,
+      INodeAttributes[] inodeAttrs,
+      INode[] inodes,
+      byte[][] pathByNameArr,
+      int snapshotId,
+      String path,
+      int ancestorIndex,
+      boolean doCheckOwner,
+      FsAction ancestorAccess,
+      FsAction parentAccess,
+      FsAction access,
+      FsAction subAccess,
+      boolean ignoreEmptyDir)
+      throws AccessControlException {
+    LOG.warn("checkPermission called");
+
+    new Throwable().printStackTrace();
+    throw new AccessControlException(
+        "The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. "
+            + "This should not happen, as all HDFS code paths should call the new API. "
+            + "I dumped the stack trace for you (check active namenode logs), so you can figure out which code path it was. "
+            + "Please report all of that to author of the OPA authorizer by creating an issue in our repo: https://github.com/stackabletech/hdfs-utils"
+            + "Passed arguments: "
+            + "fsOwner: "
+            + fsOwner
+            + ", supergroup: "
+            + supergroup
+            + ", ugi: "
+            + ugi
+            + ", path: "
+            + path
+            + ", ancestorIndex:"
+            + ancestorIndex
+            + ", doCheckOwner: "
+            + doCheckOwner
+            + ", ancestorAccess: "
+            + ancestorAccess
+            + ", parentAccess: "
+            + parentAccess
+            + ", subAccess: "
+            + subAccess
+            + ", ignoreEmptyDir: "
+            + ignoreEmptyDir);
+  }
+
+  @Override
+  public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationContext authzContext)
+      throws AccessControlException {
+    OpaAllowQuery query = new OpaAllowQuery(new OpaAllowQuery.OpaAllowQueryInput(authzContext));
+
+    String body;
+    try {
+      body = json.writeValueAsString(query);
+    } catch (JsonProcessingException e) {
+      throw new OpaException.SerializeFailed(e);
     }

-    @Override
-    public void checkPermission(String fsOwner, String supergroup,
-        UserGroupInformation ugi, INodeAttributes[] inodeAttrs,
-        INode[] inodes, byte[][] pathByNameArr, int snapshotId, String path,
-        int ancestorIndex, boolean doCheckOwner, FsAction ancestorAccess,
-        FsAction parentAccess, FsAction access, FsAction subAccess,
-        boolean ignoreEmptyDir) throws AccessControlException {
-        LOG.warn("checkPermission called");
-
-        new Throwable().printStackTrace();
-        throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. " +
-            "This should not happen, as all HDFS code paths should call the new API. " +
-            "I dumped the stack trace for you (check active namenode logs), so you can figure out which code path it was. " +
-            "Please report all of that to author of the OPA authorizer (We don't have a stable GitHub link yet, sorry!) " +
-            "Passed arguments: " +
-            "fsOwner: " + fsOwner + ", supergroup: " + supergroup + ", ugi: " + ugi + ", path: " + path + ", ancestorIndex:" + ancestorIndex +
-            ", doCheckOwner: " + doCheckOwner + ", ancestorAccess: " + ancestorAccess + ", parentAccess: " + parentAccess +
-            ", subAccess: " + subAccess + ", ignoreEmptyDir: " + ignoreEmptyDir);
+    if (LOG.isDebugEnabled()) {
+      String prettyPrinted;
+      try {
+        prettyPrinted = json.writerWithDefaultPrettyPrinter().writeValueAsString(query);
+      } catch (JsonProcessingException e) {
+        LOG.error(
+            "Could not pretty print the following request body (but non-pretty print did work): {}",
+            body);
+        throw new OpaException.SerializeFailed(e);
+      }
+      LOG.debug("Request body:\n{}", prettyPrinted);
     }

-    @Override
-    public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationContext authzContext) throws AccessControlException {
-        OpaAllowQuery query = new OpaAllowQuery(new OpaAllowQuery.OpaAllowQueryInput(authzContext));
-
-        String body;
-        try {
-            body = json.writeValueAsString(query);
-        } catch (JsonProcessingException e) {
-            throw new OpaException.SerializeFailed(e);
-        }
-
-        String prettyPrinted;
-        try {
-            prettyPrinted = json.writerWithDefaultPrettyPrinter().writeValueAsString(query);
-        } catch (JsonProcessingException e) {
-            LOG.error("Could not pretty print the following request body (but non-pretty print did work): {}", body);
-            throw new OpaException.SerializeFailed(e);
-        }
-
-        LOG.debug("Request body:\n{}", prettyPrinted);
-        HttpResponse<String> response = null;
-        try {
-            response =
-                httpClient.send(
-                    HttpRequest.newBuilder(opaUri)
-                        .header("Content-Type", "application/json")
-                        .POST(HttpRequest.BodyPublishers.ofString(body))
-                        .build(),
-                    HttpResponse.BodyHandlers.ofString());
-            LOG.debug("Opa response: {}", response.body());
-        } catch (Exception e) {
-            LOG.error(e.getMessage());
-            throw new OpaException.QueryFailed(e);
-        }
-
-        switch (Objects.requireNonNull(response).statusCode()) {
-            case 200:
-                break;
-            case 404:
-                throw new OpaException.EndPointNotFound(opaUri.toString());
-            default:
-                throw new OpaException.OpaServerError(query.toString(), response);
-        }
-
-        OpaQueryResult result;
-        try {
-            result = json.readValue(response.body(), OpaQueryResult.class);
-        } catch (JsonProcessingException e) {
-            throw new OpaException.DeserializeFailed(e);
-        }
-
-        if (result.result == null || !result.result) {
-            throw new AccessControlException("OPA denied the request");
-        }
+    HttpResponse<String> response;
+    try {
+      response =
+          HTTP_CLIENT.send(
+              HttpRequest.newBuilder(opaUri)
+                  .header("Content-Type", "application/json")
+                  .POST(HttpRequest.BodyPublishers.ofString(body))
+                  .build(),
+              HttpResponse.BodyHandlers.ofString());
+      LOG.debug("OPA response: {}", response.body());
+    } catch (Exception e) {
+      LOG.error(e.getMessage());
+      throw new OpaException.QueryFailed(e);
     }

-    private abstract static class DatanodeDescriptorMixin {
-        @JsonIgnore
-        abstract INode getParent();
-        @JsonIgnore
-        abstract DatanodeStorageInfo[] getStorageInfos();
+    switch (Objects.requireNonNull(response).statusCode()) {
+      case 200:
+        break;
+      case 404:
+        throw new OpaException.EndPointNotFound(opaUri.toString());
+      default:
+        throw new OpaException.OpaServerError(query.toString(), response);
     }
+
+    OpaQueryResult result;
+    try {
+      result = json.readValue(response.body(), OpaQueryResult.class);
+    } catch (JsonProcessingException e) {
+      throw new OpaException.DeserializeFailed(e);
+    }
+
+    if (result.result == null || !result.result) {
+      throw new AccessControlException("OPA denied the request");
+    }
+  }
+
+  private abstract static class DatanodeDescriptorMixin {
+    @JsonIgnore
+    abstract INode getParent();
+
+    @JsonIgnore
+    abstract DatanodeStorageInfo[] getStorageInfos();
+  }
 }
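For orientation, the round trip that checkPermissionWithContext performs distills to the self-contained sketch below. The endpoint URL and request body are placeholders (in the enforcer the URI comes from hadoop.security.authorization.opa.policy.url and the body is the serialized OpaAllowQuery); the boxed-Boolean handling mirrors the OpaQueryResult class above:

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;

public class OpaRoundTripExample {
  // Mirrors OpaQueryResult: the boxed Boolean distinguishes a missing
  // "result" field (null) from an explicit false returned by the policy.
  static class Result {
    public Boolean result;
  }

  public static void main(String[] args) throws Exception {
    HttpClient client =
        HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
    // Hypothetical OPA data-API endpoint, stand-in for the configured URL.
    URI opaUri = URI.create("http://opa:8181/v1/data/hdfs/allow");
    // Stand-in payload for the serialized OpaAllowQuery.
    String body = "{\"input\": {\"operationName\": \"create\"}}";

    HttpResponse<String> response =
        client.send(
            HttpRequest.newBuilder(opaUri)
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(body))
                .build(),
            HttpResponse.BodyHandlers.ofString());

    Result result =
        new ObjectMapper()
            // OPA may also return fields such as decision_id; ignore unknowns.
            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
            .readValue(response.body(), Result.class);

    // Fail closed: anything other than an explicit {"result": true} is a deny.
    boolean allowed = result.result != null && result.result;
    System.out.println("allowed = " + allowed);
  }
}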

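The "no longer serializes to JSON twice" bullet corresponds to the new LOG.isDebugEnabled() guard in the diff: previously the pretty-printed copy of the request was produced on every call, even when debug logging was off. The pattern in isolation, with a hypothetical query object:

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DebugGuardExample {
  private static final Logger LOG = LoggerFactory.getLogger(DebugGuardExample.class);
  private static final ObjectMapper JSON = new ObjectMapper();

  static String serialize(Object query) throws JsonProcessingException {
    // The compact serialization is always needed: it becomes the HTTP request body.
    String body = JSON.writeValueAsString(query);
    if (LOG.isDebugEnabled()) {
      // The second, pretty-printed serialization now runs only when debug
      // logging will actually print it, not on every authorization check.
      LOG.debug("Request body:\n{}", JSON.writerWithDefaultPrettyPrinter().writeValueAsString(query));
    }
    return body;
  }
}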