|
7 | 7 | import com.fasterxml.jackson.databind.DeserializationFeature; |
8 | 8 | import com.fasterxml.jackson.databind.ObjectMapper; |
9 | 9 | import com.fasterxml.jackson.databind.SerializationFeature; |
| 10 | +import java.net.URI; |
| 11 | +import java.net.http.HttpClient; |
| 12 | +import java.net.http.HttpRequest; |
| 13 | +import java.net.http.HttpResponse; |
| 14 | +import java.time.Duration; |
| 15 | +import java.util.Objects; |
10 | 16 | import org.apache.hadoop.conf.Configuration; |
11 | 17 | import org.apache.hadoop.fs.permission.FsAction; |
12 | 18 | import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; |
|
19 | 25 | import org.slf4j.Logger; |
20 | 26 | import org.slf4j.LoggerFactory; |
21 | 27 |
|
22 | | -import java.net.URI; |
23 | | -import java.net.http.HttpClient; |
24 | | -import java.net.http.HttpRequest; |
25 | | -import java.net.http.HttpResponse; |
26 | | -import java.util.Objects; |
27 | | - |
28 | | -// As of 2024-02-09 INodeAttributeProvider.AccessControlEnforcer has two functions: The old - deprecated - |
29 | | -// checkPermission and the new checkPermissionWithContext. HDFS uses reflection to check if the authorizer |
30 | | -// supports the new API (which we do) and uses that in this case. This is also indicated by the log statement |
31 | | -// "Use the new authorization provider API" during startup, see https://github.com/apache/hadoop/blob/50d256ef3c2531563bc6ba96dec6b78e154b4697/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java#L245 |
32 | | -// FSPermissionChecker (as a caller of the AccessControlEnforcer interface) has a ThreadLocal operationType, which |
33 | | -// needs to be set to e.g. "create", "delete" or "rename" prior to calling the FSPermissionChecker.checkPermission |
34 | | -// function, as it will actually check if operationType is null and will still use the old API in this case! But the old |
35 | | -// API does not have the information about the operationType, which makes it hard to impossible to authorize the request. |
36 | | -// As a consequence we only support the new API and will make sure no HDFS code path calls the old API. This required |
37 | | -// minor patches to HDFS, as it was e.g. missing a call to FSPermissionChecker.setOperationType("create") in |
| 28 | +// As of 2024-02-09 INodeAttributeProvider.AccessControlEnforcer has two functions: The old - |
| 29 | +// deprecated - |
| 30 | +// checkPermission and the new checkPermissionWithContext. HDFS uses reflection to check if the |
| 31 | +// authorizer |
| 32 | +// supports the new API (which we do) and uses that in this case. This is also indicated by the log |
| 33 | +// statement |
| 34 | +// "Use the new authorization provider API" during startup, see |
| 35 | +// https://github.com/apache/hadoop/blob/50d256ef3c2531563bc6ba96dec6b78e154b4697/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java#L245 |
| 36 | +// FSPermissionChecker (as a caller of the AccessControlEnforcer interface) has a ThreadLocal |
| 37 | +// operationType, which |
| 38 | +// needs to be set to e.g. "create", "delete" or "rename" prior to calling the |
| 39 | +// FSPermissionChecker.checkPermission |
| 40 | +// function, as it will actually check if operationType is null and will still use the old API in |
| 41 | +// this case! But the old |
| 42 | +// API does not have the information about the operationType, which makes it hard to impossible to |
| 43 | +// authorize the request. |
| 44 | +// As a consequence we only support the new API and will make sure no HDFS code path calls the old |
| 45 | +// API. This required |
| 46 | +// minor patches to HDFS, as it was e.g. missing a call to |
| 47 | +// FSPermissionChecker.setOperationType("create") in |
38 | 48 | // FSNamesystem.startFileInt (this claim needs to be validated though). |
39 | | -public class StackableAccessControlEnforcer implements INodeAttributeProvider.AccessControlEnforcer { |
40 | | - |
41 | | - private static final Logger LOG = LoggerFactory.getLogger(StackableAccessControlEnforcer.class); |
42 | | - |
43 | | - public static final String OPA_POLICY_URL_PROP = "hadoop.security.authorization.opa.policy.url"; |
44 | | - |
45 | | - private final HttpClient httpClient = HttpClient.newHttpClient(); |
46 | | - private final ObjectMapper json; |
47 | | - private URI opaUri; |
48 | | - |
49 | | - public StackableAccessControlEnforcer() { |
50 | | - LOG.debug("Starting StackableAccessControlEnforcer"); |
51 | | - |
52 | | - // Guaranteed to be only called once (Effective Java: Item 3) |
53 | | - Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration(); |
54 | | - |
55 | | - String opaPolicyUrl = configuration.get(OPA_POLICY_URL_PROP); |
56 | | - if (opaPolicyUrl == null) { |
57 | | - throw new OpaException.UriMissing(OPA_POLICY_URL_PROP); |
58 | | - } |
59 | | - |
60 | | - try { |
61 | | - this.opaUri = URI.create(opaPolicyUrl); |
62 | | - } catch (Exception e) { |
63 | | - throw new OpaException.UriInvalid(opaUri, e); |
64 | | - } |
65 | | - |
66 | | - this.json = new ObjectMapper() |
67 | | - // OPA server can send other fields, such as `decision_id`` when enabling decision logs |
68 | | - // We could add all the fields we *currently* know, but it's more future-proof to ignore any unknown fields |
69 | | - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) |
70 | | - // Previously we were getting |
71 | | - // Caused by: com.fasterxml.jackson.databind.exc.InvalidDefinitionException: No serializer found for class org.apache.hadoop.hdfs.util.EnumCounters and no properties discovered to create BeanSerializer (to avoid exception, disable SerializationFeature.FAIL_ON_EMPTY_BEANS) (through reference chain: tech.stackable.HdfsOpaAccessControlEnforcer$ContextWrapper["inodeAttrs"]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory[0]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory["features"]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature[0]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature["spaceConsumed"]->org.apache.hadoop.hdfs.server.namenode.QuotaCounts["typeSpaces"]) |
72 | | - .configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false) |
73 | | - // Only include the needed fields. HDFS has many classes with even more circular reference to remove |
74 | | - .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE) |
75 | | - .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.PUBLIC_ONLY) |
76 | | - .setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.PUBLIC_ONLY) |
77 | | - // We need to remove some circular pointers (e.g. root -> children[0] -> parent -> root) |
78 | | - // Otherwise we get com.fasterxml.jackson.databind.JsonMappingException: Infinite recursion (StackOverflowError) |
79 | | - .addMixIn(DatanodeDescriptor.class, DatanodeDescriptorMixin.class); |
80 | | - |
81 | | - LOG.debug("Started HdfsOpaAccessControlEnforcer"); |
| 49 | +public class StackableAccessControlEnforcer |
| 50 | + implements INodeAttributeProvider.AccessControlEnforcer { |
| 51 | + |
| 52 | + private static final Logger LOG = LoggerFactory.getLogger(StackableAccessControlEnforcer.class); |
| 53 | + |
| 54 | + public static final String OPA_POLICY_URL_PROP = "hadoop.security.authorization.opa.policy.url"; |
| 55 | + |
| 56 | + private static final HttpClient HTTP_CLIENT = |
| 57 | + HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build(); |
| 58 | + private final ObjectMapper json; |
| 59 | + private URI opaUri; |
| 60 | + |
| 61 | + public StackableAccessControlEnforcer() { |
| 62 | + LOG.debug("Starting StackableAccessControlEnforcer"); |
| 63 | + |
| 64 | + // Guaranteed to be only called once (Effective Java: Item 3) |
| 65 | + Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration(); |
| 66 | + |
| 67 | + String opaPolicyUrl = configuration.get(OPA_POLICY_URL_PROP); |
| 68 | + if (opaPolicyUrl == null) { |
| 69 | + throw new OpaException.UriMissing(OPA_POLICY_URL_PROP); |
| 70 | + } |
| 71 | + |
| 72 | + try { |
| 73 | + opaUri = URI.create(opaPolicyUrl); |
| 74 | + } catch (Exception e) { |
| 75 | + throw new OpaException.UriInvalid(opaUri, e); |
82 | 76 | } |
83 | 77 |
|
84 | | - private static class OpaQueryResult { |
85 | | - // Boxed Boolean to detect not-present vs explicitly false |
86 | | - public Boolean result; |
| 78 | + json = |
| 79 | + new ObjectMapper() |
| 80 | +              // OPA server can send other fields, such as `decision_id` when enabling decision logs
| 81 | + // We could add all the fields we *currently* know, but it's more future-proof to ignore |
| 82 | + // any unknown fields |
| 83 | + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) |
| 84 | + // Previously we were getting |
| 85 | + // Caused by: com.fasterxml.jackson.databind.exc.InvalidDefinitionException: No |
| 86 | + // serializer found for class org.apache.hadoop.hdfs.util.EnumCounters and no properties |
| 87 | + // discovered to create BeanSerializer (to avoid exception, disable |
| 88 | + // SerializationFeature.FAIL_ON_EMPTY_BEANS) (through reference chain: |
| 89 | + // tech.stackable.HdfsOpaAccessControlEnforcer$ContextWrapper["inodeAttrs"]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory[0]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory["features"]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature[0]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature["spaceConsumed"]->org.apache.hadoop.hdfs.server.namenode.QuotaCounts["typeSpaces"]) |
| 90 | + .configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false) |
| 91 | + // Only include the needed fields. HDFS has many classes with even more circular |
| 92 | +              // references to remove
| 93 | + .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE) |
| 94 | + .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.PUBLIC_ONLY) |
| 95 | + .setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.PUBLIC_ONLY) |
| 96 | + // We need to remove some circular pointers (e.g. root -> children[0] -> parent -> root) |
| 97 | + // Otherwise we get com.fasterxml.jackson.databind.JsonMappingException: Infinite |
| 98 | + // recursion (StackOverflowError) |
| 99 | + .addMixIn(DatanodeDescriptor.class, DatanodeDescriptorMixin.class); |
| 100 | + |
| 101 | + LOG.debug("Started HdfsOpaAccessControlEnforcer"); |
| 102 | + } |
| 103 | + |
| 104 | + private static class OpaQueryResult { |
| 105 | + // Boxed Boolean to detect not-present vs explicitly false |
| 106 | + public Boolean result; |
| 107 | + } |
| 108 | + |
| 109 | + @Override |
| 110 | + public void checkPermission( |
| 111 | + String fsOwner, |
| 112 | + String supergroup, |
| 113 | + UserGroupInformation ugi, |
| 114 | + INodeAttributes[] inodeAttrs, |
| 115 | + INode[] inodes, |
| 116 | + byte[][] pathByNameArr, |
| 117 | + int snapshotId, |
| 118 | + String path, |
| 119 | + int ancestorIndex, |
| 120 | + boolean doCheckOwner, |
| 121 | + FsAction ancestorAccess, |
| 122 | + FsAction parentAccess, |
| 123 | + FsAction access, |
| 124 | + FsAction subAccess, |
| 125 | + boolean ignoreEmptyDir) |
| 126 | + throws AccessControlException { |
| 127 | + LOG.warn("checkPermission called"); |
| 128 | + |
| 129 | + new Throwable().printStackTrace(); |
| 130 | + throw new AccessControlException( |
| 131 | + "The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. " |
| 132 | + + "This should not happen, as all HDFS code paths should call the new API. " |
| 133 | + + "I dumped the stack trace for you (check active namenode logs), so you can figure out which code path it was. " |
| 134 | +            + "Please report all of that to the author of the OPA authorizer by creating an issue in our repo: https://github.com/stackabletech/hdfs-utils"
| 135 | +            + ". Passed arguments: "
| 136 | + + "fsOwner: " |
| 137 | + + fsOwner |
| 138 | + + ", supergroup: " |
| 139 | + + supergroup |
| 140 | + + ", ugi: " |
| 141 | + + ugi |
| 142 | + + ", path: " |
| 143 | + + path |
| 144 | + + ", ancestorIndex:" |
| 145 | + + ancestorIndex |
| 146 | + + ", doCheckOwner: " |
| 147 | + + doCheckOwner |
| 148 | + + ", ancestorAccess: " |
| 149 | + + ancestorAccess |
| 150 | + + ", parentAccess: " |
| 151 | + + parentAccess |
| 152 | + + ", subAccess: " |
| 153 | + + subAccess |
| 154 | + + ", ignoreEmptyDir: " |
| 155 | + + ignoreEmptyDir); |
| 156 | + } |
| 157 | + |
| 158 | + @Override |
| 159 | + public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationContext authzContext) |
| 160 | + throws AccessControlException { |
| 161 | + OpaAllowQuery query = new OpaAllowQuery(new OpaAllowQuery.OpaAllowQueryInput(authzContext)); |
| 162 | + |
| 163 | + String body; |
| 164 | + try { |
| 165 | + body = json.writeValueAsString(query); |
| 166 | + } catch (JsonProcessingException e) { |
| 167 | + throw new OpaException.SerializeFailed(e); |
87 | 168 | } |
88 | 169 |
|
89 | | - @Override |
90 | | - public void checkPermission(String fsOwner, String supergroup, |
91 | | - UserGroupInformation ugi, INodeAttributes[] inodeAttrs, |
92 | | - INode[] inodes, byte[][] pathByNameArr, int snapshotId, String path, |
93 | | - int ancestorIndex, boolean doCheckOwner, FsAction ancestorAccess, |
94 | | - FsAction parentAccess, FsAction access, FsAction subAccess, |
95 | | - boolean ignoreEmptyDir) throws AccessControlException { |
96 | | - LOG.warn("checkPermission called"); |
97 | | - |
98 | | - new Throwable().printStackTrace(); |
99 | | - throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. " + |
100 | | - "This should not happen, as all HDFS code paths should call the new API. " + |
101 | | - "I dumped the stack trace for you (check active namenode logs), so you can figure out which code path it was. " + |
102 | | - "Please report all of that to author of the OPA authorizer (We don't have a stable GitHub link yet, sorry!) " + |
103 | | - "Passed arguments: " + |
104 | | - "fsOwner: " + fsOwner + ", supergroup: " + supergroup + ", ugi: " + ugi + ", path: " + path + ", ancestorIndex:" + ancestorIndex + |
105 | | - ", doCheckOwner: " + doCheckOwner + ", ancestorAccess: " + ancestorAccess + ", parentAccess: " + parentAccess + |
106 | | - ", subAccess: " + subAccess + ", ignoreEmptyDir: " + ignoreEmptyDir); |
| 170 | + if (LOG.isDebugEnabled()) { |
| 171 | + String prettyPrinted; |
| 172 | + try { |
| 173 | + prettyPrinted = json.writerWithDefaultPrettyPrinter().writeValueAsString(query); |
| 174 | + } catch (JsonProcessingException e) { |
| 175 | + LOG.error( |
| 176 | + "Could not pretty print the following request body (but non-pretty print did work): {}", |
| 177 | + body); |
| 178 | + throw new OpaException.SerializeFailed(e); |
| 179 | + } |
| 180 | + LOG.debug("Request body:\n{}", prettyPrinted); |
107 | 181 | } |
108 | 182 |
|
109 | | - @Override |
110 | | - public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationContext authzContext) throws AccessControlException { |
111 | | - OpaAllowQuery query = new OpaAllowQuery(new OpaAllowQuery.OpaAllowQueryInput(authzContext)); |
112 | | - |
113 | | - String body; |
114 | | - try { |
115 | | - body = json.writeValueAsString(query); |
116 | | - } catch (JsonProcessingException e) { |
117 | | - throw new OpaException.SerializeFailed(e); |
118 | | - } |
119 | | - |
120 | | - String prettyPrinted; |
121 | | - try { |
122 | | - prettyPrinted = json.writerWithDefaultPrettyPrinter().writeValueAsString(query); |
123 | | - } catch (JsonProcessingException e) { |
124 | | - LOG.error("Could not pretty print the following request body (but non-pretty print did work): {}", body); |
125 | | - throw new OpaException.SerializeFailed(e); |
126 | | - } |
127 | | - |
128 | | - LOG.debug("Request body:\n{}", prettyPrinted); |
129 | | - HttpResponse<String> response = null; |
130 | | - try { |
131 | | - response = |
132 | | - httpClient.send( |
133 | | - HttpRequest.newBuilder(opaUri) |
134 | | - .header("Content-Type", "application/json") |
135 | | - .POST(HttpRequest.BodyPublishers.ofString(body)) |
136 | | - .build(), |
137 | | - HttpResponse.BodyHandlers.ofString()); |
138 | | - LOG.debug("Opa response: {}", response.body()); |
139 | | - } catch (Exception e) { |
140 | | - LOG.error(e.getMessage()); |
141 | | - throw new OpaException.QueryFailed(e); |
142 | | - } |
143 | | - |
144 | | - switch (Objects.requireNonNull(response).statusCode()) { |
145 | | - case 200: |
146 | | - break; |
147 | | - case 404: |
148 | | - throw new OpaException.EndPointNotFound(opaUri.toString()); |
149 | | - default: |
150 | | - throw new OpaException.OpaServerError(query.toString(), response); |
151 | | - } |
152 | | - |
153 | | - OpaQueryResult result; |
154 | | - try { |
155 | | - result = json.readValue(response.body(), OpaQueryResult.class); |
156 | | - } catch (JsonProcessingException e) { |
157 | | - throw new OpaException.DeserializeFailed(e); |
158 | | - } |
159 | | - |
160 | | - if (result.result == null || !result.result) { |
161 | | - throw new AccessControlException("OPA denied the request"); |
162 | | - } |
| 183 | + HttpResponse<String> response; |
| 184 | + try { |
| 185 | + response = |
| 186 | + HTTP_CLIENT.send( |
| 187 | + HttpRequest.newBuilder(opaUri) |
| 188 | + .header("Content-Type", "application/json") |
| 189 | + .POST(HttpRequest.BodyPublishers.ofString(body)) |
| 190 | + .build(), |
| 191 | + HttpResponse.BodyHandlers.ofString()); |
| 192 | + LOG.debug("OPA response: {}", response.body()); |
| 193 | + } catch (Exception e) { |
| 194 | + LOG.error(e.getMessage()); |
| 195 | + throw new OpaException.QueryFailed(e); |
163 | 196 | } |
164 | 197 |
|
165 | | - private abstract static class DatanodeDescriptorMixin { |
166 | | - @JsonIgnore |
167 | | - abstract INode getParent(); |
168 | | - @JsonIgnore |
169 | | - abstract DatanodeStorageInfo[] getStorageInfos(); |
| 198 | + switch (Objects.requireNonNull(response).statusCode()) { |
| 199 | + case 200: |
| 200 | + break; |
| 201 | + case 404: |
| 202 | + throw new OpaException.EndPointNotFound(opaUri.toString()); |
| 203 | + default: |
| 204 | + throw new OpaException.OpaServerError(query.toString(), response); |
170 | 205 | } |
| 206 | + |
| 207 | + OpaQueryResult result; |
| 208 | + try { |
| 209 | + result = json.readValue(response.body(), OpaQueryResult.class); |
| 210 | + } catch (JsonProcessingException e) { |
| 211 | + throw new OpaException.DeserializeFailed(e); |
| 212 | + } |
| 213 | + |
| 214 | + if (result.result == null || !result.result) { |
| 215 | + throw new AccessControlException("OPA denied the request"); |
| 216 | + } |
| 217 | + } |
| 218 | + |
| 219 | + private abstract static class DatanodeDescriptorMixin { |
| 220 | + @JsonIgnore |
| 221 | + abstract INode getParent(); |
| 222 | + |
| 223 | + @JsonIgnore |
| 224 | + abstract DatanodeStorageInfo[] getStorageInfos(); |
| 225 | + } |
171 | 226 | } |
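
For context, the checkPermissionWithContext implementation added above boils down to a single OPA data-API round trip: POST the serialized AuthorizationContext as JSON to the configured policy URL (hadoop.security.authorization.opa.policy.url) and allow the operation only when the response carries "result": true. The standalone Java sketch below mirrors that exchange outside of HDFS; the endpoint http://localhost:8081/v1/data/hdfs/allow, the OpaAllowQueryExample class name and the hard-coded request payload are illustrative assumptions, not part of this PR.

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class OpaAllowQueryExample {
  // Hypothetical OPA endpoint; the real enforcer reads it from
  // hadoop.security.authorization.opa.policy.url in the Hadoop Configuration.
  private static final URI OPA_URI = URI.create("http://localhost:8081/v1/data/hdfs/allow");

  // Mirrors OpaQueryResult: a boxed Boolean distinguishes "field missing" from an explicit false.
  static class Result {
    public Boolean result;
  }

  public static void main(String[] args) throws Exception {
    ObjectMapper json =
        // Ignore extra fields such as decision_id, as the enforcer does.
        new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    // Illustrative input only; the enforcer serializes the whole AuthorizationContext instead.
    String body = "{\"input\": {\"operationName\": \"create\", \"path\": \"/foo\"}}";

    HttpResponse<String> response =
        HttpClient.newHttpClient()
            .send(
                HttpRequest.newBuilder(OPA_URI)
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofString(body))
                    .build(),
                HttpResponse.BodyHandlers.ofString());

    Result result = json.readValue(response.body(), Result.class);
    boolean allowed = response.statusCode() == 200 && Boolean.TRUE.equals(result.result);
    System.out.println("OPA decision: " + (allowed ? "allow" : "deny"));
  }
}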