@@ -9,7 +9,7 @@ namespace DB
99
1010namespace
1111{
12- // / The JSON reply from provider has only a few key-value pairs, so no need for SimdJSON/RapidJSON .
12+ // / The JSON reply from provider has only a few key-value pairs, so no need for any advanced parsing .
1313 // / Reduce complexity by using picojson.
1414 picojson::object parseJSON (const String & json_string) {
1515 picojson::value jsonValue;
@@ -26,18 +26,20 @@ namespace
2626 return jsonValue.get <picojson::object>();
2727 }
2828
/// getValueByKey: looks up `key` in `jsonObject` and returns the value stored under it.
/// This span is a diff: the removed (-) lines are the old, std::string-only version; the added (+)
/// lines turn the function into a template over ValueType, which defaults to std::string.
/// Throws std::runtime_error when the key is absent or when the stored value fails `value.is<ValueType>()`.
/// NOTE(review): call sites visible in this file wrap this helper in `catch (const Exception &)`;
/// confirm whether a std::runtime_error thrown here is meant to be handled by those handlers.
/// NOTE(review): one visible call site instantiates this with time_t; presumably picojson must provide
/// is<>/get<> for that exact type - confirm for every target platform.
29- std::string getValueByKey (const picojson::object & jsonObject, const std::string & key) {
29+ template <typename ValueType = std::string>
30+ ValueType getValueByKey (const picojson::object & jsonObject, const std::string & key) {
3031 auto it = jsonObject.find (key); // Find the key in the object
31- if (it == jsonObject.end ()) {
32+ if (it == jsonObject.end ())
33+ {
3234 throw std::runtime_error (" Key not found: " + key);
3335 }
3436
/// The type check precedes get<>() so that a mismatch surfaces as the runtime_error thrown below.
35- const picojson::value &value = it->second ;
36- if (!value.is <std::string >()) {
37- throw std::runtime_error (" Value for key '" + key + " ' is not a string " );
37+ const picojson::value & value = it->second ;
38+ if (!value.is <ValueType >()) {
39+ throw std::runtime_error (" Value for key '" + key + " ' has incorrect type. " );
3840 }
3941
/// The value is returned by value (the declared return type is not a reference).
40- return value.get <std::string >();
42+ return value.get <ValueType >();
4143 }
4244
4345 picojson::object getObjectFromURI (const Poco::URI & uri, const String & token = " " )
@@ -96,9 +98,12 @@ std::unique_ptr<IAccessTokenProcessor> IAccessTokenProcessor::parseTokenProcesso
9698 String email_regex_str = config.hasProperty (prefix + " .email_filter" ) ? config.getString (
9799 prefix + " .email_filter" ) : " " ;
98100
101+ UInt64 cache_lifetime = config.hasProperty (prefix + " .cache_lifetime" ) ? config.getUInt64 (
102+ prefix + " .cache_lifetime" ) : 3600 ;
103+
99104 if (provider == " google" )
100105 {
101- return std::make_unique<GoogleAccessTokenProcessor>(name, email_regex_str);
106+ return std::make_unique<GoogleAccessTokenProcessor>(name, cache_lifetime, email_regex_str);
102107 }
103108 else if (provider == " azure" )
104109 {
@@ -110,11 +115,9 @@ std::unique_ptr<IAccessTokenProcessor> IAccessTokenProcessor::parseTokenProcesso
110115 throw Exception (ErrorCodes::INVALID_CONFIG_PARAMETER,
111116 " Could not parse access token processor {}: tenant_id must be specified" , name);
112117
113- String client_id_str = config.getString (prefix + " .client_id" );
114118 String tenant_id_str = config.getString (prefix + " .tenant_id" );
115- String client_secret_str = config.hasProperty (prefix + " .client_secret" ) ? config.getString (prefix + " .client_secret" ) : " " ;
116119
117- return std::make_unique<AzureAccessTokenProcessor>(name, email_regex_str, client_id_str , tenant_id_str, client_secret_str );
120+ return std::make_unique<AzureAccessTokenProcessor>(name, cache_lifetime, email_regex_str , tenant_id_str);
118121 }
119122 else
120123 throw Exception (ErrorCodes::INVALID_CONFIG_PARAMETER,
@@ -132,10 +135,11 @@ bool GoogleAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cre
132135
133136 auto user_info = getUserInfo (token);
134137 String user_name = user_info[" sub" ];
138+ bool has_email = user_info.contains (" email" );
135139
136140 if (email_regex.ok ())
137141 {
138- if (!user_info. contains ( " email " ) )
142+ if (!has_email )
139143 {
140144 LOG_TRACE (getLogger (" AccessTokenProcessor" ), " {}: Failed to validate {} by e-mail" , name, user_name);
141145 return false ;
@@ -149,10 +153,59 @@ bool GoogleAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cre
149153 }
150154
151155 }
156+
152157 // / Credentials are passed as const everywhere up the flow, so we have to comply,
153158 // / in this case const_cast looks acceptable.
154159 const_cast <TokenCredentials &>(credentials).setUserName (user_name);
155- const_cast <TokenCredentials &>(credentials).setGroups ({});
160+
161+ auto token_info = getObjectFromURI (Poco::URI (token_info_uri), token);
162+ if (token_info.contains (" exp" ))
163+ const_cast <TokenCredentials &>(credentials).setExpiresAt (std::chrono::system_clock::from_time_t ((getValueByKey<time_t >(token_info, " exp" ))));
164+
165+ // / Groups info can only be retrieved if user email is known.
166+ // / If no email found in user info, we skip this step and there are no external groups for the user.
167+ if (has_email)
168+ {
169+ std::set<String> external_groups_names;
170+ const Poco::URI get_groups_uri = Poco::URI (" https://cloudidentity.googleapis.com/v1/groups/-/memberships:searchDirectGroups?query=member_key_id==" + user_info[" email" ] + " '" );
171+
172+ try
173+ {
174+ auto groups_response = getObjectFromURI (get_groups_uri, token);
175+
176+ if (!groups_response.contains (" memberships" ) || !groups_response[" memberships" ].is <picojson::array>())
177+ {
178+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
179+ " {}: Failed to get Google groups: invalid content in response from server" , name);
180+ return true ;
181+ }
182+
183+ for (const auto & group: groups_response[" memberships" ].get <picojson::array>())
184+ {
185+ if (!group.is <picojson::object>())
186+ {
187+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
188+ " {}: Failed to get Google groups: invalid content in response from server" , name);
189+ continue ;
190+ }
191+
192+ auto group_data = group.get <picojson::object>();
193+ String group_name = getValueByKey (group_data[" groupKey" ].get <picojson::object>(), " id" );
194+ external_groups_names.insert (group_name);
195+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
196+ " {}: User {}: new external group {}" , name, user_name, group_name);
197+ }
198+
199+ const_cast <TokenCredentials &>(credentials).setGroups (external_groups_names);
200+ }
201+ catch (const Exception & e)
202+ {
203+ // / Could not get groups info. Log it and skip it.
204+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
205+ " {}: Failed to get Google groups, no external roles will be mapped. reason: {}" , name, e.what ());
206+ return true ;
207+ }
208+ }
156209
157210 return true ;
158211}
@@ -177,8 +230,9 @@ std::unordered_map<String, String> GoogleAccessTokenProcessor::getUserInfo(const
177230
178231bool AzureAccessTokenProcessor::resolveAndValidate (const TokenCredentials & credentials)
179232{
180- // / Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. We will not trust any data in this token.
181- // / e.g. see here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad
233+ // / Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS.
234+ // / We will not trust user data in this token except for 'exp' value to determine caching duration.
235+ // / Explanation here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad
182236 // / Let Azure validate it: only valid tokens will be accepted.
183237 // / Use GET https://graph.microsoft.com/oidc/userinfo to verify token and get sub at the same time
184238
@@ -202,8 +256,56 @@ bool AzureAccessTokenProcessor::resolveAndValidate(const TokenCredentials & cred
202256 return false ;
203257 }
204258
205- // / TODO: do not store it in credentials.
206- const_cast <TokenCredentials &>(credentials).setGroups ({});
259+ try
260+ {
261+ const_cast <TokenCredentials &>(credentials).setExpiresAt (jwt::decode (token).get_expires_at ());
262+ }
263+ catch (...) {
264+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
265+ " {}: No expiration data found in a valid token, will use default cache lifetime" , name);
266+ }
267+
268+ std::set<String> external_groups_names;
269+ const Poco::URI get_groups_uri = Poco::URI (" https://graph.microsoft.com/v1.0/me/memberOf" );
270+
271+ try
272+ {
273+ auto groups_response = getObjectFromURI (get_groups_uri, token);
274+
275+ if (!groups_response.contains (" value" ) || !groups_response[" value" ].is <picojson::array>())
276+ {
277+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
278+ " {}: Failed to get Azure groups: invalid content in response from server" , name);
279+ return true ;
280+ }
281+
282+ picojson::array groups_array = groups_response[" value" ].get <picojson::array>();
283+
284+ for (const auto & group: groups_array)
285+ {
286+ // / Got some invalid response. Ignore this, log this.
287+ if (!group.is <picojson::object >())
288+ {
289+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
290+ " {}: Failed to get Azure groups: invalid content in response from server" , name);
291+ continue ;
292+ }
293+
294+ auto group_data = group.get <picojson::object>();
295+ String group_name = getValueByKey (group_data, " id" );
296+ external_groups_names.insert (group_name);
297+ LOG_TRACE (getLogger (" AccessTokenProcessor" ), " {}: User {}: new external group {}" , name, credentials.getUserName (), group_name);
298+ }
299+ }
300+ catch (const Exception & e)
301+ {
302+ // / Could not get groups info. Log it and skip it.
303+ LOG_TRACE (getLogger (" AccessTokenProcessor" ),
304+ " {}: Failed to get Azure groups, no external roles will be mapped. reason: {}" , name, e.what ());
305+ return true ;
306+ }
307+
308+ const_cast <TokenCredentials &>(credentials).setGroups (external_groups_names);
207309
208310 return true ;
209311}
0 commit comments