Skip to content

Commit ead1b13

Browse files
authored
Merge pull request #2712 from at88mph/login-fix
ALMA: auth handling to login to Keycloak
2 parents cfc7383 + 496e78e commit ead1b13

File tree

3 files changed

+218
-66
lines changed

3 files changed

+218
-66
lines changed

astroquery/alma/core.py

Lines changed: 106 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from ..exceptions import LoginError
3131
from ..utils import commons
3232
from ..utils.process_asyncs import async_to_sync
33-
from ..query import QueryWithLogin
33+
from ..query import BaseQuery, QueryWithLogin
3434
from .tapsql import _gen_pos_sql, _gen_str_sql, _gen_numeric_sql,\
3535
_gen_band_list_sql, _gen_datetime_sql, _gen_pol_sql, _gen_pub_sql,\
3636
_gen_science_sql, _gen_spec_res_sql, ALMA_DATE_FORMAT
@@ -212,6 +212,101 @@ def _gen_sql(payload):
212212
return sql + where
213213

214214

215+
class AlmaAuth(BaseQuery):
    """Authentication session information for passing credentials to an OIDC instance.

    Assumes an OIDC system like Keycloak with a preconfigured client app called "oidc" to validate against.
    This does not use Tokens in the traditional OIDC sense, but rather uses the Keycloak specific endpoint
    to validate a username and password.  This class only validates the credentials; it does not store
    them itself.
    """

    _CLIENT_ID = 'oidc'
    _GRANT_TYPE = 'password'
    _INVALID_PASSWORD_MESSAGE = 'Invalid user credentials'
    _REALM_ENDPOINT = '/auth/realms/ALMA'
    _LOGIN_ENDPOINT = f'{_REALM_ENDPOINT}/protocol/openid-connect/token'
    _VERIFY_WELL_KNOWN_ENDPOINT = f'{_REALM_ENDPOINT}/.well-known/openid-configuration'

    def __init__(self):
        super().__init__()
        # Candidate hosts, tried in order by get_valid_host().
        self._auth_hosts = auth_urls
        # First host that answered the well-known probe; None until resolved.
        self._auth_host = None

    @property
    def auth_hosts(self):
        """The hosts that are checked, in order, for a login endpoint."""
        return self._auth_hosts

    @auth_hosts.setter
    def auth_hosts(self, auth_hosts):
        """
        Set the available hosts to check for login endpoints.

        Parameters
        ----------
        auth_hosts : array
            Available host names.  Each one is checked until one returns a 200 for
            the well-known endpoint.

        Raises
        ------
        LoginError
            If ``auth_hosts`` is None.
        """
        if auth_hosts is None:
            raise LoginError('Valid authentication hosts cannot be None')

        self._auth_hosts = auth_hosts
        # A host resolved from the previous list may not be in the new one,
        # so force get_valid_host() to probe again.
        self._auth_host = None

    def get_valid_host(self):
        """
        Return the first configured host that serves the realm's well-known endpoint.

        The result is cached on the instance, so the hosts are only probed
        until one of them has answered successfully.

        Returns
        -------
        str
            The host name that answered the probe with a 200 status.

        Raises
        ------
        LoginError
            If none of the configured hosts answered with a 200 status.
        """
        if self._auth_host is None:
            for auth_url in self._auth_hosts:
                # Probe the OpenID discovery document; a 200 means this host serves the realm.
                url_to_check = f'https://{auth_url}{self._VERIFY_WELL_KNOWN_ENDPOINT}'
                response = self._request("HEAD", url_to_check, cache=False)

                if response.status_code == 200:
                    self._auth_host = auth_url
                    log.debug(f'Set auth host to {self._auth_host}')
                    break

        if self._auth_host is None:
            raise LoginError(f'No useable hosts to login to: {self._auth_hosts}')

        return self._auth_host

    def login(self, username, password):
        """
        Authenticate to one of the configured hosts.

        Parameters
        ----------
        username : str
            The username to authenticate with
        password : str
            The user's password

        Raises
        ------
        LoginError
            If no host is usable, the server reports an error, the response is
            not valid JSON, or the response carries no access token.
        """
        data = {
            'username': username,
            'password': password,
            'grant_type': self._GRANT_TYPE,
            'client_id': self._CLIENT_ID
        }

        host = self.get_valid_host()
        login_url = f'https://{host}{self._LOGIN_ENDPOINT}'
        log.info(f'Authenticating {username} on {login_url}.')
        login_response = self._request('POST', login_url, data=data, cache=False)

        try:
            json_auth = login_response.json()
        except ValueError as err:
            # e.g. an HTML error page from a proxy in front of the auth server
            raise LoginError("Could not log in to ALMA authorization portal: "
                             f"{host} Response from server was not valid JSON") from err

        if 'error' in json_auth:
            log.debug(f'{json_auth}')
            # 'error_description' may be absent; fall back to the error code itself
            # so the failure still surfaces as a LoginError rather than a KeyError.
            error_message = json_auth.get('error_description') or str(json_auth['error'])
            if self._INVALID_PASSWORD_MESSAGE not in error_message:
                raise LoginError("Could not log in to ALMA authorization portal: "
                                 f"{host} Message from server: {error_message}")
            raise LoginError(error_message)

        if 'access_token' not in json_auth:
            raise LoginError("Could not log in to any of the known ALMA authorization portals: \n"
                             f"No error from server, but missing access token from host: {host}")

        log.info(f'Successfully logged in to {self._auth_host}')
308+
309+
215310
@async_to_sync
216311
class AlmaClass(QueryWithLogin):
217312

@@ -228,6 +323,11 @@ def __init__(self):
228323
self._sia_url = None
229324
self._tap_url = None
230325
self._datalink_url = None
326+
self._auth = AlmaAuth()
327+
328+
@property
329+
def auth(self):
330+
return self._auth
231331

232332
@property
233333
def datalink(self):
@@ -875,11 +975,7 @@ def _get_auth_info(self, username, *, store_password=False,
875975
else:
876976
username = self.USERNAME
877977

878-
if hasattr(self, '_auth_url'):
879-
auth_url = self._auth_url
880-
else:
881-
raise LoginError("Login with .login() to acquire the appropriate"
882-
" login URL")
978+
auth_url = self.auth.get_valid_host()
883979

884980
# Get password from keyring or prompt
885981
password, password_from_keyring = self._get_password(
@@ -909,69 +1005,16 @@ def _login(self, username=None, store_password=False,
9091005
on the keyring. Default is False.
9101006
"""
9111007

912-
success = False
913-
for auth_url in auth_urls:
914-
# set session cookies (they do not get set otherwise)
915-
cookiesetpage = self._request("GET",
916-
urljoin(self._get_dataarchive_url(),
917-
'rh/forceAuthentication'),
918-
cache=False)
919-
self._login_cookiepage = cookiesetpage
920-
cookiesetpage.raise_for_status()
921-
922-
if (auth_url+'/cas/login' in cookiesetpage.request.url):
923-
# we've hit a target, we're good
924-
success = True
925-
break
926-
if not success:
927-
raise LoginError("Could not log in to any of the known ALMA "
928-
"authorization portals: {0}".format(auth_urls))
929-
930-
# Check if already logged in
931-
loginpage = self._request("GET", "https://{auth_url}/cas/login".format(auth_url=auth_url),
932-
cache=False)
933-
root = BeautifulSoup(loginpage.content, 'html5lib')
934-
if root.find('div', class_='success'):
935-
log.info("Already logged in.")
936-
return True
937-
938-
self._auth_url = auth_url
1008+
self.auth.auth_hosts = auth_urls
9391009

9401010
username, password = self._get_auth_info(username=username,
9411011
store_password=store_password,
9421012
reenter_password=reenter_password)
9431013

944-
# Authenticate
945-
log.info("Authenticating {0} on {1} ...".format(username, auth_url))
946-
# Do not cache pieces of the login process
947-
data = {kw: root.find('input', {'name': kw})['value']
948-
for kw in ('execution', '_eventId')}
949-
data['username'] = username
950-
data['password'] = password
951-
data['submit'] = 'LOGIN'
952-
953-
login_response = self._request("POST", "https://{0}/cas/login".format(auth_url),
954-
params={'service': self._get_dataarchive_url()},
955-
data=data,
956-
cache=False)
957-
958-
# save the login response for debugging purposes
959-
self._login_response = login_response
960-
# do not expose password back to user
961-
del data['password']
962-
# but save the parameters for debug purposes
963-
self._login_parameters = data
964-
965-
authenticated = ('You have successfully logged in' in
966-
login_response.text)
967-
968-
if authenticated:
969-
log.info("Authentication successful!")
970-
self.USERNAME = username
971-
else:
972-
log.exception("Authentication failed!")
1014+
self.auth.login(username, password)
1015+
self.USERNAME = username
9731016

974-
return authenticated
1017+
return True
9751018

9761019
def get_cycle0_uid_contents(self, uid):
9771020
"""
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2+
from ..core import AlmaAuth
3+
from ...exceptions import LoginError
4+
5+
import pytest
6+
from unittest.mock import Mock
7+
8+
9+
def test_host():
    """A configured host whose well-known endpoint answers 200 is selected."""
    auth = AlmaAuth()
    auth.auth_hosts = ['almaexample.com']
    # Every request, whatever its method or URL, is answered with a 200.
    auth._request = Mock(side_effect=lambda method, url, **kwargs: Mock(status_code=200))

    selected_host = auth.get_valid_host()

    assert selected_host == 'almaexample.com'
19+
20+
21+
def test_host_default():
    """Without an explicit host list, the first default host answering 200 is selected."""
    auth = AlmaAuth()
    # Every request, whatever its method or URL, is answered with a 200.
    auth._request = Mock(side_effect=lambda method, url, **kwargs: Mock(status_code=200))

    selected_host = auth.get_valid_host()

    assert selected_host == 'asa.alma.cl'
30+
31+
32+
def test_host_err():
    """When no configured host answers 200, resolving a host raises LoginError."""
    auth = AlmaAuth()
    auth.auth_hosts = ['almaexample.com']
    # Every request, whatever its method or URL, is answered with a 404.
    auth._request = Mock(side_effect=lambda method, url, **kwargs: Mock(status_code=404))

    with pytest.raises(LoginError):
        auth.get_valid_host()
43+
44+
45+
def test_login_bad_error():
    """A server error other than bad credentials is reported together with the portal host."""
    def _response_json():
        return {
            'error': 'Badness',
            'error_description': 'Something very bad'
        }

    def _requests_mock_err(method, url, **kwargs):
        # Host probe succeeds; the login POST returns the error payload.
        response = Mock()
        if test_subject._VERIFY_WELL_KNOWN_ENDPOINT in url:
            response.status_code = 200
        elif test_subject._LOGIN_ENDPOINT in url:
            response.json = _response_json
        return response

    test_subject = AlmaAuth()
    test_subject.auth_hosts = ['almaexample.com']
    test_subject._request = Mock(side_effect=_requests_mock_err)
    with pytest.raises(LoginError) as e:
        test_subject.login('TESTUSER', 'TESTPASS')

    # Checked after the ``with`` block: a statement placed after the raising
    # call inside the block would never execute.
    assert 'Could not log in to ALMA authorization portal' in e.value.args[0]
    assert 'Something very bad' in e.value.args[0]
66+
67+
68+
def test_login_missing_token():
    """A login response with neither an error nor an access token raises LoginError."""
    token_free_payload = {
        'irrlevant': 'Weird',
    }

    def _fake_request(method, url, **kwargs):
        # Host probe succeeds; the login POST returns a payload without a token.
        reply = Mock()
        if auth._VERIFY_WELL_KNOWN_ENDPOINT in url:
            reply.status_code = 200
        elif auth._LOGIN_ENDPOINT in url:
            reply.json = lambda: dict(token_free_payload)
        return reply

    auth = AlmaAuth()
    auth.auth_hosts = ['almaexample.com']
    auth._request = Mock(side_effect=_fake_request)

    with pytest.raises(LoginError) as raised:
        auth.login('TESTUSER', 'TESTPASS')

    message = raised.value.args[0]
    assert 'No error from server, but missing access token from host' in message
89+
90+
91+
def test_login_success():
    """A response carrying an access token completes the login without raising."""
    def _response_json():
        return {
            'access_token': 'MYTOKEN'
        }

    def _requests_mock_good(method, url, **kwargs):
        # Host probe succeeds; the login POST returns a token payload.
        response = Mock()
        if test_subject._VERIFY_WELL_KNOWN_ENDPOINT in url:
            response.status_code = 200
        elif test_subject._LOGIN_ENDPOINT in url:
            response.json = _response_json
        return response

    test_subject = AlmaAuth()
    test_subject.auth_hosts = ['almaexample.com']
    test_subject._request = Mock(side_effect=_requests_mock_good)
    test_subject.login('TESTUSER', 'TESTPASS')

    # The probed host is now the cached login host.
    assert test_subject.get_valid_host() == 'almaexample.com'

    # The last request made was the credential POST to the token endpoint.
    positional, keywords = test_subject._request.call_args
    assert positional == ('POST', f'https://almaexample.com{test_subject._LOGIN_ENDPOINT}')
    assert keywords['data']['username'] == 'TESTUSER'
    assert keywords['data']['password'] == 'TESTPASS'
    assert keywords['cache'] is False

docs/alma/alma.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ Authentication
8080
==============
8181
8282
Users can log in to acquire proprietary data products. Login is performed
83-
via the ALMA CAS (central authentication server).
83+
via the ALMA OIDC (OpenID Connect) service, Keycloak.
8484
8585
.. doctest-skip::
8686
@@ -97,11 +97,11 @@ via the ALMA CAS (central authentication server).
9797
ICONDOR, enter your ALMA password:
9898
<BLANKLINE>
9999
Authenticating ICONDOR on https://asa.alma.cl/auth/realms/ALMA/protocol/openid-connect/token.
100-
Authentication successful!
100+
Successfully logged in to asa.alma.cl
101101
>>> # After the first login, your password has been stored
102102
>>> alma.login("ICONDOR")
103103
Authenticating ICONDOR on https://asa.alma.cl/auth/realms/ALMA/protocol/openid-connect/token.
104-
Authentication successful!
104+
Successfully logged in to asa.alma.cl
105105
106106
Your password will be stored by the `keyring
107107
<https://pypi.python.org/pypi/keyring>`_ module.

0 commit comments

Comments
 (0)