Skip to content

Commit 076ba0a

Browse files
authored
Merge pull request #767 from keflavich/nrao_login
Upgrades & fixes to the NRAO archive tool
2 parents 34b3e62 + 2d15b0b commit 076ba0a

File tree

9 files changed

+1116
-17
lines changed

9 files changed

+1116
-17
lines changed

CHANGES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
- Add license statements to appropriate places (#776)
66
- Correct HEASARC tool, which was sending incorrect data to the server (#774)
77
- Fix NIST issue #714 which led to badly-parsed tables (#773)
8+
- NRAO archive tool allows user logins and HTML-based queries (#767)
89

910
0.3.3 (2016-10-11)
1011
------------------

astroquery/alma/core.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ def stage_data(self, uids):
305305
summary.raise_for_status()
306306
self._staging_log['json_data'] = json_data = summary.json()
307307

308-
username = self._username if hasattr(self, '_username') else 'anonymous'
308+
username = self.USERNAME if self.USERNAME else 'anonymous'
309309

310310
# templates:
311311
# https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
@@ -447,7 +447,7 @@ def _login(self, username=None, store_password=False,
447447
"""
448448

449449
if username is None:
450-
if self.USERNAME == "":
450+
if not self.USERNAME:
451451
raise LoginError("If you do not pass a username to login(), "
452452
"you should configure a default one!")
453453
else:
@@ -496,7 +496,7 @@ def _login(self, username=None, store_password=False,
496496

497497
if authenticated:
498498
log.info("Authentication successful!")
499-
self._username = username
499+
self.USERNAME = username
500500
else:
501501
log.exception("Authentication failed!")
502502
# When authenticated, save password in keyring if needed

astroquery/nrao/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ class Conf(_config.ConfigNamespace):
1515
timeout = _config.ConfigItem(
1616
60,
1717
'Time limit for connecting to NRAO server.')
18+
username = _config.ConfigItem(
19+
"",
20+
'Optional default username for ALMA archive.')
1821

1922
conf = Conf()
2023

astroquery/nrao/core.py

Lines changed: 177 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,25 @@
22
from __future__ import print_function
33

44
import re
5+
import os
56
import warnings
67
import functools
8+
import getpass
9+
import keyring
710

11+
import numpy as np
812
import astropy.units as u
913
import astropy.io.votable as votable
1014
from astropy import coordinates
1115
from astropy.extern import six
16+
from astropy.table import Table
17+
from astropy import log
18+
from bs4 import BeautifulSoup
1219

13-
from ..query import BaseQuery
14-
from ..utils import commons, async_to_sync
20+
from ..query import QueryWithLogin
21+
from ..utils import commons, async_to_sync, system_tools
1522
from ..utils.docstr_chompers import prepend_docstr_noreturns
16-
from ..exceptions import TableParseError
23+
from ..exceptions import TableParseError, LoginError
1724

1825
from . import conf
1926

@@ -44,10 +51,11 @@ def wrapper(*args, **kwargs):
4451

4552

4653
@async_to_sync
47-
class NraoClass(BaseQuery):
54+
class NraoClass(QueryWithLogin):
4855

4956
DATA_URL = conf.server
5057
TIMEOUT = conf.timeout
58+
USERNAME = conf.username
5159

5260
# dicts and lists for data archive queries
5361
telescope_code = {
@@ -124,15 +132,27 @@ def _args_to_payload(self, **kwargs):
124132
125133
querytype : str
126134
The type of query to perform. "OBSSUMMARY" is the default, but
127-
it is only valid for VLA/VLBA observations. ARCHIVE will not
128-
work at all because it relies on XML data. OBSERVATION will
135+
it is only valid for VLA/VLBA observations. ARCHIVE will give
136+
the list of files available for download. OBSERVATION will
129137
provide full details of the sources observed and under what
130138
configurations.
131139
source_id : str, optional
132140
A source name (to be parsed by SIMBAD or NED)
141+
protocol : 'VOTable-XML' or 'HTML'
142+
The type of table to return. In theory, this should not matter,
143+
but in practice the different table types actually have different
144+
content. For ``querytype='ARCHIVE'``, the protocol will be forced
145+
to HTML because the archive doesn't support votable returns for
146+
archive queries.
133147
get_query_payload : bool, optional
134148
if set to `True` then returns the dictionary sent as the HTTP
135149
request. Defaults to `False`
150+
cache : bool
151+
Cache the query results
152+
retry : bool or int
153+
The number of times to retry querying the server if it doesn't
154+
raise an exception but returns a null result (this sort of behavior
155+
seems unique to the NRAO archive)
136156
137157
Returns
138158
-------
@@ -149,7 +169,7 @@ def _args_to_payload(self, **kwargs):
149169

150170
request_payload = dict(
151171
QUERYTYPE=kwargs.get('querytype', "OBSSUMMARY"),
152-
PROTOCOL="VOTable-XML",
172+
PROTOCOL=kwargs.get('protocol',"VOTable-XML"),
153173
MAX_ROWS="NO LIMIT",
154174
SORT_PARM="Starttime",
155175
SORT_ORDER="Asc",
@@ -187,6 +207,20 @@ def _args_to_payload(self, **kwargs):
187207
PASSWD="", # TODO: implement login...
188208
SUBMIT="Submit Query")
189209

210+
if (request_payload['QUERYTYPE'] == "ARCHIVE" and
211+
request_payload['PROTOCOL'] != 'HTML'):
212+
warnings.warn("Changing protocol to HTML: ARCHIVE queries do not"
213+
" support votable returns")
214+
request_payload['PROTOCOL'] = 'HTML'
215+
216+
if request_payload['PROTOCOL'] not in ('HTML','VOTable-XML'):
217+
raise ValueError("Only HTML and VOTable-XML returns are supported")
218+
219+
if request_payload['QUERYTYPE'] not in ('ARCHIVE', 'OBSSUMMARY',
220+
'OBSERVATION'):
221+
raise ValueError("Only ARCHIVE, OBSSUMMARY, and OBSERVATION "
222+
"querytypes are supported")
223+
190224
if 'coordinates' in kwargs:
191225
c = commons.parse_coordinates(
192226
kwargs['coordinates']).transform_to(coordinates.ICRS)
@@ -195,10 +229,91 @@ def _args_to_payload(self, **kwargs):
195229

196230
return request_payload
197231

232+
def _login(self, username=None, store_password=False,
           reenter_password=False):
    """
    Login to the NRAO archive

    Parameters
    ----------
    username : str, optional
        Username to the NRAO archive. If not given, it should be specified
        in the config file.
    store_password : bool, optional
        Stores the password securely in your keyring. Default is False.
    reenter_password : bool, optional
        Asks for the password even if it is already stored in the
        keyring. This is the way to overwrite an already stored password
        on the keyring. Default is False.

    Returns
    -------
    authenticated : bool
        `True` if the login page reports an existing login or the
        submitted credentials were accepted, `False` otherwise.

    Raises
    ------
    LoginError
        If no username is passed and none is configured, or if one of the
        expected hidden fields is missing from the login form.
    """

    # Developer notes:
    # Login via https://my.nrao.edu/cas/login
    # This can be appended to auto-redirect back to the query tool:
    # ?service=https://archive.nrao.edu/archive/advquery.jsp
    login_url = "https://my.nrao.edu/cas/login"
    keyring_service = "astroquery:my.nrao.edu"

    if username is None:
        if not self.USERNAME:
            raise LoginError("If you do not pass a username to login(), "
                             "you should configure a default one!")
        username = self.USERNAME

    # Check if already logged in (never cache pieces of the login process)
    loginpage = self._request("GET", login_url, cache=False)
    root = BeautifulSoup(loginpage.content, 'html5lib')
    if root.find('div', class_='success'):
        log.info("Already logged in.")
        return True

    # Get password from keyring or prompt
    if reenter_password is False:
        password_from_keyring = keyring.get_password(keyring_service,
                                                     username)
    else:
        password_from_keyring = None

    if password_from_keyring is None:
        if system_tools.in_ipynb():
            log.warning("You may be using an ipython notebook:"
                        " the password form will appear in your terminal.")
        password = getpass.getpass("{0}, enter your NRAO archive password:"
                                   "\n".format(username))
    else:
        password = password_from_keyring

    # Authenticate
    log.info("Authenticating {0} on my.nrao.edu ...".format(username))

    # Copy the hidden fields of the login form into the POST payload.
    # Fail with a LoginError rather than an opaque TypeError/KeyError when
    # the form does not look the way this code expects.
    data = {}
    for kw in ('lt', '_eventId', 'execution'):
        field = root.find('input', {'name': kw})
        value = field.get('value') if field is not None else None
        if value is None:
            raise LoginError("Could not find the '{0}' field in the NRAO "
                             "login form; the login page may have "
                             "changed.".format(kw))
        data[kw] = value
    data['username'] = username
    data['password'] = password
    data['execution'] = 'e1s1'  # not sure if needed
    data['_eventId'] = 'submit'
    data['submit'] = 'LOGIN'

    login_response = self._request("POST", login_url, data=data,
                                   cache=False)

    authenticated = ('You have successfully logged in' in
                     login_response.text)

    if authenticated:
        log.info("Authentication successful!")
        self.USERNAME = username
    else:
        # No exception is being handled here, so log.exception() would
        # append a meaningless "NoneType: None" traceback.
        log.error("Authentication failed!")

    # When authenticated, save password in keyring if needed
    if authenticated and password_from_keyring is None and store_password:
        keyring.set_password(keyring_service, username, password)

    return authenticated
311+
198312
@prepend_docstr_noreturns(_args_to_payload.__doc__)
199313
def query_async(self,
200314
get_query_payload=False,
201315
cache=True,
316+
retry=False,
202317
**kwargs):
203318
"""
204319
Returns
@@ -213,14 +328,32 @@ def query_async(self,
213328
return request_payload
214329
response = self._request('POST', self.DATA_URL, params=request_payload,
215330
timeout=self.TIMEOUT, cache=cache)
331+
self._last_response = response
332+
333+
response.raise_for_status()
334+
335+
if not response.content:
336+
if cache:
337+
last_pickle = self._last_query.hash()+".pickle"
338+
cache_fn = os.path.join(self.cache_location, last_pickle)
339+
os.remove(cache_fn)
340+
if retry > 0:
341+
self.query_async(cache=cache, retry=retry-1, **kwargs)
342+
else:
343+
raise ValueError("Query resulted in an empty result but "
344+
"the server did not raise an error.")
345+
216346
return response
217347

218348
@prepend_docstr_noreturns(_args_to_payload.__doc__)
219349
def query_region_async(self, coordinates, radius=1 * u.deg,
220350
equinox='J2000', telescope='all', start_date="",
221351
end_date="", freq_low=None, freq_up=None,
222352
telescope_config='all', obs_band='all',
223-
sub_array='all', get_query_payload=False):
353+
querytype='OBSSUMMARY', sub_array='all',
354+
protocol='VOTable-XML',
355+
retry=False,
356+
get_query_payload=False, cache=True):
224357
"""
225358
Returns
226359
-------
@@ -239,9 +372,22 @@ def query_region_async(self, coordinates, radius=1 * u.deg,
239372
telescope_config=telescope_config,
240373
obs_band=obs_band,
241374
sub_array=sub_array,
242-
get_query_payload=get_query_payload)
375+
querytype=querytype,
376+
protocol=protocol,
377+
get_query_payload=get_query_payload,
378+
retry=retry,
379+
cache=cache)
243380

244381
def _parse_result(self, response, verbose=False):
    """
    Dispatch a server response to the VOTable or the HTML parser.

    Parameters
    ----------
    response : object with a ``text`` attribute
        The response returned by the query; only the beginning of
        ``response.text`` is inspected here.
    verbose : bool, optional
        Passed through unchanged to the selected parser.

    Returns
    -------
    Whatever ``_parse_votable_result`` or ``_parse_html_result`` returns.

    Raises
    ------
    ValueError
        If the response starts with neither an XML declaration nor an
        HTML document.
    """
    # Sniff a normalized prefix: ignore leading whitespace and letter case
    # so that '<HTML>', '<html lang="en">' and '<!DOCTYPE html>' are all
    # recognized as HTML, not only the literal '<html>'.
    head = response.text.lstrip()[:15].lower()

    if head.startswith('<?xml'):
        return self._parse_votable_result(response, verbose=verbose)
    elif head.startswith(('<html', '<!doctype html')):
        return self._parse_html_result(response, verbose=verbose)
    else:
        raise ValueError("Unrecognized response type; it does not appear "
                         "to be VO-XML or HTML")
389+
390+
def _parse_votable_result(self, response, verbose=False):
245391
if not verbose:
246392
commons.suppress_vo_warnings()
247393

@@ -281,4 +427,26 @@ def _parse_result(self, response, verbose=False):
281427
"raw response can be found in self.response,"
282428
" and the error in self.table_parse_error.")
283429

430+
def _parse_html_result(self, response, verbose=False):
    """
    Parse an HTML response into a table.

    Parameters
    ----------
    response : object with a ``content`` attribute
        The response whose ``content`` holds the HTML page.
    verbose : bool, optional
        Accepted for signature parity with the other parser; it is not
        used in this method.

    Returns
    -------
    table
        The table read from the last ``<table>`` element of the page.

    Raises
    ------
    ValueError
        If the page contains no ``<table>`` element.
    """
    # parse the HTML return...
    root = BeautifulSoup(response.content, 'html5lib')

    htmltables = root.find_all('table')
    if not htmltables:
        # Fail with a clear message instead of a bare IndexError below.
        raise ValueError("Found no tables in the HTML response")

    # The page may hold more than one table; the last one is the one read.
    string_to_parse = htmltables[-1].encode('ascii')

    if six.PY2:
        from astropy.io.ascii import html
        from astropy.io.ascii.core import convert_numpy
        htmlreader = html.HTML()
        # six.text_type is ``unicode`` on Python 2, the same type the
        # deprecated ``np.unicode`` alias referred to.
        htmlreader.outputter.default_converters.append(
            convert_numpy(six.text_type))
        table = htmlreader.read(string_to_parse)
    else:
        table = Table.read(string_to_parse.decode('utf-8'),
                           format='ascii.html')

    return table
451+
284452
Nrao = NraoClass()

0 commit comments

Comments
 (0)