22from __future__ import print_function
33
44import re
5+ import os
56import warnings
67import functools
8+ import getpass
9+ import keyring
710
11+ import numpy as np
812import astropy .units as u
913import astropy .io .votable as votable
1014from astropy import coordinates
1115from astropy .extern import six
16+ from astropy .table import Table
17+ from astropy import log
18+ from bs4 import BeautifulSoup
1219
13- from ..query import BaseQuery
14- from ..utils import commons , async_to_sync
20+ from ..query import QueryWithLogin
21+ from ..utils import commons , async_to_sync , system_tools
1522from ..utils .docstr_chompers import prepend_docstr_noreturns
16- from ..exceptions import TableParseError
23+ from ..exceptions import TableParseError , LoginError
1724
1825from . import conf
1926
@@ -44,10 +51,11 @@ def wrapper(*args, **kwargs):
4451
4552
4653@async_to_sync
47- class NraoClass (BaseQuery ):
54+ class NraoClass (QueryWithLogin ):
4855
4956 DATA_URL = conf .server
5057 TIMEOUT = conf .timeout
58+ USERNAME = conf .username
5159
5260 # dicts and lists for data archive queries
5361 telescope_code = {
@@ -124,15 +132,27 @@ def _args_to_payload(self, **kwargs):
124132
125133 querytype : str
126134 The type of query to perform. "OBSSUMMARY" is the default, but
127- it is only valid for VLA/VLBA observations. ARCHIVE will not
128- work at all because it relies on XML data . OBSERVATION will
135+ it is only valid for VLA/VLBA observations. ARCHIVE will give
136+ the list of files available for download . OBSERVATION will
129137 provide full details of the sources observed and under what
130138 configurations.
131139 source_id : str, optional
132140 A source name (to be parsed by SIMBAD or NED)
141+ protocol : 'VOTable-XML' or 'HTML'
142+ The type of table to return. In theory, this should not matter,
143+ but in practice the different table types actually have different
144+ content. For ``querytype='ARCHIVE'``, the protocol will be forced
145+ to HTML because the archive doesn't support votable returns for
146+ archive queries.
133147 get_query_payload : bool, optional
134148 if set to `True` then returns the dictionary sent as the HTTP
135149 request. Defaults to `False`
150+ cache : bool
151+ Cache the query results
152+ retry : bool or int
153+ The number of times to retry querying the server if it doesn't
154+ raise an exception but returns a null result (this sort of behavior
155+ seems unique to the NRAO archive)
136156
137157 Returns
138158 -------
@@ -149,7 +169,7 @@ def _args_to_payload(self, **kwargs):
149169
150170 request_payload = dict (
151171 QUERYTYPE = kwargs .get ('querytype' , "OBSSUMMARY" ),
152- PROTOCOL = "VOTable-XML" ,
172+ PROTOCOL = kwargs . get ( 'protocol' , "VOTable-XML" ) ,
153173 MAX_ROWS = "NO LIMIT" ,
154174 SORT_PARM = "Starttime" ,
155175 SORT_ORDER = "Asc" ,
@@ -187,6 +207,20 @@ def _args_to_payload(self, **kwargs):
187207 PASSWD = "" , # TODO: implement login...
188208 SUBMIT = "Submit Query" )
189209
210+ if (request_payload ['QUERYTYPE' ] == "ARCHIVE" and
211+ request_payload ['PROTOCOL' ] != 'HTML' ):
212+ warnings .warn ("Changing protocol to HTML: ARCHIVE queries do not"
213+ " support votable returns" )
214+ request_payload ['PROTOCOL' ] = 'HTML'
215+
216+ if request_payload ['PROTOCOL' ] not in ('HTML' ,'VOTable-XML' ):
217+ raise ValueError ("Only HTML and VOTable-XML returns are supported" )
218+
219+ if request_payload ['QUERYTYPE' ] not in ('ARCHIVE' , 'OBSSUMMARY' ,
220+ 'OBSERVATION' ):
221+ raise ValueError ("Only ARCHIVE, OBSSUMMARY, and OBSERVATION "
222+ "querytypes are supported" )
223+
190224 if 'coordinates' in kwargs :
191225 c = commons .parse_coordinates (
192226 kwargs ['coordinates' ]).transform_to (coordinates .ICRS )
@@ -195,10 +229,91 @@ def _args_to_payload(self, **kwargs):
195229
196230 return request_payload
197231
def _login(self, username=None, store_password=False,
           reenter_password=False):
    """
    Login to the NRAO archive

    Parameters
    ----------
    username : str, optional
        Username to the NRAO archive. If not given, it should be specified
        in the config file.
    store_password : bool, optional
        Stores the password securely in your keyring. Default is False.
    reenter_password : bool, optional
        Asks for the password even if it is already stored in the
        keyring. This is the way to overwrite an already stored password
        on the keyring. Default is False.

    Returns
    -------
    authenticated : bool
        `True` if the login page already shows a ``div`` of class
        ``success`` or if the response to the login POST contains the
        success message; `False` otherwise.

    Raises
    ------
    LoginError
        If no username is passed and no default username is configured.
    """

    # Developer notes:
    # Login via https://my.nrao.edu/cas/login
    # # this can be added to auto-redirect back to the query tool: ?service=https://archive.nrao.edu/archive/advquery.jsp
    login_url = "https://my.nrao.edu/cas/login"
    keyring_service = "astroquery:my.nrao.edu"

    if username is None:
        if not self.USERNAME:
            raise LoginError("If you do not pass a username to login(), "
                             "you should configure a default one!")
        username = self.USERNAME

    # Check if already logged in
    loginpage = self._request("GET", login_url, cache=False)
    root = BeautifulSoup(loginpage.content, 'html5lib')
    if root.find('div', class_='success'):
        log.info("Already logged in.")
        return True

    # Get password from keyring or prompt
    if reenter_password is False:
        password_from_keyring = keyring.get_password(keyring_service,
                                                     username)
    else:
        password_from_keyring = None

    if password_from_keyring is None:
        if system_tools.in_ipynb():
            log.warning("You may be using an ipython notebook:"
                        " the password form will appear in your terminal.")
        password = getpass.getpass("{0}, enter your NRAO archive password:"
                                   "\n ".format(username))
    else:
        password = password_from_keyring

    # Authenticate
    log.info("Authenticating {0} on my.nrao.edu ...".format(username))

    # Start from the hidden form fields served with the login page, then
    # add the credentials.  'execution' and '_eventId' are deliberately
    # overwritten with fixed values below.
    data = {kw: root.find('input', {'name': kw})['value']
            for kw in ('lt', '_eventId', 'execution')}
    data['username'] = username
    data['password'] = password
    data['execution'] = 'e1s1'  # not sure if needed
    data['_eventId'] = 'submit'
    data['submit'] = 'LOGIN'

    # Do not cache pieces of the login process
    login_response = self._request("POST", login_url, data=data,
                                   cache=False)

    authenticated = ('You have successfully logged in' in
                     login_response.text)

    if authenticated:
        log.info("Authentication successful!")
        self.USERNAME = username
    else:
        # log.error, not log.exception: there is no active exception here,
        # so log.exception would append a meaningless empty traceback.
        log.error("Authentication failed!")

    # When authenticated, save password in keyring if needed
    if authenticated and password_from_keyring is None and store_password:
        keyring.set_password(keyring_service, username, password)

    return authenticated
311+
198312 @prepend_docstr_noreturns (_args_to_payload .__doc__ )
199313 def query_async (self ,
200314 get_query_payload = False ,
201315 cache = True ,
316+ retry = False ,
202317 ** kwargs ):
203318 """
204319 Returns
@@ -213,14 +328,32 @@ def query_async(self,
213328 return request_payload
214329 response = self ._request ('POST' , self .DATA_URL , params = request_payload ,
215330 timeout = self .TIMEOUT , cache = cache )
331+ self ._last_response = response
332+
333+ response .raise_for_status ()
334+
335+ if not response .content :
336+ if cache :
337+ last_pickle = self ._last_query .hash ()+ ".pickle"
338+ cache_fn = os .path .join (self .cache_location , last_pickle )
339+ os .remove (cache_fn )
340+ if retry > 0 :
341+ self .query_async (cache = cache , retry = retry - 1 , ** kwargs )
342+ else :
343+ raise ValueError ("Query resulted in an empty result but "
344+ "the server did not raise an error." )
345+
216346 return response
217347
218348 @prepend_docstr_noreturns (_args_to_payload .__doc__ )
219349 def query_region_async (self , coordinates , radius = 1 * u .deg ,
220350 equinox = 'J2000' , telescope = 'all' , start_date = "" ,
221351 end_date = "" , freq_low = None , freq_up = None ,
222352 telescope_config = 'all' , obs_band = 'all' ,
223- sub_array = 'all' , get_query_payload = False ):
353+ querytype = 'OBSSUMMARY' , sub_array = 'all' ,
354+ protocol = 'VOTable-XML' ,
355+ retry = False ,
356+ get_query_payload = False , cache = True ):
224357 """
225358 Returns
226359 -------
@@ -239,9 +372,22 @@ def query_region_async(self, coordinates, radius=1 * u.deg,
239372 telescope_config = telescope_config ,
240373 obs_band = obs_band ,
241374 sub_array = sub_array ,
242- get_query_payload = get_query_payload )
375+ querytype = querytype ,
376+ protocol = protocol ,
377+ get_query_payload = get_query_payload ,
378+ retry = retry ,
379+ cache = cache )
243380
def _parse_result(self, response, verbose=False):
    """
    Hand the response to the parser that matches its leading bytes.

    Parameters
    ----------
    response : object with a ``text`` attribute
        The server response to parse.
    verbose : bool, optional
        Forwarded unchanged to the selected parser.

    Returns
    -------
    The value returned by ``_parse_votable_result`` when the text starts
    with ``<?xml``, or by ``_parse_html_result`` when it starts with
    ``<html>``.

    Raises
    ------
    ValueError
        If the text starts with neither marker.
    """
    body = response.text

    if body.startswith('<?xml'):
        return self._parse_votable_result(response, verbose=verbose)

    if body.startswith('<html>'):
        return self._parse_html_result(response, verbose=verbose)

    raise ValueError("Unrecognized response type; it does not appear "
                     "to be VO-XML or HTML")
389+
390+ def _parse_votable_result (self , response , verbose = False ):
245391 if not verbose :
246392 commons .suppress_vo_warnings ()
247393
@@ -281,4 +427,26 @@ def _parse_result(self, response, verbose=False):
281427 "raw response can be found in self.response,"
282428 " and the error in self.table_parse_error." )
283429
def _parse_html_result(self, response, verbose=False):
    """
    Parse an HTML response into a table.

    The response content is parsed with BeautifulSoup (``html5lib``) and
    only the last ``<table>`` element found in the document is read.

    Parameters
    ----------
    response : object with a ``content`` attribute
        The server response holding the HTML document.
    verbose : bool, optional
        Accepted for signature parity with the other parsers; not used
        here.

    Returns
    -------
    table
        The result of reading the last HTML table with astropy's
        ``ascii.html`` reader.
    """
    soup = BeautifulSoup(response.content, 'html5lib')

    # The document may hold several tables; the last one is the one read.
    last_table = soup.findAll('table')[-1]
    table_bytes = last_table.encode('ascii')

    if not six.PY2:
        return Table.read(table_bytes.decode('utf-8'), format='ascii.html')

    # Python 2: drive the HTML reader directly so that a unicode converter
    # can be appended to its default converters before reading.
    from astropy.io.ascii import html
    from astropy.io.ascii.core import convert_numpy
    reader = html.HTML()
    reader.outputter.default_converters.append(convert_numpy(np.unicode))
    return reader.read(table_bytes)
451+
# Module-level instance of NraoClass, created when this module is imported.
Nrao = NraoClass()
0 commit comments