55import warnings
66import json
77import copy
8+ import re
89
910from astropy .extern import six
11+ from astropy .extern .six import BytesIO
1012import astropy .units as u
1113import astropy .coordinates as coord
1214import astropy .table as tbl
1315import astropy .utils .data as aud
1416from astropy .utils import OrderedDict
1517import astropy .io .votable as votable
18+ from astropy .io import ascii
1619
1720from ..query import BaseQuery
1821from ..utils import commons
@@ -110,6 +113,54 @@ def _server_to_url(self, return_type='votable'):
110113 FITS binary table: asu-binfits
111114 plain text: asu-txt
112115 """
116+
117+ """
118+ Quasi-private performance tests:
119+ It seems that these are dominated by table parsing time.
120+ %timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-tsv', cache=False)
121+ 1 loops, best of 3: 7.11 s per loop
122+ %timeit m83tsv = Vizier.query_object_async('M83', return_type='votable', cache=False)
123+ 1 loops, best of 3: 6.79 s per loop
124+ %timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-fits', cache=False)
125+ 1 loops, best of 3: 6.21 s per loop
126+ %timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-binfits', cache=False)
127+ 1 loops, best of 3: 667 ms per loop
128+ Looks like this one led to a segfault on their system?
129+
130+ %timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-txt', cache=False)
131+ 1 loops, best of 3: 6.83 s per loop
132+ %timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-tsv', cache=False)
133+ 1 loops, best of 3: 6.8 s per loop
134+
135+ m83tsv = Vizier.query_object_async('M83', return_type='asu-tsv', cache=False)
136+ m83votable = Vizier.query_object_async('M83', return_type='votable', cache=False)
137+ m83fits = Vizier.query_object_async('M83', return_type='asu-fits', cache=False)
138+ m83txt = Vizier.query_object_async('M83', return_type='asu-txt', cache=False)
139+ #m83binfits = Vizier.query_object_async('M83', return_type='asu-binfits', cache=False)
140+
141+ # many of these are invalid tables
142+ %timeit fitstbls = fits.open(BytesIO(m83fits.content), ignore_missing_end=True)
143+ 1 loops, best of 3: 541 ms per loop
144+
145+ %timeit tbls = parse_vizier_tsvfile(m83tsv.content)
146+ 1 loops, best of 3: 1.35 s per loop
147+
148+ %timeit votbls = parse_vizier_votable(m83votable.content)
149+ 1 loops, best of 3: 3.62 s per loop
150+
151+ """
152+ # Only votable is supported now, but in case we try to support
153+ # something in the future we should disallow invalid ones.
154+ assert return_type in ('votable' , 'asu-tsv' , 'asu-fits' ,
155+ 'asu-binfits' , 'asu-txt' )
156+ if return_type in ('asu-txt' ,):
157+ # I had a look at the format of these "tables" and... they just
158+ # aren't. They're quasi-fixed-width without schema. I think they
159+ # follow the general philosophy of "consistency is overrated"
160+ # The CDS reader chokes on it.
161+ raise TypeError ("asu-txt is not and cannot be supported: the "
162+ "returned tables are not and cannot be made "
163+ "parseable." )
113164 return "http://" + self .VIZIER_SERVER + "/viz-bin/" + return_type
114165
115166 @property
@@ -126,7 +177,7 @@ def keywords(self):
126177 self ._keywords = None
127178
128179 def find_catalogs (self , keywords , include_obsolete = False , verbose = False ,
129- max_catalogs = None ):
180+ max_catalogs = None , return_type = 'votable' ):
130181 """
131182 Search Vizier for catalogs based on a set of keywords, e.g. author name
132183
@@ -168,12 +219,13 @@ def find_catalogs(self, keywords, include_obsolete=False, verbose=False,
168219 if max_catalogs is not None :
169220 data_payload ['-meta.max' ] = max_catalogs
170221 response = self ._request (method = 'POST' ,
171- url = self ._server_to_url (),
222+ url = self ._server_to_url (return_type = return_type ),
172223 data = data_payload ,
173224 timeout = self .TIMEOUT )
174225 if 'STOP, Max. number of RESOURCE reached' in response .text :
175- raise ValueError ("Maximum number of catalogs exceeded. Try setting max_catalogs "
176- "to a large number and try again" )
226+ raise ValueError ("Maximum number of catalogs exceeded. Try "
227+ "setting max_catalogs to a large number and"
228+ " try again" )
177229 result = self ._parse_result (response , verbose = verbose , get_catalog_names = True )
178230
179231 # Filter out the obsolete catalogs, unless requested
@@ -185,7 +237,7 @@ def find_catalogs(self, keywords, include_obsolete=False, verbose=False,
185237
186238 return result
187239
188- def get_catalogs_async (self , catalog , verbose = False ):
240+ def get_catalogs_async (self , catalog , verbose = False , return_type = 'votable' ):
189241 """
190242 Query the Vizier service for a specific catalog
191243
@@ -202,13 +254,14 @@ def get_catalogs_async(self, catalog, verbose=False):
202254
203255 data_payload = self ._args_to_payload (catalog = catalog )
204256 response = self ._request (method = 'POST' ,
205- url = self ._server_to_url (),
257+ url = self ._server_to_url (return_type = return_type ),
206258 data = data_payload ,
207259 timeout = self .TIMEOUT )
208260 return response
209261
210262 def query_object_async (self , object_name , catalog = None , radius = None ,
211- coordinate_frame = None ):
263+ coordinate_frame = None , get_query_payload = False ,
264+ return_type = 'votable' , cache = True ):
212265 """
213266 Serves the same purpose as `query_object` but only
214267 returns the HTTP response rather than the parsed result.
@@ -248,15 +301,19 @@ def query_object_async(self, object_name, catalog=None, radius=None,
248301 data_payload = self ._args_to_payload (
249302 center = center ,
250303 catalog = catalog )
304+ if get_query_payload :
305+ return data_payload
251306 response = self ._request (method = 'POST' ,
252- url = self ._server_to_url (),
307+ url = self ._server_to_url (return_type = return_type ),
253308 data = data_payload ,
254- timeout = self .TIMEOUT )
309+ timeout = self .TIMEOUT ,
310+ cache = cache )
255311 return response
256312
257313 def query_region_async (self , coordinates , radius = None , inner_radius = None ,
258314 width = None , height = None , catalog = None ,
259- get_query_payload = False ):
315+ get_query_payload = False , cache = True ,
316+ return_type = 'votable' ):
260317 """
261318 Serves the same purpose as `query_region` but only
262319 returns the HTTP response rather than the parsed result.
@@ -374,12 +431,15 @@ def query_region_async(self, coordinates, radius=None, inner_radius=None,
374431 return data_payload
375432
376433 response = self ._request (method = 'POST' ,
377- url = self ._server_to_url (),
434+ url = self ._server_to_url (return_type = return_type ),
378435 data = data_payload ,
379- timeout = self .TIMEOUT )
436+ timeout = self .TIMEOUT ,
437+ cache = cache )
380438 return response
381439
382- def query_constraints_async (self , catalog = None , ** kwargs ):
440+ def query_constraints_async (self , catalog = None , return_type = 'votable' ,
441+ cache = True ,
442+ ** kwargs ):
383443 """
384444 Send a query to Vizier in which you specify constraints with keyword/value
385445 pairs.
@@ -437,9 +497,10 @@ def query_constraints_async(self, catalog=None, **kwargs):
437497 column_filters = kwargs ,
438498 center = {'-c.rd' : 180 })
439499 response = self ._request (method = 'POST' ,
440- url = self ._server_to_url (),
500+ url = self ._server_to_url (return_type = return_type ),
441501 data = data_payload ,
442- timeout = self .TIMEOUT )
502+ timeout = self .TIMEOUT ,
503+ cache = cache )
443504 return response
444505
445506 def _args_to_payload (self , * args , ** kwargs ):
@@ -530,7 +591,8 @@ def _args_to_payload(self, *args, **kwargs):
530591 script += "\n " + str (self .keywords )
531592 return script
532593
533- def _parse_result (self , response , get_catalog_names = False , verbose = False , invalid = 'warn' ):
594+ def _parse_result (self , response , get_catalog_names = False , verbose = False ,
595+ invalid = 'warn' ):
534596 """
535597 Parses the HTTP response to create a `~astropy.table.Table`.
536598
@@ -541,9 +603,11 @@ def _parse_result(self, response, get_catalog_names=False, verbose=False, invali
541603 response : `requests.Response`
542604 The response of the HTTP POST request
543605 get_catalog_names : bool
606+ (only for VOTABLE queries)
544607 If specified, return only the table names (useful for table
545- discovery)
608+ discovery).
546609 invalid : 'warn', 'mask' or 'raise'
610+ (only for VOTABLE queries)
547611 The behavior if a VOTABLE cannot be parsed. Default is 'warn',
548612 which will try to parse the table, then if an exception is raised,
549613 it will be printed but the masked table will be returned
@@ -553,51 +617,22 @@ def _parse_result(self, response, get_catalog_names=False, verbose=False, invali
553617 table_list : `astroquery.utils.TableList` or str
554618 If there are errors in the parsing, then returns the raw results as a string.
555619 """
556- if not verbose :
557- commons .suppress_vo_warnings ()
558- try :
559- tf = six .BytesIO (response .content )
560-
561- if invalid == 'mask' :
562- vo_tree = votable .parse (tf , pedantic = False , invalid = 'mask' )
563- elif invalid == 'warn' :
564- try :
565- vo_tree = votable .parse (tf , pedantic = False , invalid = 'raise' )
566- except Exception as ex :
567- warnings .warn ("VOTABLE parsing raised exception: {0}" .format (ex ))
568- vo_tree = votable .parse (tf , pedantic = False , invalid = 'mask' )
569- elif invalid == 'raise' :
570- vo_tree = votable .parse (tf , pedantic = False , invalid = 'raise' )
571- else :
572- raise ValueError ("Invalid keyword 'invalid'. Must be raise, mask, or warn" )
573-
574- if get_catalog_names :
575- return dict ([(R .name , R ) for R in vo_tree .resources ])
576- else :
577- table_dict = OrderedDict ()
578- for t in vo_tree .iter_tables ():
579- if len (t .array ) > 0 :
580- if t .ref is not None :
581- name = vo_tree .get_table_by_id (t .ref ).name
582- else :
583- name = t .name
584- if name not in table_dict .keys ():
585- table_dict [name ] = []
586- table_dict [name ] += [t .to_table ()]
587- for name in table_dict .keys ():
588- if len (table_dict [name ]) > 1 :
589- table_dict [name ] = tbl .vstack (table_dict [name ])
590- else :
591- table_dict [name ] = table_dict [name ][0 ]
592- return commons .TableList (table_dict )
593-
594- except Exception as ex :
595- self .response = response
596- self .table_parse_error = ex
597- raise TableParseError ("Failed to parse VIZIER result! The raw response can be found "
598- "in self.response, and the error in self.table_parse_error."
599- " The attempted parsed result is in self.parsed_result.\n "
600- "Exception: " + str (self .table_parse_error ))
620+ if response .content [:5 ] == b'<?xml' :
621+ try :
622+ return parse_vizier_votable (response .content , verbose = verbose ,
623+ invalid = invalid ,
624+ get_catalog_names = get_catalog_names )
625+ except Exception as ex :
626+ self .response = response
627+ self .table_parse_error = ex
628+ raise TableParseError ("Failed to parse VIZIER result! The raw response can be found "
629+ "in self.response, and the error in self.table_parse_error."
630+ " The attempted parsed result is in self.parsed_result.\n "
631+ "Exception: " + str (self .table_parse_error ))
632+ elif response .content [:5 ] == b'#\n # ' :
633+ return parse_vizier_tsvfile (data , verbose = verbose )
634+ elif response .content [:6 ] == b'SIMPLE' :
635+ return fits .open (BytesIO (response .content ), ignore_missing_end = True )
601636
602637 @property
603638 def valid_keywords (self ):
@@ -611,6 +646,69 @@ def valid_keywords(self):
611646
612647 return self ._valid_keyword_dict
613648
def parse_vizier_tsvfile(data, verbose=False):
    """
    Parse a Vizier-generated list of tsv data tables into a list of astropy
    Tables.

    The input is cut at every occurrence of a blank line followed by a
    ``#`` comment line, and each span lying between two consecutive cut
    points is handed to ``ascii.read`` as a tab-separated table.  Content
    before the first cut point and after the last one is not parsed.

    Parameters
    ----------
    data : bytes or str
        The vizier-formatted list of tables.  Text input is encoded to
        bytes (UTF-8) before parsing, so the raw ``response.content`` of a
        request can be passed directly.
    verbose : bool
        Accepted for signature parity with the other parsers; it is not
        used by this function.

    Returns
    -------
    tables : list
        One entry per parsed chunk, in file order, as returned by
        ``ascii.read``.
    """
    # Work in bytes throughout: a text regex cannot search a bytes payload
    # and BytesIO cannot wrap text, so mixing the two fails on Python 3.
    if not isinstance(data, bytes):
        data = data.encode('utf-8')

    # http://stackoverflow.com/questions/4664850/find-all-occurrences-of-a-substring-in-python
    split_indices = [m.start() for m in re.finditer(b'\n\n#', data)]

    # Pair each cut point with the next one; every (start, stop) pair
    # delimits one chunk of the file.
    split_limits = zip(split_indices[:-1], split_indices[1:])

    tables = [ascii.read(BytesIO(data[start:stop]), format='fast_tab',
                         delimiter='\t', header_start=0, comment="#")
              for start, stop in split_limits]
    return tables
668+
def parse_vizier_votable(data, verbose=False, invalid='warn',
                         get_catalog_names=False):
    """
    Given a votable as a byte string, parse it into tables.

    Parameters
    ----------
    data : bytes
        Raw VOTABLE content; it is wrapped in a ``BytesIO`` for parsing.
    verbose : bool
        If False, ``commons.suppress_vo_warnings()`` is called before
        parsing.
    invalid : 'warn', 'mask' or 'raise'
        ``'mask'`` and ``'raise'`` are forwarded unchanged as the
        ``invalid`` argument of ``votable.parse``.  ``'warn'`` first tries
        with ``'raise'``; if that raises, the exception is reported through
        ``warnings.warn`` and the document is parsed again with ``'mask'``.
    get_catalog_names : bool
        If True, return only a mapping of resource name to resource instead
        of the tables themselves.

    Returns
    -------
    dict or `commons.TableList`
        With ``get_catalog_names`` set, a ``dict`` keyed by the ``name`` of
        each entry in the parsed tree's ``resources``.  Otherwise a
        ``TableList`` built from an ordered mapping of table name to table:
        tables with an empty ``array`` are skipped, a table with a ``ref``
        takes the name of the table it refers to, and tables sharing a name
        are combined with ``tbl.vstack``.

    Raises
    ------
    ValueError
        If ``invalid`` is not one of the three accepted values.
    """
    if not verbose:
        commons.suppress_vo_warnings()

    def _parse(mode):
        # Use a fresh buffer for every attempt: after a failed parse a
        # shared stream is left positioned part-way through the document,
        # so a retry on it would not start from the beginning.
        return votable.parse(BytesIO(data), pedantic=False, invalid=mode)

    if invalid == 'mask':
        vo_tree = _parse('mask')
    elif invalid == 'warn':
        try:
            vo_tree = _parse('raise')
        except Exception as ex:
            warnings.warn("VOTABLE parsing raised exception: {0}".format(ex))
            vo_tree = _parse('mask')
    elif invalid == 'raise':
        vo_tree = _parse('raise')
    else:
        raise ValueError("Invalid keyword 'invalid'. Must be raise, mask, or warn")

    if get_catalog_names:
        return dict((res.name, res) for res in vo_tree.resources)

    # Collect the non-empty tables under their (possibly referenced) name,
    # preserving the order in which names are first seen.
    grouped = OrderedDict()
    for t in vo_tree.iter_tables():
        if len(t.array) == 0:
            continue
        if t.ref is not None:
            name = vo_tree.get_table_by_id(t.ref).name
        else:
            name = t.name
        if name not in grouped:
            grouped[name] = []
        grouped[name].append(t.to_table())

    # Collapse each group to a single table, stacking only when needed.
    table_dict = OrderedDict()
    for name, parts in grouped.items():
        if len(parts) > 1:
            table_dict[name] = tbl.vstack(parts)
        else:
            table_dict[name] = parts[0]
    return commons.TableList(table_dict)
711+
614712
615713def _parse_angle (angle ):
616714 """
0 commit comments