Skip to content

Commit 7a76890

Browse files
committed
fix votable parsing and add some performance notes
1 parent 6d2554b commit 7a76890

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

astroquery/vizier/core.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def _server_to_url(self, return_type='votable'):
115115

116116
"""
117117
Quasi-private performance tests:
118+
It seems that these are dominated by table parsing time.
118119
%timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-tsv', cache=False)
119120
1 loops, best of 3: 7.11 s per loop
120121
%timeit m83tsv = Vizier.query_object_async('M83', return_type='votable', cache=False)
@@ -135,6 +136,17 @@ def _server_to_url(self, return_type='votable'):
135136
m83fits = Vizier.query_object_async('M83', return_type='asu-fits', cache=False)
136137
m83txt = Vizier.query_object_async('M83', return_type='asu-txt', cache=False)
137138
#m83binfits = Vizier.query_object_async('M83', return_type='asu-binfits', cache=False)
139+
140+
# many of these are invalid tables
141+
%timeit fitstbls = fits.open(BytesIO(m83fits.content), ignore_missing_end=True)
142+
1 loops, best of 3: 541 ms per loop
143+
144+
%timeit tbls = parse_vizier_tsvfile(m83tsv.content)
145+
1 loops, best of 3: 1.35 s per loop
146+
147+
%timeit votbls = parse_vizier_votable(m83votable.content)
148+
1 loops, best of 3: 3.62 s per loop
149+
138150
"""
139151
# Only votable is supported now, but in case we try to support
140152
# something in the future we should disallow invalid ones.
@@ -589,9 +601,11 @@ def _parse_result(self, response, get_catalog_names=False, verbose=False,
589601
response : `requests.Response`
590602
The response of the HTTP POST request
591603
get_catalog_names : bool
604+
(only for VOTABLE queries)
592605
If specified, return only the table names (useful for table
593-
discovery)
606+
discovery).
594607
invalid : 'warn', 'mask' or 'raise'
608+
(only for VOTABLE queries)
595609
The behavior if a VOTABLE cannot be parsed. Default is 'warn',
596610
which will try to parse the table, then if an exception is raised,
597611
it will be printed, but the masked table will be returned
@@ -603,7 +617,9 @@ def _parse_result(self, response, get_catalog_names=False, verbose=False,
603617
"""
604618
if response.content[:5] == '<?xml':
605619
try:
606-
return parse_vizier_votable(response.content, verbose=verbose)
620+
return parse_vizier_votable(response.content, verbose=verbose,
621+
invalid=invalid,
622+
get_catalog_names=get_catalog_names)
607623
except Exception as ex:
608624
self.response = response
609625
self.table_parse_error = ex
@@ -613,6 +629,8 @@ def _parse_result(self, response, get_catalog_names=False, verbose=False,
613629
"Exception: " + str(self.table_parse_error))
614630
elif response.content[:5] == '#\n# ':
615631
return parse_vizier_tsvfile(data, verbose=verbose)
632+
elif response.content[:6] == 'SIMPLE':
633+
return fits.open(BytesIO(response.content), ignore_missing_end=True)
616634

617635
@property
618636
def valid_keywords(self):
@@ -641,12 +659,16 @@ def parse_vizier_tsvfile(data, verbose=False):
641659
split_indices = [m.start() for m in re.finditer('\n\n#', data)]
642660
# we want to slice out chunks of the file each time
643661
split_limits = zip(split_indices[:-1], split_indices[1:])
644-
tables = [ascii.read(BytesIO(data[a:b]), format='tab', delimiter='\t',
662+
tables = [ascii.read(BytesIO(data[a:b]), format='fast_tab', delimiter='\t',
645663
header_start=0, comment="#") for
646664
a,b in split_limits]
647665
return tables
648666

649-
def parse_vizier_votable(data, verbose=False):
667+
def parse_vizier_votable(data, verbose=False, invalid='warn',
668+
get_catalog_names=False):
669+
"""
670+
Given a VOTable as a string, parse it into tables
671+
"""
650672
if not verbose:
651673
commons.suppress_vo_warnings()
652674

0 commit comments

Comments
 (0)