Skip to content

Commit 568f252

Browse files
committed
add a tsv parser
1 parent 6bb8a25 commit 568f252

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

astroquery/vizier/core.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import warnings
66
import json
77
import copy
8+
import re
89

910
from astropy.extern import six
1011
import astropy.units as u
@@ -13,6 +14,7 @@
1314
import astropy.utils.data as aud
1415
from astropy.utils import OrderedDict
1516
import astropy.io.votable as votable
17+
from astropy.io import ascii
1618

1719
from ..query import BaseQuery
1820
from ..utils import commons
@@ -110,10 +112,42 @@ def _server_to_url(self, return_type='votable'):
110112
FITS binary table: asu-binfits
111113
plain text: asu-txt
112114
"""
115+
116+
"""
117+
Quasi-private performance tests:
118+
%timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-tsv', cache=False)
119+
1 loops, best of 3: 7.11 s per loop
120+
%timeit m83tsv = Vizier.query_object_async('M83', return_type='votable', cache=False)
121+
1 loops, best of 3: 6.79 s per loop
122+
%timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-fits', cache=False)
123+
1 loops, best of 3: 6.21 s per loop
124+
%timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-binfits', cache=False)
125+
1 loops, best of 3: 667 ms per loop
126+
Looks like this one led to a segfault on their system?
127+
128+
%timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-txt', cache=False)
129+
1 loops, best of 3: 6.83 s per loop
130+
%timeit m83tsv = Vizier.query_object_async('M83', return_type='asu-tsv', cache=False)
131+
1 loops, best of 3: 6.8 s per loop
132+
133+
m83tsv = Vizier.query_object_async('M83', return_type='asu-tsv', cache=False)
134+
m83votable = Vizier.query_object_async('M83', return_type='votable', cache=False)
135+
m83fits = Vizier.query_object_async('M83', return_type='asu-fits', cache=False)
136+
m83txt = Vizier.query_object_async('M83', return_type='asu-txt', cache=False)
137+
#m83binfits = Vizier.query_object_async('M83', return_type='asu-binfits', cache=False)
138+
"""
113139
# Only votable is supported now, but in case we try to support
114140
# something in the future we should disallow invalid ones.
115141
assert return_type in ('votable', 'asu-tsv', 'asu-fits',
116142
'asu-binfits', 'asu-txt')
143+
if return_type in ('asu-txt',):
144+
# I had a look at the format of these "tables" and... they just
145+
# aren't. They're quasi-fixed-width without schema. I think they
146+
# follow the general philosophy of "consistency is overrated"
147+
# The CDS reader chokes on it.
148+
raise TypeError("asu-txt is not and cannot be supported: the "
149+
"returned tables are not and cannot be made "
150+
"parseable.")
117151
return "http://" + self.VIZIER_SERVER + "/viz-bin/" + return_type
118152

119153
@property
@@ -625,6 +659,26 @@ def valid_keywords(self):
625659

626660
return self._valid_keyword_dict
627661

662+
def parse_vizier_tsvfile(data):
    """
    Parse a Vizier-generated list of tsv data tables into a list of astropy
    Tables.

    The input is cut at every occurrence of an empty line followed by a
    line starting with ``#``.  Each span between two consecutive markers is
    passed to ``ascii.read`` as one tab-separated table whose ``#`` lines
    are comments.  Text before the first marker and after the last marker
    is not parsed, so fewer than two markers yields an empty list.

    Parameters
    ----------
    data : str or bytes
        The vizier-formatted list of tables.  Text is encoded to bytes
        before parsing; bytes are used as they are.

    Returns
    -------
    tables : list
        One entry per span between consecutive markers, in input order,
        each being the value returned by ``ascii.read``.
    """
    # Imported locally so the function does not depend on a module-level
    # ``BytesIO`` name being available.
    from io import BytesIO

    # Work on bytes throughout: BytesIO rejects text on Python 3, and the
    # marker pattern has to be of the same type as the data it searches.
    # On Python 2 ``bytes`` is ``str``, so plain strings pass through as is.
    if isinstance(data, bytes):
        raw = data
    else:
        raw = data.encode('utf-8')

    # http://stackoverflow.com/questions/4664850/find-all-occurrences-of-a-substring-in-python
    marker_starts = [m.start() for m in re.finditer(b'\n\n#', raw)]

    # Slice out the chunk between each pair of neighbouring markers.
    # NOTE(review): the span after the final marker is never read; confirm
    # that VizieR always ends its output with a trailing marker, otherwise
    # the last table is silently dropped.
    chunk_bounds = zip(marker_starts[:-1], marker_starts[1:])

    return [ascii.read(BytesIO(raw[begin:end]), format='tab', delimiter='\t',
                       header_start=0, comment="#")
            for begin, end in chunk_bounds]
681+
628682

629683
def _parse_angle(angle):
630684
"""

0 commit comments

Comments
 (0)