
Commit 0883e92

Merge pull request #2493 from at88mph/datalink-updates
Datalink updates
2 parents f0de9d4 + fefacc7

9 files changed: +1031 / -168 lines changed


CHANGES.rst

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ alma

 - Fixed a regression to handle arrays of string input for the ``query`` methods. [#2094]
 - Throws an error when an unsupported ``kwargs`` (or argument) is passed in to a function. [#2475]
+- New DataLink API handling. [#2493]


 astrometry.net

astroquery/alma/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -6,10 +6,11 @@


 # list the URLs here separately so they can be used in tests.
-_url_list = ['http://almascience.org',
+_url_list = ['https://almascience.org',
              'https://almascience.eso.org',
              'https://almascience.nrao.edu',
-             'https://almascience.nao.ac.jp']
+             'https://almascience.nao.ac.jp'
+             ]

 auth_urls = ['asa.alma.cl', 'rh-cas.alma.cl']
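Note (illustrative, not part of this commit): _url_list is kept as a module-level name specifically so tests can reuse it. Below is a hypothetical test sketch exercising the scheme change above; it is not an existing test in the repository.

import pytest

from astroquery.alma import _url_list


@pytest.mark.parametrize('mirror', _url_list)
def test_mirror_urls_use_https(mirror):
    # After this change, every listed ALMA mirror should use https.
    assert mirror.startswith('https://')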

astroquery/alma/core.py

Lines changed: 76 additions & 46 deletions
@@ -29,16 +29,12 @@
     _gen_band_list_sql, _gen_datetime_sql, _gen_pol_sql, _gen_pub_sql,\
     _gen_science_sql, _gen_spec_res_sql, ALMA_DATE_FORMAT
 from . import conf, auth_urls
-from astroquery.utils.commons import ASTROPY_LT_4_1
 from astroquery.exceptions import CorruptDataWarning

 __all__ = {'AlmaClass', 'ALMA_BANDS'}

 __doctest_skip__ = ['AlmaClass.*']

-ALMA_TAP_PATH = 'tap'
-ALMA_SIA_PATH = 'sia2'
-ALMA_DATALINK_PATH = 'datalink/sync'

 # Map from ALMA ObsCore result to ALMA original query result
 # The map is provided in order to preserve the name of the columns in the
@@ -159,6 +155,16 @@
 }


+# used to lookup the TAP service on an ARC
+TAP_SERVICE_PATH = 'tap'
+
+# used to lookup the DataLink service on an ARC
+DATALINK_SERVICE_PATH = 'datalink/sync'
+
+# used to lookup the SIA service on an ARC
+SIA_SERVICE_PATH = 'sia2'
+
+
 def _gen_sql(payload):
     sql = 'select * from ivoa.obscore'
     where = ''
@@ -210,44 +216,61 @@ def __init__(self):
         self._sia = None
         self._tap = None
         self._datalink = None
-        self.sia_url = None
-        self.tap_url = None
-        self.datalink_url = None
+        self._sia_url = None
+        self._tap_url = None
+        self._datalink_url = None

     @property
     def datalink(self):
         if not self._datalink:
-            base_url = self._get_dataarchive_url()
-            if base_url.endswith('/'):
-                self.datalink_url = base_url + ALMA_DATALINK_PATH
-            else:
-                self.datalink_url = base_url + '/' + ALMA_DATALINK_PATH
-            self._datalink = pyvo.dal.adhoc.DatalinkService(
-                baseurl=self.datalink_url)
+            self._datalink = pyvo.dal.adhoc.DatalinkService(self.datalink_url)
         return self._datalink

+    @property
+    def datalink_url(self):
+        if not self._datalink_url:
+            try:
+                self._datalink_url = urljoin(self._get_dataarchive_url(), DATALINK_SERVICE_PATH)
+            except requests.exceptions.HTTPError as err:
+                log.debug(
+                    f"ERROR getting the ALMA Archive URL: {str(err)}")
+                raise err
+        return self._datalink_url
+
     @property
     def sia(self):
         if not self._sia:
-            base_url = self._get_dataarchive_url()
-            if base_url.endswith('/'):
-                self.sia_url = base_url + ALMA_SIA_PATH
-            else:
-                self.sia_url = base_url + '/' + ALMA_SIA_PATH
             self._sia = pyvo.dal.sia2.SIAService(baseurl=self.sia_url)
         return self._sia

+    @property
+    def sia_url(self):
+        if not self._sia_url:
+            try:
+                self._sia_url = urljoin(self._get_dataarchive_url(), SIA_SERVICE_PATH)
+            except requests.exceptions.HTTPError as err:
+                log.debug(
+                    f"ERROR getting the ALMA Archive URL: {str(err)}")
+                raise err
+        return self._sia_url
+
     @property
     def tap(self):
         if not self._tap:
-            base_url = self._get_dataarchive_url()
-            if base_url.endswith('/'):
-                self.tap_url = base_url + ALMA_TAP_PATH
-            else:
-                self.tap_url = base_url + '/' + ALMA_TAP_PATH
             self._tap = pyvo.dal.tap.TAPService(baseurl=self.tap_url)
         return self._tap

+    @property
+    def tap_url(self):
+        if not self._tap_url:
+            try:
+                self._tap_url = urljoin(self._get_dataarchive_url(), TAP_SERVICE_PATH)
+            except requests.exceptions.HTTPError as err:
+                log.debug(
+                    f"ERROR getting the ALMA Archive URL: {str(err)}")
+                raise err
+        return self._tap_url
+
     def query_object_async(self, object_name, *, public=True,
                            science=True, payload=None, **kwargs):
         """
@@ -523,7 +546,7 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
         Returns
         -------
         Table with results or None. Table has the following columns: id (UID),
-        access_url (URL to access data), content_length, content_type (MIME
+        access_url (URL to access data), service_def, content_length, content_type (MIME
         type), semantics, description (optional), error_message (optional)
         """
         if uids is None:
@@ -534,13 +557,18 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
             raise TypeError("Datasets must be given as a list of strings.")
         # TODO remove this loop and send uids at once when pyvo fixed
         result = None
+        service_def_dict = {}
         for uid in uids:
             res = self.datalink.run_sync(uid)
             if res.status[0] != 'OK':
                 raise Exception('ERROR {}: {}'.format(res.status[0],
                                                       res.status[1]))
+
+            # Dictionary of service_def entries
+            service_def_dict.update({row.service_def: row.access_url for row in res.iter_procs()})
+
             temp = res.to_table()
-            if ASTROPY_LT_4_1:
+            if commons.ASTROPY_LT_4_1:
                 # very annoying
                 for col in [x for x in temp.colnames
                             if x not in ['content_length', 'readable']]:
@@ -568,17 +596,20 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
         # require an extra call to the datalink service to get the list of
         # files.
         DATALINK_FILE_TYPE = 'application/x-votable+xml;content=datalink'
-        DATALINK_SEMANTICS = '#datalink'
+        # if expand_tarfiles:
+        # identify the tarballs that can be expandable and replace them
+        # with the list of components
+        expanded_result = None
+        to_delete = []
         if expand_tarfiles:
-            # identify the tarballs that can be expandable and replace them
-            # with the list of components
-            expanded_result = None
-            to_delete = []
             for index, row in enumerate(result):
-                if DATALINK_SEMANTICS in row['semantics'] and \
-                        row['content_type'] == DATALINK_FILE_TYPE:
+                # Recursive DataLink, so look for service_def
+                if row['service_def'] and row['content_type'] == DATALINK_FILE_TYPE:
                     # subsequent call to datalink
-                    file_id = row['access_url'].split('ID=')[1]
+
+                    # Lookup the access_url from the service_def RESOURCE entries.
+                    recursive_access_url = service_def_dict[row['service_def']]
+                    file_id = recursive_access_url.split('ID=')[1]
                     expanded_tar = self.get_data_info(file_id)
                     expanded_tar = expanded_tar[
                         expanded_tar['semantics'] != '#cutout']
@@ -587,16 +618,15 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
                     else:
                         expanded_result = vstack(
                             [expanded_result, expanded_tar], join_type='exact')
+
+                    # These DataLink entries have no access_url and are links to service_def RESOURCEs only,
+                    # so they can be removed if expanded.
                     to_delete.append(index)
-            # cleanup
-            result.remove_rows(to_delete)
-            # add the extra rows
-            if expanded_result:
-                result = vstack([result, expanded_result], join_type='exact')
-        else:
-            result = result[np.logical_or(np.core.defchararray.find(
-                result['semantics'].astype(str), DATALINK_SEMANTICS) == -1,
-                result['content_type'].astype(str) != DATALINK_FILE_TYPE)]
+        # cleanup
+        result.remove_rows(to_delete)
+        # add the extra rows
+        if expanded_result:
+            result = vstack([result, expanded_result], join_type='exact')

         return result

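Note (illustrative, not part of this commit): a usage sketch of the reworked get_data_info(). The UID below is a placeholder, and the expected columns follow the docstring amended above.

from astroquery.alma import Alma

# Placeholder UID for illustration only.
links = Alma.get_data_info(['uid://A001/X000/X00'], expand_tarfiles=True)

# Per the updated docstring: id, access_url, service_def, content_length,
# content_type, semantics, description (optional), error_message (optional).
print(links.colnames)

# With expand_tarfiles=True, rows that only reference a recursive DataLink
# service_def (and carry no direct access_url) are expanded into their
# contents and then removed from the table.
print(links['access_url'])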
@@ -707,7 +737,7 @@ def download_files(self, files, *, savedir=None, cache=True,
                 if 'content-length' in check_filename.headers:
                     length = int(check_filename.headers['content-length'])
                     if length == 0:
-                        warnings.warn('URL {0} has length=0'.format(url))
+                        warnings.warn('URL {0} has length=0'.format(file_link))
                     elif existing_file_length == length:
                         log.info(f"Found cached file {filename} with expected size {existing_file_length}.")
                     elif existing_file_length < length:
@@ -718,7 +748,7 @@ def download_files(self, files, *, savedir=None, cache=True,
                                       f"size {length}. The download is likely corrupted.",
                                       CorruptDataWarning)
                 else:
-                    warnings.warn(f"Could not verify {url} because it has no 'content-length'")
+                    warnings.warn(f"Could not verify {file_link} because it has no 'content-length'")

             try:
                 if not verify_only:
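Note (illustrative, not part of this commit): the corrected warnings above now name file_link, i.e. the URL actually being checked. A sketch of the usual flow from DataLink results into download_files(); the UID and save directory are placeholders.

from astroquery.alma import Alma

links = Alma.get_data_info(['uid://A001/X000/X00'])  # placeholder UID

# Keep only rows that expose a direct download URL.
urls = [url for url in links['access_url'] if url]

# Zero-length or unverifiable responses now warn with the offending file link.
Alma.download_files(urls, savedir='/tmp/alma', cache=True)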
@@ -1160,7 +1190,7 @@ def get_project_metadata(self, projectid, *, cache=True):
         result = self.query_tap(
             "select distinct proposal_abstract from "
             "ivoa.obscore where proposal_id='{}'".format(projectid))
-        if ASTROPY_LT_4_1:
+        if commons.ASTROPY_LT_4_1:
             return [result[0]['proposal_abstract'].astype(str)]
         else:
             return [result[0]['proposal_abstract']]
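Note (illustrative, not part of this commit): get_project_metadata() above runs its lookup through query_tap(), so the same query can be issued directly. The proposal_id below is a placeholder.

from astroquery.alma import Alma

result = Alma.query_tap(
    "select distinct proposal_abstract from ivoa.obscore "
    "where proposal_id='2019.1.00000.S'")  # placeholder proposal_id

# Mirrors the access pattern used in get_project_metadata() above.
print(result[0]['proposal_abstract'])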
