Skip to content

Commit e2f3b5d

Browse files
authored
Merge pull request #2535 from at88mph/alma-timeouts
BUG: fix for alma timeout
2 parents 8898b54 + 5a33aa1 commit e2f3b5d

File tree

4 files changed

+60
-41
lines changed

4 files changed

+60
-41
lines changed

astroquery/alma/core.py

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@
158158
# used to lookup the TAP service on an ARC
159159
TAP_SERVICE_PATH = 'tap'
160160

161+
# standard ID URI to look for when expanding TAR files
162+
DATALINK_STANDARD_ID = 'ivo://ivoa.net/std/DataLink#links-1.0'
163+
161164
# used to lookup the DataLink service on an ARC
162165
DATALINK_SERVICE_PATH = 'datalink/sync'
163166

@@ -503,7 +506,7 @@ def _get_dataarchive_url(self):
503506
"""
504507
if not hasattr(self, 'dataarchive_url'):
505508
if self.archive_url in ('http://almascience.org', 'https://almascience.org'):
506-
response = self._request('GET', self.archive_url,
509+
response = self._request('GET', self.archive_url, timeout=self.TIMEOUT,
507510
cache=False)
508511
response.raise_for_status()
509512
# Jan 2017: we have to force https because the archive doesn't
@@ -557,15 +560,18 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
557560
raise TypeError("Datasets must be given as a list of strings.")
558561
# TODO remove this loop and send uids at once when pyvo fixed
559562
result = None
560-
service_def_dict = {}
563+
datalink_service_def_dict = {}
561564
for uid in uids:
562565
res = self.datalink.run_sync(uid)
563566
if res.status[0] != 'OK':
564567
raise Exception('ERROR {}: {}'.format(res.status[0],
565568
res.status[1]))
566569

567-
# Dictionary of service_def entries
568-
service_def_dict.update({row.service_def: row.access_url for row in res.iter_procs()})
570+
# Collect the ad-hoc DataLink services for later retrieval if expand_tarballs is set
571+
if expand_tarfiles:
572+
for adhoc_service in res.iter_adhocservices():
573+
if self.is_datalink_adhoc_service(adhoc_service):
574+
datalink_service_def_dict[adhoc_service.ID] = adhoc_service
569575

570576
temp = res.to_table()
571577
if commons.ASTROPY_LT_4_1:
@@ -589,26 +595,19 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
589595
if not with_rawdata:
590596
result = result[np.core.defchararray.find(
591597
result['semantics'], '#progenitor') == -1]
592-
# primary data delivery type is files packaged in tarballs. However
593-
# some type of data has an alternative way to retrieve each individual
594-
# file as an alternative (semantics='#datalink' and
595-
# 'content_type=application/x-votable+xml;content=datalink'). They also
596-
# require an extra call to the datalink service to get the list of
597-
# files.
598-
DATALINK_FILE_TYPE = 'application/x-votable+xml;content=datalink'
599598
# if expand_tarfiles:
600599
# identify the tarballs that can be expandable and replace them
601600
# with the list of components
602601
expanded_result = None
603602
to_delete = []
604603
if expand_tarfiles:
605604
for index, row in enumerate(result):
606-
# Recursive DataLink, so look for service_def
607-
if row['service_def'] and row['content_type'] == DATALINK_FILE_TYPE:
605+
service_def_id = row['service_def']
606+
# service_def record, so check if it points to a DataLink document
607+
if service_def_id and service_def_id in datalink_service_def_dict:
608608
# subsequent call to datalink
609-
610-
# Lookup the access_url from the service_def RESOURCE entries.
611-
recursive_access_url = service_def_dict[row['service_def']]
609+
adhoc_service = datalink_service_def_dict[service_def_id]
610+
recursive_access_url = self.get_adhoc_service_access_url(adhoc_service)
612611
file_id = recursive_access_url.split('ID=')[1]
613612
expanded_tar = self.get_data_info(file_id)
614613
expanded_tar = expanded_tar[
@@ -630,6 +629,18 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
630629

631630
return result
632631

632+
def is_datalink_adhoc_service(self, adhoc_service):
633+
standard_id = self.get_adhoc_service_parameter(adhoc_service, 'standardID')
634+
return standard_id == DATALINK_STANDARD_ID
635+
636+
def get_adhoc_service_access_url(self, adhoc_service):
637+
return self.get_adhoc_service_parameter(adhoc_service, 'accessURL')
638+
639+
def get_adhoc_service_parameter(self, adhoc_service, parameter_id):
640+
for p in adhoc_service.params:
641+
if p.ID == parameter_id:
642+
return p.value
643+
633644
def is_proprietary(self, uid):
634645
"""
635646
Given an ALMA UID, query the servers to determine whether it is
@@ -708,7 +719,7 @@ def download_files(self, files, *, savedir=None, cache=True,
708719
for file_link in unique(files):
709720
log.debug("Downloading {0} to {1}".format(file_link, savedir))
710721
try:
711-
check_filename = self._request('HEAD', file_link, auth=auth)
722+
check_filename = self._request('HEAD', file_link, auth=auth, timeout=self.TIMEOUT)
712723
check_filename.raise_for_status()
713724
except requests.HTTPError as ex:
714725
if ex.response.status_code == 401:
@@ -988,7 +999,7 @@ def _cycle0_tarfile_content(self):
988999
if not hasattr(self, '_cycle0_tarfile_content_table'):
9891000
url = urljoin(self._get_dataarchive_url(),
9901001
'alma-data/archive/cycle-0-tarfile-content')
991-
response = self._request('GET', url, cache=True)
1002+
response = self._request('GET', url, cache=True, timeout=self.TIMEOUT)
9921003

9931004
# html.parser is needed because some <tr>'s have form:
9941005
# <tr width="blah"> which the default parser does not pick up

astroquery/alma/tests/test_alma.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -453,31 +453,32 @@ def _test_datalink_url(data_archive_url):
453453

454454

455455
def test_get_data_info():
456-
datalink_mock = Mock()
457-
dl_result = Table.read(data_path('alma-datalink.xml'), format='votable')
458-
459-
# Emulate the DatalinkResults
460-
service_def_1 = type('', (object, ), {'service_def': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar', 'access_url': 'https://almascience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar'})()
461-
service_def_2 = type('', (object, ), {'service_def': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar', 'access_url': 'https://almascience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar'})()
456+
class MockDataLinkService:
457+
def run_sync(self, uid):
458+
return _mocked_datalink_sync(uid)
462459

463-
mock_response = Mock(to_table=Mock(return_value=dl_result), iter_procs=Mock(return_value=[service_def_1, service_def_2]))
464-
mock_response.status = ['OK']
465-
datalink_mock.run_sync.return_value = mock_response
466460
alma = Alma()
467461
alma._get_dataarchive_url = Mock()
468-
alma._datalink = datalink_mock
462+
alma._datalink = MockDataLinkService()
469463
result = alma.get_data_info(uids='uid://A001/X12a3/Xe9')
470464
assert len(result) == 9
471465

472-
datalink_mock.run_sync.assert_called_once_with('uid://A001/X12a3/Xe9')
473-
474466

475467
# This method will be used by the mock in test_get_data_info_expand_tarfiles to replace requests.get
476468
def _mocked_datalink_sync(*args, **kwargs):
477469
class MockResponse:
478-
# Emulate the DatalinkResults
479-
service_def_1 = type('', (object, ), {'service_def': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar', 'access_url': 'https://almascience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar'})()
480-
service_def_2 = type('', (object, ), {'service_def': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar', 'access_url': 'https://almascience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar'})()
470+
adhoc_service_1_param1 = type('', (object, ), {'ID': 'standardID', 'value': 'ivo://ivoa.net/std/DataLink#links-1.0'})()
471+
adhoc_service_1_param2 = type('', (object, ), {'ID': 'accessURL', 'value': 'https://almascience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar'})()
472+
adhoc_service_1 = type('', (object, ), {'ID': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar', 'params': [adhoc_service_1_param1, adhoc_service_1_param2]})()
473+
474+
adhoc_service_2_param1 = type('', (object, ), {'ID': 'standardID', 'value': 'ivo://ivoa.net/std/DataLink#links-1.0'})()
475+
adhoc_service_2_param2 = type('', (object, ), {'ID': 'accessURL', 'value': 'https://almascience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar'})()
476+
adhoc_service_2 = type('', (object, ), {'ID': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar', 'params': [adhoc_service_1_param1, adhoc_service_1_param2]})()
477+
478+
adhoc_services = {
479+
'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar': adhoc_service_1,
480+
'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar': adhoc_service_2
481+
}
481482

482483
def __init__(self, table):
483484
self.table = table
@@ -489,8 +490,11 @@ def to_table(self):
489490
def status(self):
490491
return ['OK']
491492

492-
def iter_procs(self):
493-
return [self.service_def_1, self.service_def_2]
493+
def iter_adhocservices(self):
494+
return [self.adhoc_service_1, self.adhoc_service_2]
495+
496+
def get_adhocservice_by_id(self, adhoc_service_id):
497+
return self.adhoc_services[adhoc_service_id]
494498

495499
print(f"\n\nFOUND ARGS {args}\n\n")
496500

@@ -514,8 +518,8 @@ def run_sync(self, uid):
514518
alma._datalink = MockDataLinkService()
515519
result = alma.get_data_info(uids='uid://A001/X12a3/Xe9', expand_tarfiles=True)
516520

517-
# Entire expanded structure is 61 links long.
518-
assert len(result) == 61
521+
# Entire expanded structure is 19 links long.
522+
assert len(result) == 19
519523

520524

521525
def test_galactic_query():

astroquery/alma/tests/test_alma_remote.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,13 @@ def test_data_info(self, tmp_path, alma):
185185
download_files_mock = Mock()
186186
alma.download_files = download_files_mock
187187
alma.retrieve_data_from_uid([uid])
188-
189-
comparison = download_files_mock.mock_calls[0][1] == data_info_tar['access_url']
190-
assert comparison.all()
188+
trimmed_access_url_list = [e for e in data_info_tar['access_url'].data if len(e) > 0]
189+
trimmed_access_urls = (trimmed_access_url_list,)
190+
mock_calls = download_files_mock.mock_calls[0][1]
191+
print(f"\n\nComparing {mock_calls} to {trimmed_access_urls}\n\n")
192+
# comparison = download_files_mock.mock_calls[0][1] == data_info_tar['access_url']
193+
assert mock_calls == trimmed_access_urls
194+
# assert comparison.all()
191195

192196
def test_download_data(self, tmp_path, alma):
193197
# test only fits files from a program

astroquery/query.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def _request(self, method, url,
293293
local_filename = local_filename.replace(':', '_')
294294
local_filepath = os.path.join(savedir or self.cache_location or '.', local_filename)
295295

296-
response = self._download_file(url, local_filepath, cache=cache,
296+
response = self._download_file(url, local_filepath, cache=cache, timeout=timeout,
297297
continuation=continuation, method=method,
298298
allow_redirects=allow_redirects,
299299
auth=auth, **req_kwargs)

0 commit comments

Comments
 (0)