# used to lookup the TAP service on an ARC
TAP_SERVICE_PATH = 'tap'

+# standard ID URI to look for when expanding TAR files
+DATALINK_STANDARD_ID = 'ivo://ivoa.net/std/DataLink#links-1.0'
+
# used to lookup the DataLink service on an ARC
DATALINK_SERVICE_PATH = 'datalink/sync'

@@ -503,7 +506,7 @@ def _get_dataarchive_url(self):
        """
        if not hasattr(self, 'dataarchive_url'):
            if self.archive_url in ('http://almascience.org', 'https://almascience.org'):
-                response = self._request('GET', self.archive_url,
+                response = self._request('GET', self.archive_url, timeout=self.TIMEOUT,
                                         cache=False)
                response.raise_for_status()
                # Jan 2017: we have to force https because the archive doesn't
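Not part of the diff, but for context: the `_request` calls touched in this patch now pass the class-level `TIMEOUT`, so a stalled connection fails instead of hanging. A minimal sketch of how a caller could loosen that limit (300 is an arbitrary example value):

```python
from astroquery.alma import Alma

# TIMEOUT is a plain class attribute consulted by the patched _request calls;
# assigning to it changes the limit (in seconds) for subsequent requests.
Alma.TIMEOUT = 300
```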
@@ -557,15 +560,18 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
            raise TypeError("Datasets must be given as a list of strings.")
        # TODO remove this loop and send uids at once when pyvo fixed
        result = None
-        service_def_dict = {}
+        datalink_service_def_dict = {}
        for uid in uids:
            res = self.datalink.run_sync(uid)
            if res.status[0] != 'OK':
                raise Exception('ERROR {}: {}'.format(res.status[0],
                                                      res.status[1]))

-            # Dictionary of service_def entries
-            service_def_dict.update({row.service_def: row.access_url for row in res.iter_procs()})
+            # Collect the ad-hoc DataLink services for later retrieval if expand_tarfiles is set
+            if expand_tarfiles:
+                for adhoc_service in res.iter_adhocservices():
+                    if self.is_datalink_adhoc_service(adhoc_service):
+                        datalink_service_def_dict[adhoc_service.ID] = adhoc_service

            temp = res.to_table()
            if commons.ASTROPY_LT_4_1:
@@ -589,26 +595,19 @@ def get_data_info(self, uids, *, expand_tarfiles=False,
        if not with_rawdata:
            result = result[np.core.defchararray.find(
                result['semantics'], '#progenitor') == -1]
-        # primary data delivery type is files packaged in tarballs. However
-        # some type of data has an alternative way to retrieve each individual
-        # file as an alternative (semantics='#datalink' and
-        # 'content_type=application/x-votable+xml;content=datalink'). They also
-        # require an extra call to the datalink service to get the list of
-        # files.
-        DATALINK_FILE_TYPE = 'application/x-votable+xml;content=datalink'
        # if expand_tarfiles:
        # identify the tarballs that can be expandable and replace them
        # with the list of components
        expanded_result = None
        to_delete = []
        if expand_tarfiles:
            for index, row in enumerate(result):
-                # Recursive DataLink, so look for service_def
-                if row['service_def'] and row['content_type'] == DATALINK_FILE_TYPE:
+                service_def_id = row['service_def']
+                # service_def record, so check if it points to a DataLink document
+                if service_def_id and service_def_id in datalink_service_def_dict:
                    # subsequent call to datalink
-
-                    # Lookup the access_url from the service_def RESOURCE entries.
-                    recursive_access_url = service_def_dict[row['service_def']]
+                    adhoc_service = datalink_service_def_dict[service_def_id]
+                    recursive_access_url = self.get_adhoc_service_access_url(adhoc_service)
                    file_id = recursive_access_url.split('ID=')[1]
                    expanded_tar = self.get_data_info(file_id)
                    expanded_tar = expanded_tar[
@@ -630,6 +629,18 @@ def get_data_info(self, uids, *, expand_tarfiles=False,

        return result

+    def is_datalink_adhoc_service(self, adhoc_service):
+        standard_id = self.get_adhoc_service_parameter(adhoc_service, 'standardID')
+        return standard_id == DATALINK_STANDARD_ID
+
+    def get_adhoc_service_access_url(self, adhoc_service):
+        return self.get_adhoc_service_parameter(adhoc_service, 'accessURL')
+
+    def get_adhoc_service_parameter(self, adhoc_service, parameter_id):
+        for p in adhoc_service.params:
+            if p.ID == parameter_id:
+                return p.value
+
    def is_proprietary(self, uid):
        """
        Given an ALMA UID, query the servers to determine whether it is
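A rough usage sketch of the three helpers added above (not part of the patch; the UID is a placeholder and network access is required). It lists the recursive DataLink services attached to a DataLink response, which is exactly what `get_data_info` consults when `expand_tarfiles=True`:

```python
from astroquery.alma import Alma

alma = Alma()
# Placeholder member OUS UID; any valid ALMA UID would do.
res = alma.datalink.run_sync('uid://A001/X87c/X572')

for adhoc_service in res.iter_adhocservices():
    # Keep only ad-hoc services that declare the DataLink standardID, i.e.
    # entries that point at a further DataLink document to expand.
    if alma.is_datalink_adhoc_service(adhoc_service):
        print(adhoc_service.ID, alma.get_adhoc_service_access_url(adhoc_service))
```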
@@ -708,7 +719,7 @@ def download_files(self, files, *, savedir=None, cache=True,
        for file_link in unique(files):
            log.debug("Downloading {0} to {1}".format(file_link, savedir))
            try:
-                check_filename = self._request('HEAD', file_link, auth=auth)
+                check_filename = self._request('HEAD', file_link, auth=auth, timeout=self.TIMEOUT)
                check_filename.raise_for_status()
            except requests.HTTPError as ex:
                if ex.response.status_code == 401:
@@ -988,7 +999,7 @@ def _cycle0_tarfile_content(self):
        if not hasattr(self, '_cycle0_tarfile_content_table'):
            url = urljoin(self._get_dataarchive_url(),
                          'alma-data/archive/cycle-0-tarfile-content')
-            response = self._request('GET', url, cache=True)
+            response = self._request('GET', url, cache=True, timeout=self.TIMEOUT)

            # html.parser is needed because some <tr>'s have form:
            # <tr width="blah"> which the default parser does not pick up
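End to end, the change means tarballs are expanded by resolving the `accessURL` of the DataLink ad-hoc service named in their `service_def`, instead of matching on `content_type`. A hedged sketch of the public entry point (placeholder UID, network access required):

```python
from astroquery.alma import Alma

alma = Alma()
uid = 'uid://A001/X87c/X572'  # placeholder UID

# With expand_tarfiles=True, rows whose service_def resolves to one of the
# collected DataLink ad-hoc services are replaced by the individual files
# listed in that recursive DataLink document.
files = alma.get_data_info(uid, expand_tarfiles=True)
print(files['access_url', 'content_length', 'content_type'])
```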