@@ -240,7 +240,7 @@ def _get_dataarchive_url(self):
240240 "on github." )
241241 return self .dataarchive_url
242242
243- def stage_data (self , uids ):
243+ def stage_data (self , uids , expand_tarfiles = False , return_json = False ):
244244 """
245245 Obtain table of ALMA files
246246
@@ -249,6 +249,13 @@ def stage_data(self, uids):
         uids : list or str
             A list of valid UIDs or a single UID.
             UIDs should have the form: 'uid://A002/X391d0b/X7b'
+        expand_tarfiles : bool
+            Expand the tarfiles to obtain lists of all contained files.  If
+            this is specified, the parent tarfile will *not* be included.
+        return_json : bool
+            Return a list of the JSON data sets returned from the query.
+            This is primarily intended as a debug routine, but may be useful
+            if there are unusual scheduling block layouts.

         Returns
         -------
@@ -280,33 +287,50 @@ def stage_data(self, uids):
                 # this indicates a wrong server is being used;
                 # the "pre-feb2020" stager will be phased out
                 # when the new services are deployed
-                return self.stage_data_prefeb2020(uids)
+                raise RemoteServiceError("Failed query! This shouldn't happen - please "
+                                         "report the issue as it may indicate a change in "
+                                         "the ALMA servers.")
             else:
                 raise
-            if jdata['type'] != 'PROJECT':
-                log.error("Skipped uid {uu} because it is not a project and"
-                          "lacks the appropriate metadata; it is a "
-                          "{jdata}".format(uu=uu, jdata=jdata['type']))
-                continue
-            table = uid_json_to_table(jdata)
-            table['sizeInBytes'].unit = u.B
-            table.rename_column('sizeInBytes', 'size')
-            table.add_column(Column(data=['{dataarchive_url}/dataPortal/{name}'
-                                          .format(dataarchive_url=dataarchive_url,
-                                                  name=name)
-                                          for name in table['name']],
-                                    name='URL'))
-
-            isp = self.is_proprietary(uid)
-            table.add_column(Column(data=[isp for row in table],
-                                    name='isProprietary'))
-
-            tables.append(table)
-            log.debug("Completed metadata retrieval for {0}".format(uu))
+
+            if return_json:
+                tables.append(jdata)
+            else:
+                if jdata['type'] != 'PROJECT':
+                    log.error("Skipped uid {uu} because it is not a project "
+                              "and lacks the appropriate metadata; it is a "
+                              "{jdata}".format(uu=uu, jdata=jdata['type']))
+                    continue
+                if expand_tarfiles:
+                    table = uid_json_to_table(jdata,
+                                              productlist=['ASDM',
+                                                           'PIPELINE_PRODUCT'])
+                else:
+                    table = uid_json_to_table(jdata,
+                                              productlist=['ASDM',
+                                                           'PIPELINE_PRODUCT',
+                                                           'PIPELINE_PRODUCT_TARFILE',
+                                                           'PIPELINE_AUXILIARY_TARFILE'])
+                table['sizeInBytes'].unit = u.B
+                table.rename_column('sizeInBytes', 'size')
+                table.add_column(Column(data=['{dataarchive_url}/dataPortal/{name}'
+                                              .format(dataarchive_url=dataarchive_url,
+                                                      name=name)
+                                              for name in table['name']],
+                                        name='URL'))
+
+                isp = self.is_proprietary(uid)
+                table.add_column(Column(data=[isp for row in table],
+                                        name='isProprietary'))
+
+                tables.append(table)
+                log.debug("Completed metadata retrieval for {0}".format(uu))

         if len(tables) == 0:
             raise ValueError("No valid UIDs supplied.")

+        if return_json:
+            return tables
+
         table = table_vstack(tables)

         return table
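
A minimal usage sketch of the two new keywords (illustrative only: it assumes
an `Alma` instance and reuses the example UID from the docstring; actually
running it needs network access and, for proprietary data, a login):

    from astroquery.alma import Alma

    alma = Alma()

    # Default: one row per product; tarfiles appear as single entries
    files = alma.stage_data('uid://A002/X391d0b/X7b')
    print(files['name', 'size', 'URL', 'isProprietary'])

    # List the files contained in each tarfile instead; the parent
    # tarfiles themselves are *not* included in the returned table
    expanded = alma.stage_data('uid://A002/X391d0b/X7b', expand_tarfiles=True)

    # Get the raw JSON data sets rather than a table, mainly as a debug
    # aid for unusual scheduling block layouts
    jdata = alma.stage_data('uid://A002/X391d0b/X7b', return_json=True)
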
@@ -330,167 +354,6 @@ def is_proprietary(self, uid):

         return isp

-    def stage_data_prefeb2020(self, uids):
-        """
-        Stage ALMA data - old server style
-
-        NOTE: this method will be removed when a new ALMA service is deployed
-        in March 2020
-
-        Parameters
-        ----------
-        uids : list or str
-            A list of valid UIDs or a single UID.
-            UIDs should have the form: 'uid://A002/X391d0b/X7b'
-
-        Returns
-        -------
-        data_file_table : Table
-            A table containing 3 columns: the UID, the file URL (for future
-            downloading), and the file size
-        """
-
-        """
-        With log.set_level(10)
-        INFO: Staging files... [astroquery.alma.core]
-        DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
-        DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
-        DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
-        DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
-        DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
-        .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]
-        """
-
-        import time
-        from requests import HTTPError
-        from ..utils import url_helpers
-        import sys
-        from six.moves.urllib_parse import urlparse
-
-        if isinstance(uids, six.string_types + (np.bytes_,)):
-            uids = [uids]
-        if not isinstance(uids, (list, tuple, np.ndarray)):
-            raise TypeError("Datasets must be given as a list of strings.")
-
-        log.info("Staging files...")
-
-        self._get_dataarchive_url()
-
-        url = urljoin(self._get_dataarchive_url(), 'rh/submission')
-        log.debug("First request URL: {0}".format(url))
-        # 'ALMA+uid___A002_X391d0b_X7b'
-        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
-        log.debug("First request payload: {0}".format(payload))
-
-        self._staging_log = {'first_post_url': url}
-
-        # Request staging for the UIDs
-        # This component cannot be cached, since the returned data can change
-        # if new data are uploaded
-        response = self._request('POST', url, data=payload,
-                                 timeout=self.TIMEOUT, cache=False)
-        self._staging_log['initial_response'] = response
-        log.debug("First response URL: {0}".format(response.url))
-        if 'login' in response.url:
-            raise ValueError("You must login before downloading this data set.")
-
-        if response.status_code == 405:
-            if hasattr(self, '_last_successful_staging_log'):
-                log.warning("Error 405 received. If you have previously staged "
-                            "the same UIDs, the result returned is probably "
-                            "correct, otherwise you may need to create a fresh "
-                            "astroquery.Alma instance.")
-                return self._last_successful_staging_log['result']
-            else:
-                raise HTTPError("Received an error 405: this may indicate you "
-                                "have already staged the data. Try downloading "
-                                "the file URLs directly with download_files.")
-        response.raise_for_status()
-
-        if 'j_spring_cas_security_check' in response.url:
-            time.sleep(1)
-            # CANNOT cache this stage: it not a real data page! results in
-            # infinite loops
-            response = self._request('POST', url, data=payload,
-                                     timeout=self.TIMEOUT, cache=False)
-            self._staging_log['initial_response'] = response
-            if 'j_spring_cas_security_check' in response.url:
-                log.warning("Staging request was not successful. Try again?")
-                response.raise_for_status()
-
-        if 'j_spring_cas_security_check' in response.url:
-            raise RemoteServiceError("Could not access data. This error "
-                                     "can arise if the data are private and "
-                                     "you do not have access rights or are "
-                                     "not logged in.")
-
-        # make sure the URL is formatted as expected, otherwise the request ID
-        # will be wrong
-        # (the request ID can also be found from the javascript in the request
-        # response)
-        if response.url.split("/")[-1] == 'submission':
-            request_id = response.url.split("/")[-2]
-            self._staging_log['request_id'] = request_id
-            log.debug("Request ID: {0}".format(request_id))
-
-            # Submit a request for the specific request ID identified above
-            submission_url = urljoin(self._get_dataarchive_url(),
-                                     url_helpers.join('rh/submission', request_id))
-            log.debug("Submission URL: {0}".format(submission_url))
-            self._staging_log['submission_url'] = submission_url
-            staging_submission = self._request('GET', submission_url, cache=True)
-            self._staging_log['staging_submission'] = staging_submission
-            staging_submission.raise_for_status()
-
-            data_page_url = staging_submission.url
-        elif response.url.split("/")[-3] == 'requests':
-            data_page_url = response.url
-
-        self._staging_log['data_page_url'] = data_page_url
-        dpid = data_page_url.split("/")[-1]
-        self._staging_log['staging_page_id'] = dpid
-
-        # CANNOT cache this step: please_wait will happen infinitely
-        data_page = self._request('GET', data_page_url, cache=False)
-        self._staging_log['data_page'] = data_page
-        data_page.raise_for_status()
-
-        has_completed = False
-        while not has_completed:
-            time.sleep(1)
-            summary = self._request('GET', url_helpers.join(data_page_url,
-                                                            'summary'),
-                                    cache=False)
-            summary.raise_for_status()
-            print(".", end='')
-            sys.stdout.flush()
-            has_completed = summary.json()['complete']
-
-        self._staging_log['summary'] = summary
-        summary.raise_for_status()
-        self._staging_log['json_data'] = json_data = summary.json()
-
-        username = self.USERNAME if self.USERNAME else 'anonymous'
-
-        # templates:
-        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
-        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
-        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar
-
-        url_decomposed = urlparse(data_page_url)
-        base_url = ('{uri.scheme}://{uri.netloc}/'
-                    'dataPortal/requests/{username}/'
-                    '{staging_page_id}/ALMA'.format(uri=url_decomposed,
-                                                    staging_page_id=dpid,
-                                                    username=username,
-                                                    ))
-        tbl = self._json_summary_to_table(json_data, base_url=base_url)
-        self._staging_log['result'] = tbl
-        self._staging_log['file_urls'] = tbl['URL']
-        self._last_successful_staging_log = self._staging_log
-
-        return tbl
-
     def _HEADER_data_size(self, files):
         """
         Given a list of file URLs, return the data size. This is useful for
@@ -1088,53 +951,6 @@ def _validate_payload(self, payload):
1088951 " by the ALMA query service:"
1089952 " {0}" .format (invalid_params ))
1090953
-    def _json_summary_to_table(self, data, base_url):
-        """
-        Special tool to convert some JSON metadata to a table Obsolete as of
-        March 2020 - should be removed along with stage_data_prefeb2020
-        """
-        from ..utils import url_helpers
-        from six import iteritems
-        columns = {'mous_uid': [], 'URL': [], 'size': []}
-        for entry in data['node_data']:
-            # de_type can be useful (e.g., MOUS), but it is not necessarily
-            # specified
-            # file_name and file_key *must* be specified.
-            is_file = (entry['file_name'] != 'null' and
-                       entry['file_key'] != 'null')
-            if is_file:
-                # "de_name": "ALMA+uid://A001/X122/X35e",
-                columns['mous_uid'].append(entry['de_name'][5:])
-                if entry['file_size'] == 'null':
-                    columns['size'].append(np.nan * u.Gbyte)
-                else:
-                    columns['size'].append(
-                        (int(entry['file_size']) * u.B).to(u.Gbyte))
-                # example template for constructing url:
-                # https://almascience.eso.org/dataPortal/requests/keflavich/940238268/ALMA/
-                # uid___A002_X9d6f4c_X154/2013.1.00546.S_uid___A002_X9d6f4c_X154.asdm.sdm.tar
-                # above is WRONG... except for ASDMs, when it's right
-                # should be:
-                # 2013.1.00546.S_uid___A002_X9d6f4c_X154.asdm.sdm.tar/2013.1.00546.S_uid___A002_X9d6f4c_X154.asdm.sdm.tar
-                #
-                # apparently ASDMs are different from others:
-                # templates:
-                # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
-                # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
-                # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar
-                url = url_helpers.join(base_url,
-                                       entry['file_key'],
-                                       entry['file_name'])
-                if 'null' in url:
-                    raise ValueError("The URL {0} was created containing "
-                                     "'null', which is invalid.".format(url))
-                columns['URL'].append(url)
-
-        columns['size'] = u.Quantity(columns['size'], u.Gbyte)
-
-        tbl = Table([Column(name=k, data=v) for k, v in iteritems(columns)])
-        return tbl
-
     def get_project_metadata(self, projectid, cache=True):
         """
         Get the metadata - specifically, the project abstract - for a given project ID.