@@ -240,7 +240,7 @@ def _get_dataarchive_url(self):
                                      "on github.")
         return self.dataarchive_url
 
-    def stage_data(self, uids):
+    def stage_data(self, uids, expand_tarfiles=False, return_json=False):
         """
         Obtain table of ALMA files
 
@@ -249,6 +249,13 @@ def stage_data(self, uids):
         uids : list or str
             A list of valid UIDs or a single UID.
             UIDs should have the form: 'uid://A002/X391d0b/X7b'
+        expand_tarfiles : bool
+            Expand the tarfiles to obtain lists of all contained files.  If
+            this is specified, the parent tarfile will *not* be included.
+        return_json : bool
+            Return a list of the JSON data sets returned from the query.
+            This is primarily intended as a debugging aid, but may be useful
+            if there are unusual scheduling block layouts.
 
         Returns
         -------
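
Taken together with the signature change above, the new keywords would be exercised roughly like this (a minimal usage sketch; the UID is the placeholder from the docstring, and the class-level call style follows the usual astroquery convention):

```python
from astroquery.alma import Alma

uid = 'uid://A002/X391d0b/X7b'

# Default: tarfiles appear as single rows in the returned table.
files = Alma.stage_data(uid)

# Expand tarfiles into lists of their contained files; the parent
# tarfile itself is *not* included in the result.
contents = Alma.stage_data(uid, expand_tarfiles=True)

# Return the raw JSON data sets instead of a table -- primarily a
# debugging aid for unusual scheduling block layouts.
raw = Alma.stage_data(uid, return_json=True)
```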
@@ -280,33 +287,50 @@ def stage_data(self, uids):
                     # this indicates a wrong server is being used;
                     # the "pre-feb2020" stager will be phased out
                     # when the new services are deployed
-                    return self.stage_data_prefeb2020(uids)
+                    raise RemoteServiceError("Failed query!  This shouldn't happen - please "
+                                             "report the issue as it may indicate a change in "
+                                             "the ALMA servers.")
                 else:
                     raise
-            if jdata['type'] != 'PROJECT':
-                log.error("Skipped uid {uu} because it is not a project and"
-                          "lacks the appropriate metadata; it is a "
-                          "{jdata}".format(uu=uu, jdata=jdata['type']))
-                continue
-            table = uid_json_to_table(jdata)
-            table['sizeInBytes'].unit = u.B
-            table.rename_column('sizeInBytes', 'size')
-            table.add_column(Column(data=['{dataarchive_url}/dataPortal/{name}'
-                                          .format(dataarchive_url=dataarchive_url,
-                                                  name=name)
-                                          for name in table['name']],
-                                    name='URL'))
-
-            isp = self.is_proprietary(uid)
-            table.add_column(Column(data=[isp for row in table],
-                                    name='isProprietary'))
-
-            tables.append(table)
-            log.debug("Completed metadata retrieval for {0}".format(uu))
+
+            if return_json:
+                tables.append(jdata)
+            else:
+                if jdata['type'] != 'PROJECT':
+                    log.error("Skipped uid {uu} because it is not a project and "
+                              "lacks the appropriate metadata; it is a "
+                              "{jdata}".format(uu=uu, jdata=jdata['type']))
+                    continue
+                if expand_tarfiles:
+                    table = uid_json_to_table(jdata, productlist=['ASDM',
+                                                                  'PIPELINE_PRODUCT'])
+                else:
+                    table = uid_json_to_table(jdata,
+                                              productlist=['ASDM',
+                                                           'PIPELINE_PRODUCT',
+                                                           'PIPELINE_PRODUCT_TARFILE',
+                                                           'PIPELINE_AUXILIARY_TARFILE'])
+                table['sizeInBytes'].unit = u.B
+                table.rename_column('sizeInBytes', 'size')
+                table.add_column(Column(data=['{dataarchive_url}/dataPortal/{name}'
+                                              .format(dataarchive_url=dataarchive_url,
+                                                      name=name)
+                                              for name in table['name']],
+                                        name='URL'))
+
+                isp = self.is_proprietary(uid)
+                table.add_column(Column(data=[isp for row in table],
+                                        name='isProprietary'))
+
+                tables.append(table)
+                log.debug("Completed metadata retrieval for {0}".format(uu))
 
         if len(tables) == 0:
             raise ValueError("No valid UIDs supplied.")
 
+        if return_json:
+            return tables
+
         table = table_vstack(tables)
 
         return table
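
The stacked table's 'URL' column is built from '{dataarchive_url}/dataPortal/{name}', its 'size' column is the renamed sizeInBytes with unit u.B attached, and 'isProprietary' repeats the per-UID flag. Downstream use might look like the sketch below, assuming the module's existing download_files method (the same one the removed 405 error message points to):

```python
from astroquery.alma import Alma

files = Alma.stage_data('uid://A002/X391d0b/X7b')

# Inspect what would be downloaded and how large it is.
print(files['name', 'size'])

# Keep only non-proprietary rows before fetching anything.
public = files[[not prop for prop in files['isProprietary']]]
Alma.download_files(public['URL'], savedir='.')
```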
@@ -330,167 +354,6 @@ def is_proprietary(self, uid):
 
         return isp
 
-    def stage_data_prefeb2020(self, uids):
-        """
-        Stage ALMA data - old server style
-
-        NOTE: this method will be removed when a new ALMA service is deployed
-        in March 2020
-
-        Parameters
-        ----------
-        uids : list or str
-            A list of valid UIDs or a single UID.
-            UIDs should have the form: 'uid://A002/X391d0b/X7b'
-
-        Returns
-        -------
-        data_file_table : Table
-            A table containing 3 columns: the UID, the file URL (for future
-            downloading), and the file size
-        """
-
-        """
-        With log.set_level(10)
-        INFO: Staging files... [astroquery.alma.core]
-        DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
-        DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
-        DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
-        DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
-        DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
-        .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]
-        """
-
-        import time
-        from requests import HTTPError
-        from ..utils import url_helpers
-        import sys
-        from six.moves.urllib_parse import urlparse
-
-        if isinstance(uids, six.string_types + (np.bytes_,)):
-            uids = [uids]
-        if not isinstance(uids, (list, tuple, np.ndarray)):
-            raise TypeError("Datasets must be given as a list of strings.")
-
-        log.info("Staging files...")
-
-        self._get_dataarchive_url()
-
-        url = urljoin(self._get_dataarchive_url(), 'rh/submission')
-        log.debug("First request URL: {0}".format(url))
-        # 'ALMA+uid___A002_X391d0b_X7b'
-        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
-        log.debug("First request payload: {0}".format(payload))
-
-        self._staging_log = {'first_post_url': url}
-
-        # Request staging for the UIDs
-        # This component cannot be cached, since the returned data can change
-        # if new data are uploaded
-        response = self._request('POST', url, data=payload,
-                                 timeout=self.TIMEOUT, cache=False)
-        self._staging_log['initial_response'] = response
-        log.debug("First response URL: {0}".format(response.url))
-        if 'login' in response.url:
-            raise ValueError("You must login before downloading this data set.")
-
-        if response.status_code == 405:
-            if hasattr(self, '_last_successful_staging_log'):
-                log.warning("Error 405 received.  If you have previously staged "
-                            "the same UIDs, the result returned is probably "
-                            "correct, otherwise you may need to create a fresh "
-                            "astroquery.Alma instance.")
-                return self._last_successful_staging_log['result']
-            else:
-                raise HTTPError("Received an error 405: this may indicate you "
-                                "have already staged the data.  Try downloading "
-                                "the file URLs directly with download_files.")
-        response.raise_for_status()
-
-        if 'j_spring_cas_security_check' in response.url:
-            time.sleep(1)
-            # CANNOT cache this stage: it not a real data page! results in
-            # infinite loops
-            response = self._request('POST', url, data=payload,
-                                     timeout=self.TIMEOUT, cache=False)
-            self._staging_log['initial_response'] = response
-            if 'j_spring_cas_security_check' in response.url:
-                log.warning("Staging request was not successful.  Try again?")
-                response.raise_for_status()
-
-        if 'j_spring_cas_security_check' in response.url:
-            raise RemoteServiceError("Could not access data.  This error "
-                                     "can arise if the data are private and "
-                                     "you do not have access rights or are "
-                                     "not logged in.")
-
-        # make sure the URL is formatted as expected, otherwise the request ID
-        # will be wrong
-        # (the request ID can also be found from the javascript in the request
-        # response)
-        if response.url.split("/")[-1] == 'submission':
-            request_id = response.url.split("/")[-2]
-            self._staging_log['request_id'] = request_id
-            log.debug("Request ID: {0}".format(request_id))
-
-            # Submit a request for the specific request ID identified above
-            submission_url = urljoin(self._get_dataarchive_url(),
-                                     url_helpers.join('rh/submission', request_id))
-            log.debug("Submission URL: {0}".format(submission_url))
-            self._staging_log['submission_url'] = submission_url
-            staging_submission = self._request('GET', submission_url, cache=True)
-            self._staging_log['staging_submission'] = staging_submission
-            staging_submission.raise_for_status()
-
-            data_page_url = staging_submission.url
-        elif response.url.split("/")[-3] == 'requests':
-            data_page_url = response.url
-
-        self._staging_log['data_page_url'] = data_page_url
-        dpid = data_page_url.split("/")[-1]
-        self._staging_log['staging_page_id'] = dpid
-
-        # CANNOT cache this step: please_wait will happen infinitely
-        data_page = self._request('GET', data_page_url, cache=False)
-        self._staging_log['data_page'] = data_page
-        data_page.raise_for_status()
-
-        has_completed = False
-        while not has_completed:
-            time.sleep(1)
-            summary = self._request('GET', url_helpers.join(data_page_url,
-                                                            'summary'),
-                                    cache=False)
-            summary.raise_for_status()
-            print(".", end='')
-            sys.stdout.flush()
-            has_completed = summary.json()['complete']
-
-        self._staging_log['summary'] = summary
-        summary.raise_for_status()
-        self._staging_log['json_data'] = json_data = summary.json()
-
-        username = self.USERNAME if self.USERNAME else 'anonymous'
-
-        # templates:
-        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
-        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
-        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar
-
-        url_decomposed = urlparse(data_page_url)
-        base_url = ('{uri.scheme}://{uri.netloc}/'
-                    'dataPortal/requests/{username}/'
-                    '{staging_page_id}/ALMA'.format(uri=url_decomposed,
-                                                    staging_page_id=dpid,
-                                                    username=username,
-                                                    ))
-        tbl = self._json_summary_to_table(json_data, base_url=base_url)
-        self._staging_log['result'] = tbl
-        self._staging_log['file_urls'] = tbl['URL']
-        self._last_successful_staging_log = self._staging_log
-
-        return tbl
-
     def _HEADER_data_size(self, files):
         """
         Given a list of file URLs, return the data size.  This is useful for
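
As the truncated docstring above suggests, _HEADER_data_size sums the sizes of a list of file URLs from their HTTP headers. Below is a self-contained sketch of that technique using plain requests; the helper name and details are assumptions for illustration, not this module's actual implementation:

```python
import requests

def header_data_size(urls):
    """Return per-URL sizes in bytes, and their total, via HEAD requests."""
    sizes = {}
    for url in urls:
        response = requests.head(url, allow_redirects=True)
        response.raise_for_status()
        # Content-Length can be absent, e.g. for chunked responses.
        length = response.headers.get('Content-Length')
        sizes[url] = int(length) if length is not None else 0
    return sizes, sum(sizes.values())
```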
@@ -1088,53 +951,6 @@ def _validate_payload(self, payload):
                              " by the ALMA query service:"
                              " {0}".format(invalid_params))
 
-    def _json_summary_to_table(self, data, base_url):
-        """
-        Special tool to convert some JSON metadata to a table Obsolete as of
-        March 2020 - should be removed along with stage_data_prefeb2020
-        """
-        from ..utils import url_helpers
-        from six import iteritems
-        columns = {'mous_uid': [], 'URL': [], 'size': []}
-        for entry in data['node_data']:
-            # de_type can be useful (e.g., MOUS), but it is not necessarily
-            # specified
-            # file_name and file_key *must* be specified.
-            is_file = (entry['file_name'] != 'null' and
-                       entry['file_key'] != 'null')
-            if is_file:
-                # "de_name": "ALMA+uid://A001/X122/X35e",
-                columns['mous_uid'].append(entry['de_name'][5:])
-                if entry['file_size'] == 'null':
-                    columns['size'].append(np.nan * u.Gbyte)
-                else:
-                    columns['size'].append(
-                        (int(entry['file_size']) * u.B).to(u.Gbyte))
-                # example template for constructing url:
-                # https://almascience.eso.org/dataPortal/requests/keflavich/940238268/ALMA/
-                # uid___A002_X9d6f4c_X154/2013.1.00546.S_uid___A002_X9d6f4c_X154.asdm.sdm.tar
-                # above is WRONG... except for ASDMs, when it's right
-                # should be:
-                # 2013.1.00546.S_uid___A002_X9d6f4c_X154.asdm.sdm.tar/2013.1.00546.S_uid___A002_X9d6f4c_X154.asdm.sdm.tar
-                #
-                # apparently ASDMs are different from others:
-                # templates:
-                # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
-                # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
-                # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar
-                url = url_helpers.join(base_url,
-                                       entry['file_key'],
-                                       entry['file_name'])
-                if 'null' in url:
-                    raise ValueError("The URL {0} was created containing "
-                                     "'null', which is invalid.".format(url))
-                columns['URL'].append(url)
-
-        columns['size'] = u.Quantity(columns['size'], u.Gbyte)
-
-        tbl = Table([Column(name=k, data=v) for k, v in iteritems(columns)])
-        return tbl
-
     def get_project_metadata(self, projectid, cache=True):
         """
         Get the metadata - specifically, the project abstract - for a given project ID.
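
For the trailing context line, a one-line usage sketch of get_project_metadata, reusing a project code that appears in the removed comments above (the exact return format is not shown in this diff):

```python
from astroquery.alma import Alma

abstract = Alma.get_project_metadata('2013.1.00308.S')
```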