Skip to content

Commit c8d678b

Browse files
authored
Merge pull request #1614 from gbrammer/master
Allow retrieval from a previous ESO archive request
2 parents de0a326 + 0ecad92 commit c8d678b

File tree

3 files changed

+92
-30
lines changed

3 files changed

+92
-30
lines changed

CHANGES.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ New Tools and Services
88
Service fixes and enhancements
99
------------------------------
1010

11+
eso
12+
^^^
13+
14+
- Add option to retrieve_data from an earlier archive query [#1614]
15+
1116
sdss
1217
^^^^
1318

astroquery/eso/core.py

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,8 @@ def _download_file(self, url, local_filepath, **kwargs):
629629
return resp
630630

631631
def retrieve_data(self, datasets, continuation=False, destination=None,
632-
with_calib='none', request_all_objects=False, unzip=True):
632+
with_calib='none', request_all_objects=False,
633+
unzip=True, request_id=None):
633634
"""
634635
Retrieve a list of datasets from the ESO archive.
635636
@@ -657,6 +658,12 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
657658
unzip : bool
658659
Unzip compressed files from the archive after download. `True` by
659660
default.
661+
request_id : str, int
662+
Retrieve from an existing request number rather than sending a new
663+
query. The identifier is the last component of the URL in the email
664+
sent by the archive for the earlier request, as in:
665+
666+
https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]
660667
661668
Returns
662669
-------
@@ -694,9 +701,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
694701
datasets, continuation=continuation, destination=destination)
695702

696703
# Second: Check that the datasets to download are in the archive
697-
log.info("Checking availability of datasets to download...")
698-
valid_datasets = [self.verify_data_exists(ds)
704+
if request_id is None:
705+
log.info("Checking availability of datasets to download...")
706+
valid_datasets = [self.verify_data_exists(ds)
699707
for ds in datasets_to_download]
708+
else:
709+
# Assume all valid if a request_id was provided
710+
valid_datasets = [(ds, True) for ds in datasets_to_download]
711+
700712
if not all(valid_datasets):
701713
invalid_datasets = [ds for ds, v in zip(datasets_to_download,
702714
valid_datasets) if not v]
@@ -710,33 +722,52 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
710722
self.login()
711723
url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html"
712724
with suspend_cache(self): # Never cache staging operations
713-
log.info("Contacting retrieval server...")
714-
retrieve_data_form = self._request("GET", url, cache=False)
715-
retrieve_data_form.raise_for_status()
716-
log.info("Staging request...")
717-
inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
718-
data_confirmation_form = self._activate_form(
719-
retrieve_data_form, form_index=-1, inputs=inputs,
720-
cache=False)
721-
722-
data_confirmation_form.raise_for_status()
723-
724-
root = BeautifulSoup(data_confirmation_form.content,
725-
'html5lib')
726-
login_button = root.select('input[value=LOGIN]')
727-
if login_button:
728-
raise LoginError("Not logged in. "
729-
"You must be logged in to download data.")
730-
inputs = {}
731-
if with_calib != 'none':
732-
inputs['requestCommand'] = calib_options[with_calib]
733-
734-
# TODO: There may be another screen for Not Authorized; that
735-
# should be included too
736-
# form name is "retrieve"; no id
737-
data_download_form = self._activate_form(
738-
data_confirmation_form, form_index=-1, inputs=inputs,
739-
cache=False)
725+
if request_id is None:
726+
log.info("Contacting retrieval server...")
727+
retrieve_data_form = self._request("GET", url,
728+
cache=False)
729+
retrieve_data_form.raise_for_status()
730+
log.info("Staging request...")
731+
inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
732+
data_confirmation_form = self._activate_form(
733+
retrieve_data_form, form_index=-1, inputs=inputs,
734+
cache=False)
735+
736+
data_confirmation_form.raise_for_status()
737+
738+
root = BeautifulSoup(data_confirmation_form.content,
739+
'html5lib')
740+
login_button = root.select('input[value=LOGIN]')
741+
if login_button:
742+
raise LoginError("Not logged in. "
743+
"You must be logged in to download data.")
744+
inputs = {}
745+
if with_calib != 'none':
746+
inputs['requestCommand'] = calib_options[with_calib]
747+
748+
# TODO: There may be another screen for Not Authorized;
749+
# that should be included too
750+
# form name is "retrieve"; no id
751+
data_download_form = self._activate_form(
752+
data_confirmation_form, form_index=-1, inputs=inputs,
753+
cache=False)
754+
else:
755+
# Build URL by hand
756+
request_url = 'https://dataportal.eso.org/rh/requests/'
757+
request_url += f'{self.USERNAME}/{request_id}'
758+
data_download_form = self._request("GET", request_url,
759+
cache=False)
760+
761+
_content = data_download_form.content.decode('utf-8')
762+
if ('Request Handler - Error' in _content):
763+
# Likely a problem with the request_url
764+
msg = (f"The form at {request_url} returned an error."
765+
" See your recent requests at "
766+
"https://dataportal.eso.org/rh/requests/"
767+
f"{self.USERNAME}/recentRequests")
768+
769+
raise RemoteServiceError(msg)
770+
740771
log.info("Staging form is at {0}"
741772
.format(data_download_form.url))
742773
root = BeautifulSoup(data_download_form.content, 'html5lib')
@@ -809,6 +840,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
809840
log.debug("Files:\n{}".format('\n'.join(fileLinks)))
810841
for i, fileLink in enumerate(fileLinks, 1):
811842
fileId = fileLink.rsplit('/', maxsplit=1)[1]
843+
844+
if request_id is not None:
845+
# Since we fetched the script directly without sending
846+
# a new request, check here that the file in the list
847+
# is among those requested in the input list
848+
if fileId.split('.fits')[0] not in datasets_to_download:
849+
continue
850+
812851
log.info("Downloading file {}/{}: {}..."
813852
.format(i, nfiles, fileId))
814853
filename = self._request("GET", fileLink, save=True,

docs/eso/eso.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,24 @@ a ``location`` keyword in the call to :meth:`~astroquery.eso.EsoClass.retrieve_d
348348
In all cases, if a requested dataset is already found,
349349
it is not downloaded again from the archive.
350350

351+
By default, calling ``eso.retrieve_data`` submits a new archive request
352+
through the web form to stage and download the requested ``datasets``. If you
353+
would like to download datasets from an existing request, either submitted
354+
through the functions here or externally, call ``retrieve_data`` with the
355+
``request_id`` option:
356+
357+
.. code-block:: python
358+
359+
>>> data_files = eso.retrieve_data(table['DP.ID'][:2], request_id=999999)
360+
361+
The ``request_id`` can be found in the automatic email sent by the archive after
362+
staging the initial request, as the last component of a URL of the form https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]. A summary of your available requests is shown at https://dataportal.eso.org/rh/requests/[USERNAME]/recentRequests.
363+
364+
Note: The function checks that the retrieval URL built from
365+
``request_id`` is valid, and it downloads only those files listed there that
366+
are also in the user-specified ``datasets``. However, there is currently no
367+
reverse check that every one of the specified ``datasets`` is included in
368+
the request identified by ``request_id``.
351369

352370
Reference/API
353371
=============

0 commit comments

Comments
 (0)