Merge pull request #1614 from gbrammer/master

bsipocz · web-flow · commit c8d678b69050 · 2021-11-24T09:41:46.000-08:00
Allow retrieval from a previous ESO archive request
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -8,6 +8,11 @@ New Tools and Services
 Service fixes and enhancements
 ------------------------------
 
+eso
+^^^
+
+- Add ``request_id`` option to ``retrieve_data`` to download from an earlier archive request. [#1614]
+
 sdss
 ^^^^
 
diff --git a/astroquery/eso/core.py b/astroquery/eso/core.py
@@ -629,7 +629,8 @@ def _download_file(self, url, local_filepath, **kwargs):
         return resp
 
     def retrieve_data(self, datasets, continuation=False, destination=None,
-                      with_calib='none', request_all_objects=False, unzip=True):
+                      with_calib='none', request_all_objects=False,
+                      unzip=True, request_id=None):
         """
         Retrieve a list of datasets form the ESO archive.
 
@@ -657,6 +658,12 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
         unzip : bool
             Unzip compressed files from the archive after download. `True` by
             default.
+        request_id : str or int, optional
+            Retrieve from an existing archive request instead of submitting
+            a new one. The identifier is the last component of the URL in
+            the email sent by the archive for the earlier request, as in:
+
+                https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]
 
         Returns
         -------
@@ -694,9 +701,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
                 datasets, continuation=continuation, destination=destination)
 
         # Second: Check that the datasets to download are in the archive
-        log.info("Checking availability of datasets to download...")
-        valid_datasets = [self.verify_data_exists(ds)
+        if request_id is None:
+            log.info("Checking availability of datasets to download...")
+            valid_datasets = [self.verify_data_exists(ds)
                           for ds in datasets_to_download]
+        else:
+            # Assume all valid if a request_id was provided
+            valid_datasets = [(ds, True) for ds in datasets_to_download]
+
         if not all(valid_datasets):
             invalid_datasets = [ds for ds, v in zip(datasets_to_download,
                                                     valid_datasets) if not v]
@@ -710,33 +722,52 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
                 self.login()
             url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html"
             with suspend_cache(self):  # Never cache staging operations
-                log.info("Contacting retrieval server...")
-                retrieve_data_form = self._request("GET", url, cache=False)
-                retrieve_data_form.raise_for_status()
-                log.info("Staging request...")
-                inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
-                data_confirmation_form = self._activate_form(
-                    retrieve_data_form, form_index=-1, inputs=inputs,
-                    cache=False)
-
-                data_confirmation_form.raise_for_status()
-
-                root = BeautifulSoup(data_confirmation_form.content,
-                                     'html5lib')
-                login_button = root.select('input[value=LOGIN]')
-                if login_button:
-                    raise LoginError("Not logged in. "
-                                     "You must be logged in to download data.")
-                inputs = {}
-                if with_calib != 'none':
-                    inputs['requestCommand'] = calib_options[with_calib]
-
-                # TODO: There may be another screen for Not Authorized; that
-                # should be included too
-                # form name is "retrieve"; no id
-                data_download_form = self._activate_form(
-                    data_confirmation_form, form_index=-1, inputs=inputs,
-                    cache=False)
+                if request_id is None:
+                    log.info("Contacting retrieval server...")
+                    retrieve_data_form = self._request("GET", url,
+                                                        cache=False)
+                    retrieve_data_form.raise_for_status()
+                    log.info("Staging request...")
+                    inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
+                    data_confirmation_form = self._activate_form(
+                        retrieve_data_form, form_index=-1, inputs=inputs,
+                        cache=False)
+
+                    data_confirmation_form.raise_for_status()
+
+                    root = BeautifulSoup(data_confirmation_form.content,
+                                         'html5lib')
+                    login_button = root.select('input[value=LOGIN]')
+                    if login_button:
+                        raise LoginError("Not logged in. "
+                                    "You must be logged in to download data.")
+                    inputs = {}
+                    if with_calib != 'none':
+                        inputs['requestCommand'] = calib_options[with_calib]
+
+                    # TODO: There may be another screen for Not Authorized;
+                    # that should be included too
+                    # form name is "retrieve"; no id
+                    data_download_form = self._activate_form(
+                        data_confirmation_form, form_index=-1, inputs=inputs,
+                        cache=False)
+                else:
+                    # Build URL by hand
+                    request_url = 'https://dataportal.eso.org/rh/requests/'
+                    request_url += f'{self.USERNAME}/{request_id}'
+                    data_download_form = self._request("GET", request_url,
+                                                       cache=False)
+
+                    _content = data_download_form.content.decode('utf-8')
+                    if ('Request Handler - Error' in _content):
+                        # Likely a problem with the request_url
+                        msg = (f"The form at {request_url} returned an error."
+                                " See your recent requests at "
+                                "https://dataportal.eso.org/rh/requests/"
+                                f"{self.USERNAME}/recentRequests")
+
+                        raise RemoteServiceError(msg)
+
                 log.info("Staging form is at {0}"
                          .format(data_download_form.url))
                 root = BeautifulSoup(data_download_form.content, 'html5lib')
@@ -809,6 +840,14 @@ def retrieve_data(self, datasets, continuation=False, destination=None,
             log.debug("Files:\n{}".format('\n'.join(fileLinks)))
             for i, fileLink in enumerate(fileLinks, 1):
                 fileId = fileLink.rsplit('/', maxsplit=1)[1]
+
+                if request_id is not None:
+                    # Since we fetched the script directly without sending
+                    # a new request, check here that the file in the list
+                    # is among those requested in the input list
+                    if fileId.split('.fits')[0] not in datasets_to_download:
+                        continue
+
                 log.info("Downloading file {}/{}: {}..."
                          .format(i, nfiles, fileId))
                 filename = self._request("GET", fileLink, save=True,
diff --git a/docs/eso/eso.rst b/docs/eso/eso.rst
@@ -348,6 +348,24 @@ a ``location`` keyword in the call to :meth:`~astroquery.eso.EsoClass.retrieve_d
 In all cases, if a requested dataset is already found,
 it is not downloaded again from the archive.
 
+By default, calling ``eso.retrieve_data`` submits a new archive request
+through the web form to stage and download the requested ``datasets``. If you
+would like to download datasets from an existing request, either submitted
+through the functions here or externally, call ``retrieve_data`` with the
+``request_id`` option:
+
+.. code-block:: python
+
+    >>> data_files = eso.retrieve_data(table['DP.ID'][:2], request_id=999999)
+
+The ``request_id`` can be found in the automatic email sent by the archive after
+staging the initial request, as the last component of the URL ``https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]``. A summary of your available requests is shown at ``https://dataportal.eso.org/rh/requests/[USERNAME]/recentRequests``.
+
+Note: The function checks that the retrieval URL built from ``request_id`` is
+valid, and it downloads only those files in the request that match the
+user-specified ``datasets``. However, there is currently no reverse check
+that every one of the specified ``datasets`` is actually included in the
+request identified by ``request_id``.
 
 Reference/API
 =============