Skip to content

Commit 3f16e62

Browse files
authored
Merge pull request #2797 from esdc-esac-esa-int/ESA_ehst-download_improvements
Esa ehst download improvements
2 parents cef75bc + f1a833d commit 3f16e62

File tree

5 files changed

+361
-132
lines changed

5 files changed

+361
-132
lines changed

CHANGES.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ esa.hubble
3131

3232
- Update to TAP url to query data and download files, aligned with the new eHST Science Archive. [#2567][#2597]
3333
- Status and maintenance messages from eHST TAP when the module is instantiated. get_status_messages method to retrieve them. [#2597]
34+
- New methods ``download_file``, to download a single file, and ``download_fits_files``, to download the FITS files associated with an observation. [#2797]
35+
- New method ``get_associated_files`` to retrieve all the files associated with an observation. [#2797]
3436

3537
solarsystem.neodys
3638
^^^^^^^^^^^^^^^^^^
@@ -64,6 +66,7 @@ esa.hubble
6466
a lot faster. [#2524]
6567
- Method query_hst_tap has been deprecated and is replaced with query_tap, with the same arguments. [#2597]
6668
- Product types in download_product method have been modified to: PRODUCT, SCIENCE_PRODUCT or POSTCARD. [#2597]
69+
- Added ``proposal`` keyword argument to several methods, allowing results to be filtered by Proposal ID. [#2797]
6770

6871
alma
6972
^^^^

astroquery/esa/hubble/core.py

Lines changed: 114 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
European Space Agency (ESA)
99
1010
"""
11+
import os
1112
from urllib.parse import urlencode
1213

1314
from astropy import units
@@ -27,14 +28,29 @@
2728
__all__ = ['ESAHubble', 'ESAHubbleClass']
2829

2930

31+
def _check_rename_to_gz(filename):
32+
rename = False
33+
if os.path.exists(filename):
34+
with open(filename, 'rb') as test_f:
35+
if test_f.read(2) == b'\x1f\x8b' and not filename.endswith('.fits.gz'):
36+
rename = True
37+
38+
if rename:
39+
output = os.path.splitext(filename)[0] + '.fits.gz'
40+
os.rename(filename, output)
41+
return output
42+
else:
43+
return filename
44+
45+
3046
class ESAHubbleClass(BaseQuery):
3147
"""
3248
Class to init ESA Hubble Module and communicate with eHST TAP
3349
"""
3450
TIMEOUT = conf.TIMEOUT
3551
calibration_levels = {"AUXILIARY": 0, "RAW": 1, "CALIBRATED": 2,
3652
"PRODUCT": 3}
37-
product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL" or "AUXILIARY"]
53+
product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL", "AUXILIARY"]
3854
copying_string = "Copying file to {0}..."
3955

4056
def __init__(self, *, tap_handler=None, show_messages=True):
@@ -93,7 +109,7 @@ def download_product(self, observation_id, *, calibration_level=None,
93109
"RETRIEVAL_TYPE": "OBSERVATION"}
94110

95111
if filename is None:
96-
filename = observation_id + ".tar"
112+
filename = observation_id
97113

98114
if calibration_level:
99115
params["CALIBRATIONLEVEL"] = calibration_level
@@ -107,7 +123,7 @@ def download_product(self, observation_id, *, calibration_level=None,
107123
filename = self._get_product_filename(product_type, filename)
108124
self._tap.load_data(params_dict=params, output_file=filename, verbose=verbose)
109125

110-
return filename
126+
return _check_rename_to_gz(filename=filename)
111127

112128
def __set_product_type(self, product_type):
113129
if product_type:
@@ -216,18 +232,12 @@ def __validate_product_type(self, product_type):
216232
raise ValueError("This product_type is not allowed")
217233

218234
def _get_product_filename(self, product_type, filename):
219-
if (product_type == "PRODUCT"):
220-
return filename
221-
elif (product_type == "SCIENCE"):
222-
log.info("This is a SCIENCE_PRODUCT, the filename will be "
223-
f"renamed to {filename}.fits.gz")
224-
return f"{filename}.fits.gz"
225-
elif (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
226-
log.info("This is a POSTCARD, the filename will be "
235+
if (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
236+
log.info("This is an image, the filename will be "
227237
f"renamed to {filename}.jpg")
228238
return f"{filename}.jpg"
229-
230-
return filename
239+
else:
240+
return f"{filename}.zip"
231241

232242
def get_artifact(self, artifact_id, *, filename=None, verbose=False):
233243
"""
@@ -236,7 +246,7 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
236246
Parameters
237247
----------
238248
artifact_id : string
239-
id of the artifact to be downloaded, mandatory
249+
filename to be downloaded, mandatory
240250
The identifier of the physical product (file) we want to retrieve.
241251
filename : string
242252
file name to be used to store the artifact, optional, default None
@@ -250,13 +260,83 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
250260
None. It downloads the artifact indicated
251261
"""
252262

253-
params = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": artifact_id, "TAPCLIENT": "ASTROQUERY"}
263+
return self.download_file(file=artifact_id, filename=filename, verbose=verbose)
264+
265+
def get_associated_files(self, observation_id, *, verbose=False):
    """
    Retrieves all the files associated to an observation

    Parameters
    ----------
    observation_id : string
        id of the observation whose files are listed, mandatory
        The identifier of the observation we want to retrieve, regardless
        of whether it is simple or composite.
    verbose : bool
        optional, default 'False'
        currently unused: it is not forwarded to ``query_tap``

    Returns
    -------
    The result of ``query_tap`` for the generated query, which selects the
    columns ``filename``, ``calibration_level``, ``type`` and
    ``size_uncompressed``.
    """
    # Double any single quote so the identifier cannot close the string
    # literal early and produce a malformed query.
    safe_id = str(observation_id).replace("'", "''")
    query = ("select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
             "pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
             "join ehst.plane p on p.plane_id = art.plane_id where "
             f"art.observation_id = '{safe_id}'")
    return self.query_tap(query=query)
288+
289+
def download_fits_files(self, observation_id, *, verbose=False):
    """
    Download every FITS file associated to an observation

    Parameters
    ----------
    observation_id : string
        id of the observation to be downloaded, mandatory
        The identifier of the observation we want to retrieve, regardless
        of whether it is simple or composite.
    verbose : bool
        optional, default 'False'
        flag to display information about the process

    Returns
    -------
    None. Each associated file whose name ends with ``.fits`` is downloaded
    through ``download_file``, using that same name as output file name.
    """
    associated = self.get_associated_files(observation_id=observation_id, verbose=verbose)
    names = [row['filename'] for row in associated]
    fits_names = [name for name in names if name.endswith('.fits')]
    for fits_name in fits_names:
        if verbose:
            print(f"Downloading {fits_name} ...")
        self.download_file(file=fits_name, filename=fits_name, verbose=verbose)
312+
313+
def download_file(self, file, *, filename=None, verbose=False):
    """
    Download a file from eHST based on its filename.

    Parameters
    ----------
    file : string
        file name of the artifact to be downloaded

    filename : string
        file name to be used to store the file, optional, default None
        When None, ``file`` is used as the output file name.
    verbose : bool
        optional, default 'False'
        flag to display information about the process

    Returns
    -------
    The value returned by ``_check_rename_to_gz`` for the output file name.
    """
    output_file = file if filename is None else filename
    request = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": file, "TAPCLIENT": "ASTROQUERY"}

    self._tap.load_data(params_dict=request, output_file=output_file, verbose=verbose)

    return _check_rename_to_gz(filename=output_file)
260340

261341
def get_postcard(self, observation_id, *, calibration_level="RAW",
262342
resolution=256, filename=None, verbose=False):
@@ -391,6 +471,7 @@ def cone_search_criteria(self, radius, *, target=None,
391471
obs_collection=None,
392472
instrument_name=None,
393473
filters=None,
474+
proposal=None,
394475
async_job=True,
395476
filename=None,
396477
output_format='votable',
@@ -428,6 +509,8 @@ def cone_search_criteria(self, radius, *, target=None,
428509
Name(s) of the instrument(s) used to generate the dataset
429510
filters : list of str, optional
430511
Name(s) of the filter(s) used to generate the dataset
512+
proposal : int, optional
513+
Proposal ID associated to the observations
431514
async_job : bool, optional, default 'False'
432515
executes the query (job) in asynchronous/synchronous mode (default
433516
synchronous)
@@ -460,6 +543,7 @@ def cone_search_criteria(self, radius, *, target=None,
460543
obs_collection=obs_collection,
461544
instrument_name=instrument_name,
462545
filters=filters,
546+
proposal=proposal,
463547
async_job=True,
464548
get_query=True)
465549
if crit_query.endswith(")"):
@@ -619,7 +703,7 @@ def query_hst_tap(self, query, *, async_job=False, output_file=None,
619703
def query_criteria(self, *, calibration_level=None,
620704
data_product_type=None, intent=None,
621705
obs_collection=None, instrument_name=None,
622-
filters=None, async_job=True, output_file=None,
706+
filters=None, proposal=None, async_job=True, output_file=None,
623707
output_format="votable", verbose=False,
624708
get_query=False):
625709
"""
@@ -639,13 +723,15 @@ def query_criteria(self, *, calibration_level=None,
639723
intent : str, optional
640724
The intent of the original observer in acquiring this observation.
641725
SCIENCE or CALIBRATION
642-
collection : list of str, optional
726+
obs_collection : list of str, optional
643727
List of collections that are available in eHST catalogue.
644-
HLA, HST
728+
HLA, HST, HAP
645729
instrument_name : list of str, optional
646730
Name(s) of the instrument(s) used to generate the dataset
647731
filters : list of str, optional
648732
Name(s) of the filter(s) used to generate the dataset
733+
proposal : int, optional
734+
Proposal ID associated to the observations
649735
async_job : bool, optional, default 'True'
650736
executes the query (job) in asynchronous/synchronous mode (default
651737
synchronous)
@@ -680,6 +766,11 @@ def query_criteria(self, *, calibration_level=None,
680766
parameters.append("intent LIKE '%{}%'".format(intent.lower()))
681767
else:
682768
raise ValueError("intent must be a string")
769+
if proposal is not None:
770+
if isinstance(proposal, int):
771+
parameters.append("proposal_id = '{}'".format(proposal))
772+
else:
773+
raise ValueError("Proposal ID must be an integer")
683774
if self.__check_list_strings(obs_collection):
684775
parameters.append("(collection LIKE '%{}%')".format(
685776
"%' OR collection LIKE '%".join(obs_collection)
@@ -767,7 +858,7 @@ def get_status_messages(self):
767858
if response.status == 200:
768859
for line in response:
769860
string_message = line.decode("utf-8")
770-
print(string_message[string_message.index('=')+1:])
861+
print(string_message[string_message.index('=') + 1:])
771862
except OSError:
772863
print("Status messages could not be retrieved")
773864

@@ -810,10 +901,8 @@ def get_columns(self, table_name, *, only_names=True, verbose=False):
810901
return columns
811902

812903
def _getCoordInput(self, value):
813-
if not (isinstance(value, str)
814-
or isinstance(value, SkyCoord)):
815-
raise ValueError("Coordinates"
816-
+ " must be either a string or astropy.coordinates")
904+
if not (isinstance(value, str) or isinstance(value, SkyCoord)):
905+
raise ValueError("Coordinates must be either a string or astropy.coordinates")
817906
if isinstance(value, str):
818907
return SkyCoord(value)
819908
else:

astroquery/esa/hubble/tests/test_esa_hubble.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import os
1313
import shutil
14+
import gzip
1415
from pathlib import Path
1516
from unittest.mock import MagicMock
1617
from unittest.mock import patch
@@ -22,6 +23,7 @@
2223
from requests.models import Response
2324

2425
from astroquery.esa.hubble import ESAHubbleClass
26+
from astroquery.esa.hubble.core import _check_rename_to_gz
2527
from astroquery.esa.hubble.tests.dummy_tap_handler import DummyHubbleTapHandler
2628
from astropy.utils.exceptions import AstropyDeprecationWarning
2729

@@ -76,8 +78,10 @@ def pformat():
7678

7779
class TestESAHubble:
7880

79-
def get_dummy_tap_handler(self):
80-
parameterst = {'query': "select top 10 * from hsc_v2.hubble_sc2",
81+
def get_dummy_tap_handler(self, query=None):
82+
if query is None:
83+
query = "select top 10 * from hsc_v2.hubble_sc2"
84+
parameterst = {'query': query,
8185
'output_file': "test2.vot",
8286
'output_format': "votable",
8387
'verbose': False}
@@ -228,6 +232,58 @@ def test_get_artifact(self, tmp_path):
228232
path = Path(tmp_path, "w0ji0v01t_c2f.fits.gz")
229233
ehst.get_artifact(artifact_id=path)
230234

235+
def test_download_file(self, tmp_path):
    """Smoke test: download_file runs against the dummy TAP handler."""
    artifact_name = 'w0ji0v01t_c2f.fits'
    target = Path(tmp_path, artifact_name + '.gz')
    hubble = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
    hubble.download_file(file=target, filename=target)
240+
241+
def test_get_associated_files(self):
    """get_associated_files runs with a handler built for the expected query."""
    observation_id = 'test'
    expected_query = ("select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
                      "pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
                      "join ehst.plane p on p.plane_id = art.plane_id where "
                      f"art.observation_id = '{observation_id}'")
    expected_call = {'query': expected_query,
                     'output_file': 'test2.vot',
                     'output_format': "votable",
                     'verbose': False}
    handler = self.get_dummy_tap_handler(query=expected_query)
    hubble = ESAHubbleClass(tap_handler=handler, show_messages=False)
    hubble.get_associated_files(observation_id=observation_id)
    # The check is made on a freshly built handler, as in the original test
    self.get_dummy_tap_handler(query=expected_query).check_call("launch_job", expected_call)
254+
255+
@patch.object(ESAHubbleClass, 'get_associated_files')
def test_download_fits(self, mock_associated_files):
    """download_fits_files runs when the associated-files lookup is patched."""
    observation_id = 'test'
    expected_query = ("select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
                      "pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
                      "join ehst.plane p on p.plane_id = art.plane_id where "
                      f"art.observation_id = '{observation_id}'")
    expected_call = {'query': expected_query,
                     'output_file': 'test2.vot',
                     'output_format': "votable",
                     'verbose': False}
    # A single FITS entry stands in for the TAP result
    mock_associated_files.return_value = [{'filename': 'test.fits'}]
    handler = self.get_dummy_tap_handler(query=expected_query)
    hubble = ESAHubbleClass(tap_handler=handler, show_messages=False)
    hubble.download_fits_files(observation_id=observation_id)
    self.get_dummy_tap_handler(query=expected_query).check_call("launch_job", expected_call)
270+
271+
def test_is_not_gz(self, tmp_path):
    """A file that is not gzip data keeps its name and stays in place."""
    target_file = data_path('cone_search.vot')
    assert _check_rename_to_gz(target_file) == target_file
    # The helper must not have moved the fixture
    assert os.path.exists(target_file)
275+
276+
def test_is_gz(self, tmp_path):
    """A gzip file without the ``.fits.gz`` suffix is renamed on disk."""
    target_file = os.path.join(tmp_path, 'testgz')
    # An empty gzip stream still starts with the gzip magic number
    with gzip.open(target_file, 'wb') as f:
        f.write(b'')
    renamed = _check_rename_to_gz(target_file)
    assert renamed == target_file + '.fits.gz'
    # The helper must move the file, not just compute a new name
    assert os.path.exists(renamed)
    assert not os.path.exists(target_file)
286+
231287
def test_get_columns(self):
232288
parameters = {'table_name': "table",
233289
'only_names': True,
@@ -238,6 +294,32 @@ def test_get_columns(self):
238294
ehst.get_columns(table_name="table", only_names=True, verbose=True)
239295
dummyTapHandler.check_call("get_columns", parameters)
240296

297+
def test_query_criteria_proposal(self):
    """query_criteria builds the expected query when filtering by proposal."""
    output_file = "output_test_query_by_criteria.vot.gz"
    hubble = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
    generated_query = hubble.query_criteria(proposal=12345,
                                            async_job=False,
                                            output_file=output_file,
                                            output_format="votable",
                                            verbose=True,
                                            get_query=True)
    launched = {'query': generated_query,
                'output_file': output_file,
                'output_format': "votable",
                'verbose': False}
    expected = {'query': "select * from ehst.archive where(proposal_id = '12345')",
                'output_file': output_file,
                'output_format': "votable",
                'verbose': False}
    dummy_tap_handler = DummyHubbleTapHandler("launch_job", launched)
    dummy_tap_handler.check_call("launch_job", expected)
322+
241323
def test_query_criteria(self):
242324
parameters1 = {'calibration_level': "PRODUCT",
243325
'data_product_type': "image",

0 commit comments

Comments
 (0)