Skip to content

Commit 5a9633c

Browse files
authored
Merge pull request #3155 from snbianco/ASB-29334-download-missions
Download Products with MastMissions
2 parents 68b0f5b + aab812a commit 5a9633c

File tree

11 files changed

+1178
-104
lines changed

11 files changed

+1178
-104
lines changed

CHANGES.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,19 @@ jplspec
2626

2727
- minor improvement to lookuptable behavior [#3173,#2901]
2828

29+
mast
30+
^^^^
31+
32+
- Retrieve data products from the Missions-MAST API with ``mast.MastMissions.get_product_list``. Retrieve unique data
33+
products only with ``mast.MastMissions.get_unique_product_list``. [#3155]
34+
35+
- Filter data products retrieved from the Missions-MAST API with ``mast.MastMissions.filter_products``. [#3155]
36+
37+
- Download data products from the Missions-MAST API with ``mast.MastMissions.download_products``.
38+
Download a single data product using ``mast.MastMissions.download_file``. [#3155]
39+
40+
- Get the keyword corresponding to the dataset ID for a specific mission with ``mast.MastMissions.get_dataset_kwd``. [#3155]
41+
2942
mocserver
3043
^^^^^^^^^
3144

astroquery/mast/missions.py

Lines changed: 401 additions & 23 deletions
Large diffs are not rendered by default.

astroquery/mast/observations.py

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import astropy.units as u
2020
import astropy.coordinates as coord
2121

22-
from astropy.table import Table, Row, unique, vstack
22+
from astropy.table import Table, Row, vstack
2323
from astroquery import log
2424
from astroquery.mast.cloud import CloudAccess
2525

@@ -816,7 +816,7 @@ def download_products(self, products, *, download_dir=None, flat=False,
816816
products = self.filter_products(products, mrp_only=mrp_only, **filters)
817817

818818
# remove duplicate products
819-
products = self._remove_duplicate_products(products)
819+
products = utils.remove_duplicate_products(products, 'dataURI')
820820

821821
if not len(products):
822822
warnings.warn("No products to download.", NoResultsWarning)
@@ -928,7 +928,7 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
928928
return
929929

930930
# Remove duplicate products
931-
data_products = self._remove_duplicate_products(data_products)
931+
data_products = utils.remove_duplicate_products(data_products, 'dataURI')
932932

933933
return self._cloud_connection.get_cloud_uri_list(data_products, include_bucket, full_url)
934934

@@ -966,30 +966,6 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
966966
# Query for product URIs
967967
return self._cloud_connection.get_cloud_uri(data_product, include_bucket, full_url)
968968

969-
def _remove_duplicate_products(self, data_products):
970-
"""
971-
Removes duplicate data products that have the same dataURI.
972-
973-
Parameters
974-
----------
975-
data_products : `~astropy.table.Table`
976-
Table containing products to be checked for duplicates.
977-
978-
Returns
979-
-------
980-
unique_products : `~astropy.table.Table`
981-
Table containing products with unique dataURIs.
982-
983-
"""
984-
number = len(data_products)
985-
unique_products = unique(data_products, keys="dataURI")
986-
number_unique = len(unique_products)
987-
if number_unique < number:
988-
log.info(f"{number - number_unique} of {number} products were duplicates. "
989-
f"Only returning {number_unique} unique product(s).")
990-
991-
return unique_products
992-
993969
def get_unique_product_list(self, observations):
994970
"""
995971
Given a "Product Group Id" (column name obsid), returns a list of associated data products with
@@ -1009,7 +985,7 @@ def get_unique_product_list(self, observations):
1009985
Table containing products with unique dataURIs.
1010986
"""
1011987
products = self.get_product_list(observations)
1012-
unique_products = self._remove_duplicate_products(products)
988+
unique_products = utils.remove_duplicate_products(products, 'dataURI')
1013989
if len(unique_products) < len(products):
1014990
log.info("To return all products, use `Observations.get_product_list`")
1015991
return unique_products

astroquery/mast/services.py

Lines changed: 57 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from astropy.table import Table, MaskedColumn
1515
from astropy.utils.decorators import deprecated_renamed_argument
1616

17+
from .. import log
1718
from ..query import BaseQuery
1819
from ..utils import async_to_sync
1920
from ..utils.class_or_instance import class_or_instance
@@ -84,7 +85,12 @@ def _json_to_table(json_obj, data_key='data'):
8485
col_data = np.array([x[idx] for x in json_obj[data_key]], dtype=object)
8586
except KeyError:
8687
# it's not a data array, fall back to using column name as it is array of dictionaries
87-
col_data = np.array([x[col_name] for x in json_obj[data_key]], dtype=object)
88+
try:
89+
col_data = np.array([x[col_name] for x in json_obj[data_key]], dtype=object)
90+
except KeyError:
91+
# Skip column names not found in data
92+
log.debug('Column %s was not found in data. Skipping...', col_name)
93+
continue
8894
if ignore_value is not None:
8995
col_data[np.where(np.equal(col_data, None))] = ignore_value
9096

@@ -112,6 +118,8 @@ class ServiceAPI(BaseQuery):
112118

113119
SERVICE_URL = conf.server
114120
REQUEST_URL = conf.server + "/api/v0.1/"
121+
MISSIONS_DOWNLOAD_URL = conf.server + "/search/"
122+
MAST_DOWNLOAD_URL = conf.server + "/api/v0.1/Download/file"
115123
SERVICES = {}
116124

117125
def __init__(self, session=None):
@@ -270,27 +278,28 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j
270278

271279
request_url = self.REQUEST_URL + service_url.format(**compiled_service_args)
272280

281+
# Default headers
273282
headers = {
274283
'User-Agent': self._session.headers['User-Agent'],
275284
'Content-Type': 'application/x-www-form-urlencoded',
276285
'Accept': 'application/json'
277286
}
287+
278288
# Params as a list of tuples to allow for multiple parameters added
279289
catalogs_request = []
280-
if not page:
281-
page = params.pop('page', None)
282-
if not pagesize:
283-
pagesize = params.pop('pagesize', None)
290+
page = page or params.pop('page', None)
291+
pagesize = pagesize or params.pop('pagesize', None)
284292

293+
# Add pagination if specified
285294
if page is not None:
286295
catalogs_request.append(('page', page))
287296
if pagesize is not None:
288297
catalogs_request.append(('pagesize', pagesize))
289298

299+
# Populate parameters based on `use_json`
290300
if not use_json:
291-
# Decompose filters, sort
292-
for prop, value in kwargs.items():
293-
params[prop] = value
301+
# When not using JSON, merge kwargs into params and build query
302+
params.update(kwargs)
294303
catalogs_request.extend(self._build_catalogs_params(params))
295304
else:
296305
headers['Content-Type'] = 'application/json'
@@ -307,9 +316,10 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j
307316
catalogs_request = params_dict
308317

309318
# Removing single-element lists. Single values will live on their own (except for `sort_by`)
310-
for key in catalogs_request.keys():
311-
if (key != 'sort_by') & (len(catalogs_request[key]) == 1):
312-
catalogs_request[key] = catalogs_request[key][0]
319+
catalogs_request = {
320+
k: v if k == 'sort_by' or len(v) > 1 else v[0]
321+
for k, v in params_dict.items()
322+
}
313323

314324
# Otherwise, catalogs_request can remain as the original params dict
315325
else:
@@ -318,6 +328,40 @@ def service_request_async(self, service, params, pagesize=None, page=None, use_j
318328
response = self._request('POST', request_url, data=catalogs_request, headers=headers, use_json=use_json)
319329
return response
320330

331+
@class_or_instance
332+
def missions_request_async(self, service, params):
333+
"""
334+
Builds and executes an asynchronous query to the MAST Search API.
335+
Parameters
336+
----------
337+
service : str
338+
The MAST Search API service to query. Should be present in self.SERVICES.
339+
params : dict
340+
JSON object containing service parameters.
341+
Returns
342+
-------
343+
response : list of `~requests.Response`
344+
"""
345+
service_config = self.SERVICES.get(service.lower())
346+
request_url = self.REQUEST_URL + service_config.get('path')
347+
348+
# Default headers
349+
headers = {
350+
'User-Agent': self._session.headers['User-Agent'],
351+
'Content-Type': 'application/json',
352+
'Accept': 'application/json'
353+
}
354+
355+
# make request
356+
data, params = (params, None)
357+
response = self._request(method='POST',
358+
url=request_url,
359+
params=params,
360+
data=data,
361+
headers=headers,
362+
use_json=True)
363+
return response
364+
321365
def _build_catalogs_params(self, params):
322366
"""
323367
Gathers parameters for Catalogs.MAST usage and translates to valid API syntax tuples
@@ -387,12 +431,6 @@ def check_catalogs_criteria_params(self, criteria):
387431
response : boolean
388432
Whether the passed dict has at least one criteria parameter
389433
"""
390-
criteria_check = False
391-
non_criteria_params = ["columns", "sort_by", "page_size", "pagesize", "page"]
392-
criteria_keys = criteria.keys()
393-
for key in criteria_keys:
394-
if key not in non_criteria_params:
395-
criteria_check = True
396-
break
397434

398-
return criteria_check
435+
non_criteria_params = ["columns", "sort_by", "page_size", "pagesize", "page"]
436+
return any(key not in non_criteria_params for key in criteria)

astroquery/mast/tests/data/README.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,14 @@ To generate `~astroquery.mast.tests.data.panstarrs_columns.json`, use the follow
2525
>>> resp = utils._simple_request('https://catalogs.mast.stsci.edu/api/v0.1/panstarrs/dr2/mean/metadata.json')
2626
>>> with open('panstarrs_columns.json', 'w') as file:
2727
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP
28+
29+
To generate `~astroquery.mast.tests.data.mission_products.json`, use the following:
30+
31+
.. doctest-remote-data::
32+
33+
>>> import json
34+
>>> from astroquery.mast import utils
35+
...
36+
>>> resp = utils._simple_request('https://mast.stsci.edu/search/hst/api/v0.1/list_products', {'dataset_ids': 'Z14Z0104T'})
37+
>>> with open('mission_products.json', 'w') as file:
38+
... json.dump(resp.json(), file, indent=4) # doctest: +SKIP
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
{
2+
"products": [
3+
{
4+
"product_key": "Z14Z0104T_z14z0104t_shf.fits",
5+
"access": "PUBLIC",
6+
"dataset": "Z14Z0104T",
7+
"instrument_name": "HRS ",
8+
"filters": "MIRROR-N2",
9+
"filename": "z14z0104t_shf.fits",
10+
"uri": "Z14Z0104T/z14z0104t_shf.fits",
11+
"authz_primary_identifier": "Z14Z0104T",
12+
"authz_secondary_identifier": "CAL",
13+
"file_suffix": "SHF",
14+
"category": "UNCALIBRATED",
15+
"size": 31680,
16+
"type": "science"
17+
},
18+
{
19+
"product_key": "Z14Z0104T_z14z0104t_trl.fits",
20+
"access": "PUBLIC",
21+
"dataset": "Z14Z0104T",
22+
"instrument_name": "HRS ",
23+
"filters": "MIRROR-N2",
24+
"filename": "z14z0104t_trl.fits",
25+
"uri": "Z14Z0104T/z14z0104t_trl.fits",
26+
"authz_primary_identifier": "Z14Z0104T",
27+
"authz_secondary_identifier": "CAL",
28+
"file_suffix": "TRL",
29+
"category": "AUX",
30+
"size": 17280,
31+
"type": "science"
32+
},
33+
{
34+
"product_key": "Z14Z0104T_z14z0104t_ulf.fits",
35+
"access": "PUBLIC",
36+
"dataset": "Z14Z0104T",
37+
"instrument_name": "HRS ",
38+
"filters": "MIRROR-N2",
39+
"filename": "z14z0104t_ulf.fits",
40+
"uri": "Z14Z0104T/z14z0104t_ulf.fits",
41+
"authz_primary_identifier": "Z14Z0104T",
42+
"authz_secondary_identifier": "CAL",
43+
"file_suffix": "ULF",
44+
"category": "UNCALIBRATED",
45+
"size": 14400,
46+
"type": "science"
47+
},
48+
{
49+
"product_key": "Z14Z0104T_z14z0104t_pdq.fits",
50+
"access": "PUBLIC",
51+
"dataset": "Z14Z0104T",
52+
"instrument_name": "HRS ",
53+
"filters": "MIRROR-N2",
54+
"filename": "z14z0104t_pdq.fits",
55+
"uri": "Z14Z0104T/z14z0104t_pdq.fits",
56+
"authz_primary_identifier": "Z14Z0104T",
57+
"authz_secondary_identifier": "PDQ",
58+
"file_suffix": "PDQ",
59+
"category": "AUX",
60+
"size": 11520,
61+
"type": "science"
62+
},
63+
{
64+
"product_key": "Z14Z0104T_z14z0104x_ocx.fits",
65+
"access": "PUBLIC",
66+
"dataset": "Z14Z0104T",
67+
"instrument_name": "HRS ",
68+
"filters": "MIRROR-N2",
69+
"filename": "z14z0104x_ocx.fits",
70+
"uri": "Z14Z0104T/z14z0104x_ocx.fits",
71+
"authz_primary_identifier": "Z14Z0104X",
72+
"authz_secondary_identifier": "OCX",
73+
"file_suffix": "OCX",
74+
"category": "OTHER",
75+
"size": 11520,
76+
"type": "science"
77+
}
78+
]
79+
}

0 commit comments

Comments
 (0)