Skip to content

Commit 6fe4c2b

Browse files
snbiancobsipocz
authored andcommitted
Move logic into get_cloud_uris()
1 parent 9bd2c06 commit 6fe4c2b

File tree

4 files changed

+88
-74
lines changed

4 files changed

+88
-74
lines changed

CHANGES.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,7 @@ mast
169169

170170
- Fix bug in ``Catalogs.query_criteria()`` to use ``page`` and ``pagesize`` parameters correctly. [#3065]
171171

172-
- Add ``mast.Observations.get_cloud_uris_query`` method so that given a set of query criteria and optional filters,
173-
the user receives a list of cloud data URIs for matching data products. [#3064]
172+
- Modify ``mast.Observations.get_cloud_uris`` to also accept query criteria and data product filters. [#3064]
174173

175174

176175
0.4.7 (2024-03-08)

astroquery/mast/observations.py

Lines changed: 58 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -774,26 +774,56 @@ def download_products(self, products, *, download_dir=None, flat=False,
774774

775775
return manifest
776776

777-
def get_cloud_uris(self, data_products, *, include_bucket=True, full_url=False):
777+
def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=False, pagesize=None, page=None,
778+
mrp_only=False, extension=None, filter_products={}, **criteria):
778779
"""
779-
Takes an `~astropy.table.Table` of data products and returns the associated cloud data uris.
780+
Given an `~astropy.table.Table` of data products or query criteria and filter parameters,
781+
returns the associated cloud data URIs.
780782
781783
Parameters
782784
----------
783785
data_products : `~astropy.table.Table`
784-
Table containing products to be converted into cloud data uris.
786+
Table containing products to be converted into cloud data uris. If provided, this will supersede
787+
pagesize, page, or any arguments passed in as **criteria.
785788
include_bucket : bool
786-
Default True. When false returns the path of the file relative to the
789+
Default True. When False, returns the path of the file relative to the
787790
top level cloud storage location.
788791
Must be set to False when using the full_url argument.
789792
full_url : bool
790793
Default False. Return an HTTP fetchable url instead of a cloud uri.
791794
Must set include_bucket to False to use this option.
795+
pagesize : int, optional
796+
Default None. Can be used to override the default pagesize when making a query.
797+
E.g. when using a slow internet connection. Query criteria must also be provided.
798+
page : int, optional
799+
Default None. Can be used to override the default behavior of all results being returned for a query
800+
to obtain one specific page of results. Query criteria must also be provided.
801+
mrp_only : bool, optional
802+
Default False. When set to True, only "Minimum Recommended Products" will be returned.
803+
extension : string or array, optional
804+
Default None. Option to filter by file extension.
805+
filter_products : dict, optional
806+
Filters to be applied to data products. Valid filters are all products fields listed
807+
`here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.
808+
The column name as a string is the key. The corresponding value is one
809+
or more acceptable values for that parameter.
810+
Filter behavior is AND between the filters and OR within a filter set.
811+
For example: {"productType": "SCIENCE", "extension": ["fits", "jpg"]}
812+
**criteria
813+
Criteria to apply. At least one non-positional criterion must be supplied.
814+
Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
815+
and all observation fields returned by the ``get_metadata("observations")``.
816+
The Column Name is the keyword, with the argument being one or more acceptable values for that parameter,
817+
except for fields with a float datatype where the argument should be in the form [minVal, maxVal].
818+
For non-float type criteria, wildcards may be used (both * and % are considered wildcards); however,
819+
only one wildcarded value can be processed per criterion.
820+
RA and Dec must be given in decimal degrees, and datetimes in MJD.
821+
For example: filters=["FUV","NUV"],proposal_pi="Ost*",t_max=[52264.4586,54452.8914]
792822
793823
Returns
794824
-------
795825
response : list
796-
List of URIs generated from the data products, list way contain entries that are None
826+
List of URIs generated from the data products. May contain entries that are None
797827
if data_products includes products not found in the cloud.
798828
"""
799829

@@ -802,6 +832,29 @@ def get_cloud_uris(self, data_products, *, include_bucket=True, full_url=False):
802832
'Please enable anonymous cloud access by calling `enable_cloud_dataset` method. '
803833
'Refer to `~astroquery.mast.ObservationsClass.enable_cloud_dataset` documentation for more info.')
804834

835+
if data_products is None:
836+
if not criteria:
837+
raise InvalidQueryError(
838+
'Please provide either a `~astropy.table.Table` of data products or query criteria.'
839+
)
840+
else:
841+
# Get table of observations based on query criteria
842+
obs = self.query_criteria(pagesize=pagesize, page=page, **criteria)
843+
844+
if not len(obs):
845+
# Warning raised by ~astroquery.mast.ObservationsClass.query_criteria
846+
return
847+
848+
# Return list of associated data products
849+
data_products = self.get_product_list(obs)
850+
851+
# Filter product list
852+
data_products = self.filter_products(data_products, mrp_only=mrp_only, extension=extension, **filter_products)
853+
854+
if not len(data_products):
855+
warnings.warn("No matching products to fetch associated cloud URIs.", NoResultsWarning)
856+
return
857+
805858
# Remove duplicate products
806859
data_products = self._remove_duplicate_products(data_products)
807860

@@ -841,59 +894,6 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
841894
# Query for product URIs
842895
return self._cloud_connection.get_cloud_uri(data_product, include_bucket, full_url)
843896

844-
def get_cloud_uris_query(self, *, pagesize=None, page=None, mrp_only=False, extension=None,
845-
filter_products={}, **criteria):
846-
"""
847-
Given a set of criteria and optional filters, get a list of matching data products and return their
848-
associated cloud data URIs.
849-
850-
Parameters
851-
----------
852-
pagesize : int, optional
853-
Default None. Can be used to override the default pagesize.
854-
E.g. when using a slow internet connection.
855-
page : int, optional
856-
Default None. Can be used to override the default behavior of all results being returned to obtain
857-
one specific page of results.
858-
mrp_only : bool, optional
859-
Default False. When set to True, only "Minimum Recommended Products" will be returned.
860-
extension : string or array, optional
861-
Default None. Option to filter by file extension.
862-
filter_products : dict, optional
863-
Filters to be applied. Valid filters are all products fields listed
864-
`here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.
865-
The column name as a string is the key. The corresponding value is one
866-
or more acceptable values for that parameter.
867-
Filter behavior is AND between the filters and OR within a filter set.
868-
For example: {"productType": "SCIENCE", "extension"=["fits","jpg"]}
869-
**criteria
870-
Criteria to apply. At least one non-positional criteria must be supplied.
871-
Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
872-
and all observation fields returned by the ``get_metadata("observations")``.
873-
The Column Name is the keyword, with the argument being one or more acceptable values for that parameter,
874-
except for fields with a float datatype where the argument should be in the form [minVal, maxVal].
875-
For non-float type criteria wildcards maybe used (both * and % are considered wildcards), however
876-
only one wildcarded value can be processed per criterion.
877-
RA and Dec must be given in decimal degrees, and datetimes in MJD.
878-
For example: filters=["FUV","NUV"],proposal_pi="Ost*",t_max=[52264.4586,54452.8914]
879-
880-
Returns
881-
-------
882-
response : list
883-
884-
"""
885-
# Get table of observations based on query criteria
886-
obs = self.query_criteria(pagesize=pagesize, page=page, **criteria)
887-
888-
# Return list of associated data products
889-
prod = self.get_product_list(obs)
890-
891-
# Filter product list
892-
filt = self.filter_products(prod, mrp_only=mrp_only, extension=extension, **filter_products)
893-
894-
# Return list of cloud URIs
895-
return self.get_cloud_uris(filt)
896-
897897
def _remove_duplicate_products(self, data_products):
898898
"""
899899
Removes duplicate data products that have the same dataURI.

astroquery/mast/tests/test_mast_remote.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,11 @@ def test_get_cloud_uris(self):
544544

545545
assert len(uris) > 0, f'Products for OBSID {test_obs_id} were not found in the cloud.'
546546

547+
# check for warning if no data products match filters
548+
with pytest.warns(NoResultsWarning):
549+
Observations.get_cloud_uris(products,
550+
extension='png')
551+
547552
def test_get_cloud_uris_query(self):
548553
pytest.importorskip("boto3")
549554

@@ -559,12 +564,20 @@ def test_get_cloud_uris_query(self):
559564
s3_uris = Observations.get_cloud_uris(filt)
560565

561566
# get uris with streamlined function
562-
uris = Observations.get_cloud_uris_query(target_name=234295610,
563-
provenance_name="SPOC",
564-
sequence_number=[1, 2],
565-
filter_products={'calib_level': [2]})
567+
uris = Observations.get_cloud_uris(target_name=234295610,
568+
provenance_name="SPOC",
569+
sequence_number=[1, 2],
570+
filter_products={'calib_level': [2]})
566571
assert s3_uris == uris
567572

573+
# check that InvalidQueryError is thrown if neither data_products nor **criteria are defined
574+
with pytest.raises(InvalidQueryError):
575+
Observations.get_cloud_uris(filter_products={'calib_level': [2]})
576+
577+
# check for warning if query returns no observations
578+
with pytest.warns(NoResultsWarning):
579+
Observations.get_cloud_uris(target_name=234295611)
580+
568581
######################
569582
# CatalogClass tests #
570583
######################

docs/mast/mast_obsquery.rst

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,9 @@ MAST until it is disabled with `~astroquery.mast.ObservationsClass.disable_cloud
427427
To directly access a list of cloud URIs for a given dataset, use the
428428
`~astroquery.mast.ObservationsClass.get_cloud_uris`
429429
function (Python will prompt you to enable cloud access if you haven't already).
430-
To return a list of cloud URIs based on query criteria and product filters, use the
431-
`~astroquery.mast.ObservationsClass.get_cloud_uris_query` function.
430+
With this function, users may specify a `~astropy.table.Table` of data products or
431+
query criteria. Query criteria are supplied as keyword arguments, and product filters
432+
may be supplied through the ``mrp_only``, ``extension``, and ``filter_products`` parameters.
432433

433434
When cloud access is enabled, the standard download function
434435
`~astroquery.mast.ObservationsClass.download_products` preferentially pulls files from AWS when they
@@ -462,8 +463,9 @@ To get a list of S3 URIs, use the following workflow:
462463
...
463464
>>> Observations.disable_cloud_dataset()
464465

465-
Alternatively, you can use the streamlined `~astroquery.mast.ObservationsClass.get_cloud_uris_query` function. This approach is recommended
466-
for code brevity. Query criteria are supplied as keyword arguments, and filters are supplied through the ``filter_products`` parameter.
466+
Alternatively, this workflow can be streamlined by providing the query criteria directly to `~astroquery.mast.ObservationsClass.get_cloud_uris`.
467+
This approach is recommended for code brevity. Query criteria are supplied as keyword arguments, and filters are supplied through the
468+
``filter_products`` parameter. If both ``data_products`` and query criteria are provided, ``data_products`` takes precedence.
467469

468470
.. doctest-skip::
469471

@@ -474,12 +476,12 @@ for code brevity. Query criteria are supplied as keyword arguments, and filters
474476
INFO: Using the S3 STScI public dataset [astroquery.mast.core]
475477
...
476478
>>> # Getting the cloud URIs
477-
>>> s3_uris = Observations.get_cloud_uris_query(obs_collection='HST',
478-
filters='F606W',
479-
instrument_name='ACS/WFC',
480-
proposal_id=['12062'],
481-
dataRights='PUBLIC',
482-
filter_products={'productSubGroupDescription': 'DRZ'})
479+
>>> s3_uris = Observations.get_cloud_uris(obs_collection='HST',
480+
filters='F606W',
481+
instrument_name='ACS/WFC',
482+
proposal_id=['12062'],
483+
dataRights='PUBLIC',
484+
filter_products={'productSubGroupDescription': 'DRZ'})
483485
>>> print(s3_uris)
484486
['s3://stpubdata/hst/public/jbev/jbeveo010/jbeveo010_drz.fits', 's3://stpubdata/hst/public/jbev/jbevet010/jbevet010_drz.fits']
485487
...

0 commit comments

Comments
 (0)