Skip to content

Commit 5087451

Browse files
committed
Finish implementations, docs, tests
1 parent 5240817 commit 5087451

File tree

7 files changed

+217
-89
lines changed

7 files changed

+217
-89
lines changed

astroquery/mast/missions.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -472,11 +472,21 @@ def filter_products(self, products, *, extension=None, **filters):
472472
extension : string or array, optional
473473
Default is None. Filters by file extension(s), matching any specified extensions.
474474
**filters :
475-
Column-based filters to be applied.
475+
Column-based filters to apply to the products table.
476+
476477
Each keyword corresponds to a column name in the table, with the argument being one or more
477478
acceptable values for that column. AND logic is applied between filters, OR logic within
478479
each filter set.
479-
For example: type="science", extension=["fits","jpg"]
480+
481+
For example:
482+
type="science", extension=["fits", "jpg"]
483+
484+
For columns with numeric data types (int or float), filter values can be expressed
485+
in several ways:
486+
- A single number: size=100
487+
- A range in the form "start..end": size="100..1000"
488+
- A comparison operator followed by a number: size=">=1000"
489+
- A list of expressions (OR logic): size=[100, "500..1000", ">=1500"]
480490
481491
Returns
482492
-------
@@ -504,21 +514,20 @@ def filter_products(self, products, *, extension=None, **filters):
504514
continue
505515

506516
col_data = products[colname]
507-
# If the column is an integer or float, treat differently
508-
if col_data.dtype.kind in 'if' and isinstance(vals, str):
517+
# If the column is an integer or float, accept numeric filters
518+
if col_data.dtype.kind in 'if':
509519
try:
510520
col_mask = utils.parse_numeric_product_filter(vals)(col_data)
511521
except ValueError:
512522
warnings.warn(f"Could not parse numeric filter '{vals}' for column '{colname}'.", InputWarning)
513523
continue
514-
else:
524+
else: # Assume string or list filter
515525
if isinstance(vals, str):
516526
vals = [vals]
517527
col_mask = np.isin(col_data, vals)
518528

519529
filter_mask &= col_mask
520530

521-
# Return filtered products
522531
return products[filter_mask]
523532

524533
def download_file(self, uri, *, local_path=None, cache=True, verbose=True):

astroquery/mast/observations.py

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ def get_product_list_async(self, observations):
545545

546546
def filter_products(self, products, *, mrp_only=False, extension=None, **filters):
547547
"""
548-
Takes an `~astropy.table.Table` of MAST observation data products and filters it based on given filters.
548+
Filters an `~astropy.table.Table` of data products based on given filters.
549549
550550
Parameters
551551
----------
@@ -556,47 +556,67 @@ def filter_products(self, products, *, mrp_only=False, extension=None, **filters
556556
extension : string or array, optional
557557
Default is None. Filters by file extension(s), keeping products whose filename ends with any of the specified extensions.
558558
**filters :
559-
Filters to be applied. Valid filters are all products fields listed
559+
Column-based filters to apply to the products table. Valid filters are all products fields listed
560560
`here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.
561-
The column name is the keyword, with the argument being one or more acceptable values
562-
for that parameter.
563-
Filter behavior is AND between the filters and OR within a filter set.
564-
For example: productType="SCIENCE",extension=["fits","jpg"]
561+
562+
Each keyword corresponds to a column name in the table, with the argument being one or more
563+
acceptable values for that column. AND logic is applied between filters, OR logic within
564+
each filter set.
565+
566+
For example:
567+
productType="SCIENCE", extension=["fits", "jpg"]
568+
569+
For columns with numeric data types (int or float), filter values can be expressed
570+
in several ways:
571+
- A single number: size=100
572+
- A range in the form "start..end": size="100..1000"
573+
- A comparison operator followed by a number: size=">=1000"
574+
- A list of expressions (OR logic): size=[100, "500..1000", ">=1500"]
565575
566576
Returns
567577
-------
568578
response : `~astropy.table.Table`
579+
Filtered table of data products.
569580
"""
570581

571582
filter_mask = np.full(len(products), True, dtype=bool)
572583

573-
# Applying the special filters (mrp_only and extension)
584+
# Filter by minimum recommended products (MRP) if specified
574585
if mrp_only:
575586
filter_mask &= (products['productGroupDescription'] == "Minimum Recommended Products")
576587

588+
# Filter by file extension, if provided
577589
if extension:
578-
if isinstance(extension, str):
579-
extension = [extension]
580-
581-
mask = np.full(len(products), False, dtype=bool)
582-
for elt in extension:
583-
mask |= [False if isinstance(x, np.ma.core.MaskedConstant) else x.endswith(elt)
584-
for x in products["productFilename"]]
585-
filter_mask &= mask
586-
587-
# Applying the rest of the filters
590+
extensions = [extension] if isinstance(extension, str) else extension
591+
ext_mask = np.array(
592+
[not isinstance(x, np.ma.core.MaskedConstant) and any(x.endswith(ext) for ext in extensions)
593+
for x in products["productFilename"]],
594+
dtype=bool
595+
)
596+
filter_mask &= ext_mask
597+
598+
# Applying column-based filters
588599
for colname, vals in filters.items():
600+
if colname not in products.colnames:
601+
warnings.warn(f"Column '{colname}' not found in product table.", InputWarning)
602+
continue
589603

590-
if isinstance(vals, str):
591-
vals = [vals]
592-
593-
mask = np.full(len(products), False, dtype=bool)
594-
for elt in vals:
595-
mask |= (products[colname] == elt)
596-
597-
filter_mask &= mask
598-
599-
return products[np.where(filter_mask)]
604+
col_data = products[colname]
605+
# If the column is an integer or float, accept numeric filters
606+
if col_data.dtype.kind in 'if':
607+
try:
608+
col_mask = utils.parse_numeric_product_filter(vals)(col_data)
609+
except ValueError:
610+
warnings.warn(f"Could not parse numeric filter '{vals}' for column '{colname}'.", InputWarning)
611+
continue
612+
else: # Assume string or list filter
613+
if isinstance(vals, str):
614+
vals = [vals]
615+
col_mask = np.isin(col_data, vals)
616+
617+
filter_mask &= col_mask
618+
619+
return products[filter_mask]
600620

601621
def download_file(self, uri, *, local_path=None, base_url=None, cache=True, cloud_only=False, verbose=True):
602622
"""

astroquery/mast/tests/test_mast.py

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -371,11 +371,59 @@ def test_missions_filter_products(patch_post):
371371
assert isinstance(filtered, Table)
372372
assert all(filtered['category'] == 'CALIBRATED')
373373

374+
# Filter by extension
375+
filtered = mast.MastMissions.filter_products(products,
376+
extension='fits')
377+
assert len(filtered) > 0
378+
374379
# Filter by non-existing column
375380
with pytest.warns(InputWarning):
376381
mast.MastMissions.filter_products(products,
377382
invalid=True)
378383

384+
# Numeric filtering
385+
# Single integer value
386+
filtered = mast.MastMissions.filter_products(products,
387+
size=11520)
388+
assert all(filtered['size'] == 11520)
389+
390+
# Single string value
391+
filtered = mast.MastMissions.filter_products(products,
392+
size='11520')
393+
assert all(filtered['size'] == 11520)
394+
395+
# Comparison operators
396+
filtered = mast.MastMissions.filter_products(products,
397+
size='<15000')
398+
assert all(filtered['size'] < 15000)
399+
400+
filtered = mast.MastMissions.filter_products(products,
401+
size='>15000')
402+
assert all(filtered['size'] > 15000)
403+
404+
filtered = mast.MastMissions.filter_products(products,
405+
size='>=14400')
406+
assert all(filtered['size'] >= 14400)
407+
408+
filtered = mast.MastMissions.filter_products(products,
409+
size='<=14400')
410+
assert all(filtered['size'] <= 14400)
411+
412+
# Range operator
413+
filtered = mast.MastMissions.filter_products(products,
414+
size='14400..17280')
415+
assert all((filtered['size'] >= 14400) & (filtered['size'] <= 17280))
416+
417+
# List of expressions
418+
filtered = mast.MastMissions.filter_products(products,
419+
size=[14400, '>20000'])
420+
assert all((filtered['size'] == 14400) | (filtered['size'] > 20000))
421+
422+
with pytest.warns(InputWarning, match="Could not parse numeric filter 'invalid' for column 'size'"):
423+
# Invalid filter value
424+
mast.MastMissions.filter_products(products,
425+
size='invalid')
426+
379427

380428
def test_missions_download_products(patch_post, tmp_path):
381429
# Check string input
@@ -670,11 +718,36 @@ def test_observations_get_product_list(patch_post):
670718

671719
def test_observations_filter_products(patch_post):
672720
products = mast.Observations.get_product_list('2003738726')
673-
result = mast.Observations.filter_products(products,
674-
productType=["SCIENCE"],
675-
mrp_only=False)
676-
assert isinstance(result, Table)
677-
assert len(result) == 7
721+
filtered = mast.Observations.filter_products(products,
722+
productType=["SCIENCE"],
723+
mrp_only=False)
724+
assert isinstance(filtered, Table)
725+
assert len(filtered) == 7
726+
727+
# Filter for minimum recommended products
728+
filtered = mast.Observations.filter_products(products,
729+
mrp_only=True)
730+
assert all(filtered['productGroupDescription'] == 'Minimum Recommended Products')
731+
732+
# Filter by extension
733+
filtered = mast.Observations.filter_products(products,
734+
extension='fits')
735+
assert len(filtered) > 0
736+
737+
# Filter by non-existing column
738+
with pytest.warns(InputWarning):
739+
mast.Observations.filter_products(products,
740+
invalid=True)
741+
742+
# Numeric filtering
743+
filtered = mast.Observations.filter_products(products,
744+
size='<50000')
745+
assert all(filtered['size'] < 50000)
746+
747+
# Numeric filter that cannot be parsed
748+
with pytest.warns(InputWarning, match="Could not parse numeric filter 'invalid' for column 'size'"):
749+
filtered = mast.Observations.filter_products(products,
750+
size='invalid')
678751

679752

680753
def test_observations_download_products(patch_post, tmpdir):

astroquery/mast/utils.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -354,27 +354,42 @@ def parse_numeric_product_filter(val):
354354
355355
Parameters
356356
----------
357-
val : str
358-
The filter value as a string. It can be a single number, a range in the form of "start..end",
359-
or a comparison operator followed by a number (e.g., ">=10", "<5", ">100.5", etc.).
357+
val : str, int, float, or list of these
358+
The filter value(s). Each entry can be:
359+
- A single number, given as a numeric value or a string (e.g., 100 or "100")
360+
- A range in the form "start..end" (e.g., "100..200")
361+
- A comparison operator followed by a number (e.g., ">=10", "<5", ">100.5")
360362
361363
Returns
362364
-------
363365
response : function
364366
A function that takes a column of a product table and returns a boolean mask indicating
365367
which rows satisfy the filter condition.
366368
"""
369+
# Regular expression to match range patterns
367370
range_pattern = re.compile(r'[+-]?(\d+(\.\d*)?|\.\d+)\.\.[+-]?(\d+(\.\d*)?|\.\d+)')
368-
if val.startswith('>='):
369-
return lambda col: col >= float(val[2:])
370-
elif val.startswith('<='):
371-
return lambda col: col <= float(val[2:])
372-
elif val.startswith('>'):
373-
return lambda col: col > float(val[1:])
374-
elif val.startswith('<'):
375-
return lambda col: col < float(val[1:])
376-
elif range_pattern.fullmatch(val):
377-
start, end = map(float, val.split('..'))
378-
return lambda col: (col >= start) & (col <= end)
371+
372+
def single_condition(cond):
373+
"""Helper function to create a condition function for a single value."""
374+
if isinstance(cond, (int, float)):
375+
return lambda col: col == float(cond)
376+
if cond.startswith('>='):
377+
return lambda col: col >= float(cond[2:])
378+
elif cond.startswith('<='):
379+
return lambda col: col <= float(cond[2:])
380+
elif cond.startswith('>'):
381+
return lambda col: col > float(cond[1:])
382+
elif cond.startswith('<'):
383+
return lambda col: col < float(cond[1:])
384+
elif range_pattern.fullmatch(cond):
385+
start, end = map(float, cond.split('..'))
386+
return lambda col: (col >= start) & (col <= end)
387+
else:
388+
return lambda col: col == float(cond)
389+
390+
if isinstance(val, list):
391+
# If val is a list, create a condition for each value and combine them with logical OR
392+
conditions = [single_condition(v) for v in val]
393+
return lambda col: np.logical_or.reduce([cond(col) for cond in conditions])
379394
else:
380-
return lambda col: col == float(val)
395+
return single_condition(val)

docs/mast/mast_cut.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ To access sector information for a particular coordinate, object, or moving targ
196196
-------------- ------ ------ ---
197197
tess-s0008-1-1 8 1 1
198198
tess-s0034-1-2 34 1 2
199+
tess-s0061-1-2 61 1 2
200+
tess-s0088-1-2 88 1 2
199201

200202
Note that because of the delivery cadence of the
201203
TICA high level science products, later sectors will be available sooner with TICA than with
@@ -242,6 +244,7 @@ The following example requests SPOC cutouts for a moving target.
242244
tess-s0029-1-4 29 1 4
243245
tess-s0043-3-3 43 3 3
244246
tess-s0044-2-4 44 2 4
247+
tess-s0092-4-3 92 4 3
245248

246249
Note that the moving targets functionality is not currently available for TICA,
247250
so the query will always default to SPOC.

docs/mast/mast_missions.rst

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -243,21 +243,27 @@ In many cases, you will not need to download every product that is associated wi
243243
and any other of the product fields.
244244

245245
The **AND** operation is performed for a list of filters, and the **OR** operation is performed within a filter set.
246-
For example, the filter below will return FITS products that are "science" type **and** have a ``file_suffix`` of "ASN" (association
247-
files) **or** "JIF" (job information files).
246+
247+
For columns with numeric data types (``int`` or ``float``), filter values can be expressed in several ways:
248+
- A single number: ``size=100``
249+
- A range in the form "start..end": ``size="100..1000"``
250+
- A comparison operator followed by a number: ``size=">=1000"``
251+
- A list of expressions (OR logic): ``size=[100, "500..1000", ">=1500"]``
252+
253+
The filter below returns FITS products that are "science" type **and** less than or equal to 20,000 bytes in size
254+
**and** have a ``file_suffix`` of "ASN" (association files) **or** "JIF" (job information files).
248255

249256
.. doctest-remote-data::
250257
>>> filtered = missions.filter_products(products,
251258
... extension='fits',
252259
... type='science',
260+
... size='<=20000',
253261
... file_suffix=['ASN', 'JIF'])
254262
>>> print(filtered) # doctest: +IGNORE_OUTPUT
255263
product_key access dataset ... category size type
256264
---------------------------- ------ --------- ... -------------- ----- -------
257265
JBTAA0010_jbtaa0010_asn.fits PUBLIC JBTAA0010 ... AUX 11520 science
258-
JBTAA0010_jbtaa0010_jif.fits PUBLIC JBTAA0010 ... JITTER/SUPPORT 60480 science
259266
JBTAA0020_jbtaa0020_asn.fits PUBLIC JBTAA0020 ... AUX 11520 science
260-
JBTAA0020_jbtaa0020_jif.fits PUBLIC JBTAA0020 ... JITTER/SUPPORT 60480 science
261267

262268
Downloading Data Products
263269
-------------------------

0 commit comments

Comments
 (0)