Skip to content

Commit 30c69ee

Browse files
committed
Validate panstarrs criteria
1 parent 18772c5 commit 30c69ee

File tree

8 files changed

+2527
-12
lines changed

8 files changed

+2527
-12
lines changed

CHANGES.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,9 @@ mast
225225

226226
- Present users with an error when nonexistent query criteria are used in ``mast.MastMissions`` query functions. [#3126]
227227

228+
- Present users with an error when nonexistent query criteria are used in ``mast.Catalogs.query_region`` and
229+
``mast.Catalogs.query_object``. [#3126]
230+
228231
mpc
229232
^^^
230233

astroquery/mast/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ class Conf(_config.ConfigNamespace):
2020
ssoserver = _config.ConfigItem(
2121
'https://ssoportal.stsci.edu',
2222
'MAST SSO Portal server.')
23+
catalogs_server = _config.ConfigItem(
24+
'https://catalogs.mast.stsci.edu',
25+
'Catalogs.MAST server.')
2326
timeout = _config.ConfigItem(
2427
600,
2528
'Time limit for requests from the STScI server.')

astroquery/mast/collections.py

Lines changed: 125 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77
"""
88

99
import difflib
10+
from json import JSONDecodeError
1011
import warnings
1112
import os
1213
import time
1314

14-
from requests import HTTPError
15+
from requests import HTTPError, RequestException
1516

1617
import astropy.units as u
1718
import astropy.coordinates as coord
@@ -22,7 +23,7 @@
2223
from ..utils.class_or_instance import class_or_instance
2324
from ..exceptions import InvalidQueryError, MaxResultsWarning, InputWarning
2425

25-
from . import utils
26+
from . import utils, conf
2627
from .core import MastQueryWithLogin
2728

2829

@@ -43,11 +44,13 @@ def __init__(self):
4344

4445
services = {"panstarrs": {"path": "panstarrs/{data_release}/{table}.json",
4546
"args": {"data_release": "dr2", "table": "mean"}}}
47+
self._catalogs_mast_search_options = ['columns', 'sort_by', 'table', 'data_release']
4648

4749
self._service_api_connection.set_service_params(services, "catalogs", True)
4850

4951
self.catalog_limit = None
5052
self._current_connection = None
53+
self._service_columns = dict() # Info about columns for Catalogs.MAST services
5154

5255
def _parse_result(self, response, *, verbose=False):
5356

@@ -59,6 +62,99 @@ def _parse_result(self, response, *, verbose=False):
5962

6063
return results_table
6164

65+
def _get_service_col_config(self, catalog, release='dr2', table='mean'):
    """
    For a given Catalogs.MAST catalog, return a table of all searchable columns and their descriptions.

    The column metadata is requested from the Catalogs.MAST server the first time a given
    (catalog, release, table) combination is asked for and is then cached in
    ``self._service_columns``, so repeated calls do not send another request.
    As of now, this function is exclusive to the Pan-STARRS catalog.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    release : str, optional
        Catalog data release to query from.
    table : str, optional
        Catalog table to query from.

    Returns
    -------
    response : `~astropy.table.Table` or None
        Table with the columns ``name``, ``data_type`` and ``description``.
        None is returned when ``catalog`` is not ``'panstarrs'``.

    Raises
    ------
    JSONDecodeError
        If the server response cannot be decoded as JSON.
    ConnectionError
        If the request to the server fails.
    KeyError
        If an entry of the response lacks one of the expected keys.
    RuntimeError
        If any other error occurs while retrieving or parsing the column list.
    """
    # Only supported for PanSTARRS currently
    if catalog != 'panstarrs':
        return None

    service_key = (catalog, release, table)
    if service_key in self._service_columns:
        # Column list was already fetched for this combination
        return self._service_columns[service_key]

    # Shared tail of every error message raised below
    context = f' for {catalog} catalog {table}, {release}'

    try:
        # Send server request to get column list for given parameters
        request_url = f'{conf.catalogs_server}/api/v0.1/{catalog}/{release}/{table}/metadata.json'
        resp = utils._simple_request(request_url)

        # Parse JSON and extract necessary info
        results = resp.json()
        rows = [
            (result['column_name'], result['db_type'], result['description'])
            for result in results
        ]

        # Create Table with parsed data
        col_table = Table(rows=rows, names=('name', 'data_type', 'description'))
        self._service_columns[service_key] = col_table

    except JSONDecodeError as ex:
        # JSONDecodeError must be built from (msg, doc, pos); passing only a message
        # raises TypeError and hides the decoding failure from the caller.
        raise JSONDecodeError(
            f'Failed to decode JSON response while attempting to get column list{context}: {ex}',
            ex.doc,
            ex.pos,
        ) from ex
    except RequestException as ex:
        raise ConnectionError(
            f'Failed to connect to the server while attempting to get column list{context}: {ex}'
        ) from ex
    except KeyError as ex:
        raise KeyError(
            f'Expected key not found in response data while attempting to get column list{context}: {ex}'
        ) from ex
    except Exception as ex:
        raise RuntimeError(
            f'An unexpected error occurred while attempting to get column list{context}: {ex}'
        ) from ex

    return self._service_columns[service_key]
119+
120+
def _validate_service_criteria(self, catalog, **criteria):
    """
    Verify that every criteria keyword names a column or search option of the service.

    Keywords are compared to the valid names case-insensitively. The valid names are the
    ``name`` entries of the column configuration returned by ``_get_service_col_config``
    plus the entries of ``self._catalogs_mast_search_options``.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    **criteria
        Keyword arguments representing criteria filters to apply. The optional
        ``data_release`` and ``table`` entries select which column configuration is
        used (defaulting to ``'dr2'`` and ``'mean'``).

    Raises
    ------
    InvalidQueryError
        If a keyword does not match any valid column names, an error is raised that suggests the closest
        matching column name, if available.
    """
    data_release = criteria.get('data_release', 'dr2')
    table_name = criteria.get('table', 'mean')

    # Ensure that self._service_columns is populated
    column_info = self._get_service_col_config(catalog, data_release, table_name)

    # No column configuration available: nothing to validate against
    if not column_info:
        return

    allowed = list(column_info['name']) + self._catalogs_mast_search_options

    for keyword in criteria:
        # Case-insensitive search for the keyword among the allowed names
        matched = None
        for candidate in allowed:
            if candidate.lower() == keyword.lower():
                matched = candidate
                break
        if matched:
            continue

        # Unknown keyword: offer the closest allowed name, if there is one
        suggestions = difflib.get_close_matches(keyword, allowed, n=1)
        if suggestions:
            message = (
                f"Filter '{keyword}' does not exist for {catalog} catalog {table_name}, {data_release}. "
                f"Did you mean '{suggestions[0]}'?"
            )
        else:
            message = f"Filter '{keyword}' does not exist for {catalog} catalog {table_name}, {data_release}."
        raise InvalidQueryError(message)
157+
62158
@class_or_instance
63159
def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
64160
version=None, pagesize=None, page=None, **criteria):
@@ -92,7 +188,15 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
92188
**criteria
93189
Other catalog-specific keyword args.
94190
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__
95-
for specific catalogs. For example one can specify the magtype for an HSC search.
191+
for specific catalogs. For example, one can specify the magtype for an HSC search.
192+
For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
193+
should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
194+
decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
195+
consisting of a list of column names. Results may also be sorted through the query with the parameter
196+
sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
197+
tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
198+
Detailed information of Catalogs.MAST criteria usage can
199+
be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.
96200
97201
Returns
98202
-------
@@ -110,21 +214,24 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
110214
'dec': coordinates.dec.deg,
111215
'radius': radius.deg}
112216

113-
# valid criteria keywords
114-
valid_criteria = []
115-
116217
# Determine API connection and service name
117218
if catalog.lower() in self._service_api_connection.SERVICES:
118219
self._current_connection = self._service_api_connection
119220
service = catalog
120221

222+
# validate user criteria
223+
self._validate_service_criteria(catalog.lower(), **criteria)
224+
121225
# adding additional user specified parameters
122226
for prop, value in criteria.items():
123227
params[prop] = value
124228

125229
else:
126230
self._current_connection = self._portal_api_connection
127231

232+
# valid criteria keywords
233+
valid_criteria = []
234+
128235
# Sorting out the non-standard portal service names
129236
if catalog.lower() == "hsc":
130237
if version == 2:
@@ -217,7 +324,15 @@ def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
217324
**criteria
218325
Catalog-specific keyword args.
219326
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__
220-
for specific catalogs. For example one can specify the magtype for an HSC search.
327+
for specific catalogs. For example, one can specify the magtype for an HSC search.
328+
For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
329+
should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
330+
decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
331+
consisting of a list of column names. Results may also be sorted through the query with the parameter
332+
sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
333+
tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
334+
Detailed information of Catalogs.MAST criteria usage can
335+
be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.
221336
222337
Returns
223338
-------
@@ -298,6 +413,9 @@ def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria)
298413
self._current_connection = self._service_api_connection
299414
service = catalog
300415

416+
# validate user criteria
417+
self._validate_service_criteria(catalog.lower(), **criteria)
418+
301419
if not self._current_connection.check_catalogs_criteria_params(criteria):
302420
raise InvalidQueryError("At least one non-positional criterion must be supplied.")
303421

astroquery/mast/missions.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,14 @@ def _validate_criteria(self, **criteria):
8888
If a keyword does not match any valid column names, an error is raised that suggests the closest
8989
matching column name, if available.
9090
"""
91-
# Ensure that self.columns in populated
91+
# Ensure that self.columns is populated
9292
self.get_column_list()
9393

9494
# Check each criteria argument for validity
95-
valid_cols = self.columns[self.mission]['name']
95+
valid_cols = list(self.columns[self.mission]['name']) + self._search_option_fields
9696
for kwd in criteria.keys():
97-
if kwd not in valid_cols and kwd not in self._search_option_fields:
97+
col = next((name for name in valid_cols if name.lower() == kwd.lower()), None)
98+
if not col:
9899
closest_match = difflib.get_close_matches(kwd, valid_cols, n=1)
99100
error_msg = (
100101
f"Filter '{kwd}' does not exist. Did you mean '{closest_match[0]}'?"

astroquery/mast/tests/data/README.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,14 @@ To generate `~astroquery.mast.tests.data.mission_columns.json`, use the followin
1414
>>> resp = utils._simple_request('https://mast.stsci.edu/search/util/api/v0.1/column_list', {'mission': 'hst'})
1515
>>> with open('mission_columns.json', 'w') as file:
1616
... json.dump(resp.json(), file, indent=4)
17+
18+
To generate `~astroquery.mast.tests.data.panstarrs_columns.json`, use the following:
19+
20+
.. doctest-remote-data::
21+
22+
>>> import json
23+
>>> from astroquery.mast import utils
24+
...
25+
>>> resp = utils._simple_request('https://catalogs.mast.stsci.edu/api/v0.1/panstarrs/dr2/mean/metadata.json')
26+
>>> with open('panstarrs_columns.json', 'w') as file:
27+
... json.dump(resp.json(), file, indent=4)

0 commit comments

Comments
 (0)