Skip to content

Commit 72952b7

Browse files
authored
Merge pull request #3235 from cds-astro/perf-simbad-query-region
[SIMBAD] Improve query region preformances
2 parents 50eaddd + 8c84b28 commit 72952b7

File tree

5 files changed

+96
-51
lines changed

5 files changed

+96
-51
lines changed

CHANGES.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ simbad
6868
empty for an object, there will now be a line with masked values instead of no line in
6969
the result [#3199]
7070

71+
- perf: use a TAP upload in ``query_region`` when there are more than 300 coordinates.
72+
This prevents timeouts. [#3235]
73+
7174
xmatch
7275
^^^^^^
7376

astroquery/simbad/core.py

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from astroquery.query import BaseVOQuery
2020
from astroquery.utils import commons
21-
from astroquery.exceptions import LargeQueryWarning, NoResultsWarning
21+
from astroquery.exceptions import NoResultsWarning
2222
from astroquery.simbad.utils import (_catch_deprecated_fields_with_arguments,
2323
_wildcard_to_regexp, CriteriaTranslator,
2424
query_criteria_fields)
@@ -108,6 +108,7 @@ def __init__(self, ROW_LIMIT=None):
108108
self._server = conf.server
109109
self._tap = None
110110
self._hardlimit = None
111+
self._uploadlimit = None
111112
# attributes to construct ADQL queries
112113
self._columns_in_output = None # a list of _Column
113114
self.joins = [] # a list of _Join
@@ -166,6 +167,12 @@ def hardlimit(self):
166167
self._hardlimit = self.tap.hardlimit
167168
return self._hardlimit
168169

170+
@property
171+
def uploadlimit(self):
172+
if self._uploadlimit is None:
173+
self._uploadlimit = self.tap.get_tap_capability().uploadlimit.hard.content
174+
return self._uploadlimit
175+
169176
@property
170177
def columns_in_output(self):
171178
"""A list of _Column.
@@ -698,7 +705,6 @@ def query_objects(self, object_names, *, wildcard=False, criteria=None,
698705

699706
@deprecated_renamed_argument(["equinox", "epoch", "cache"],
700707
new_name=[None]*3,
701-
alternative=["astropy.coordinates.SkyCoord"]*3,
702708
since=['0.4.8']*3, relax=True)
703709
def query_region(self, coordinates, radius=2*u.arcmin, *,
704710
criteria=None, get_query_payload=False,
@@ -768,42 +774,54 @@ def query_region(self, coordinates, radius=2*u.arcmin, *,
768774
center = center.transform_to("icrs")
769775

770776
top, columns, joins, instance_criteria = self._get_query_parameters()
777+
if criteria:
778+
instance_criteria.append(f"({criteria})")
771779

772780
if center.isscalar:
773781
radius = coord.Angle(radius)
774782
instance_criteria.append(f"CONTAINS(POINT('ICRS', basic.ra, basic.dec), "
775783
f"CIRCLE('ICRS', {center.ra.deg}, {center.dec.deg}, "
776784
f"{radius.to(u.deg).value})) = 1")
785+
return self._query(top, columns, joins, instance_criteria,
786+
get_query_payload=get_query_payload)
777787

788+
# from uploadLimit in SIMBAD's capabilities
789+
# http://simbad.cds.unistra.fr/simbad/sim-tap/capabilities
790+
if len(center) > self.uploadlimit:
791+
raise ValueError(f"'query_region' can process up to {self.uploadlimit} "
792+
"centers. For larger queries, split your centers list.")
793+
794+
# `radius` as `str` is iterable, but contains only one value.
795+
if isiterable(radius) and not isinstance(radius, str):
796+
if len(radius) != len(center):
797+
raise ValueError(f"Mismatch between radii of length {len(radius)}"
798+
f" and center coordinates of length {len(center)}.")
799+
radius = [coord.Angle(item).to(u.deg).value for item in radius]
778800
else:
779-
if len(center) > 10000:
780-
warnings.warn(
781-
"For very large queries, you may receive a timeout error. SIMBAD suggests"
782-
" splitting queries with >10000 entries into multiple threads",
783-
LargeQueryWarning, stacklevel=2
784-
)
785-
786-
# `radius` as `str` is iterable, but contains only one value.
787-
if isiterable(radius) and not isinstance(radius, str):
788-
if len(radius) != len(center):
789-
raise ValueError(f"Mismatch between radii of length {len(radius)}"
790-
f" and center coordinates of length {len(center)}.")
791-
radius = [coord.Angle(item) for item in radius]
792-
else:
793-
radius = [coord.Angle(radius)] * len(center)
801+
radius = [coord.Angle(radius).to(u.deg).value] * len(center)
794802

803+
# for small number of centers, not using the upload method is faster
804+
if len(center) <= 300:
795805
cone_criteria = [(f"CONTAINS(POINT('ICRS', basic.ra, basic.dec), CIRCLE('ICRS', "
796-
f"{center.ra.deg}, {center.dec.deg}, {radius.to(u.deg).value})) = 1 ")
806+
f"{center.ra.deg}, {center.dec.deg}, {radius})) = 1 ")
797807
for center, radius in zip(center, radius)]
798808

799809
cone_criteria = f" ({'OR '.join(cone_criteria)})"
800810
instance_criteria.append(cone_criteria)
801811

802-
if criteria:
803-
instance_criteria.append(f"({criteria})")
812+
return self._query(top, columns, joins, instance_criteria,
813+
get_query_payload=get_query_payload)
814+
815+
# for longer centers list, we use a TAP upload
816+
upload_centers = Table({"ra": center.ra.deg, "dec": center.dec.deg,
817+
"radius": radius})
818+
sub_query = "(SELECT ra, dec, radius FROM TAP_UPLOAD.centers) AS centers"
819+
instance_criteria.append("CONTAINS(POINT('ICRS', basic.ra, basic.dec), CIRCLE"
820+
"('ICRS', centers.ra, centers.dec, centers.radius)) = 1 ")
804821

805822
return self._query(top, columns, joins, instance_criteria,
806-
get_query_payload=get_query_payload)
823+
from_table=f"{sub_query}, basic",
824+
get_query_payload=get_query_payload, centers=upload_centers)
807825

808826
@deprecated_renamed_argument(["verbose", "cache"], new_name=[None, None],
809827
since=['0.4.8', '0.4.8'], relax=True)

astroquery/simbad/tests/test_simbad.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from ... import simbad
1515
from .test_simbad_remote import multicoords
16-
from astroquery.exceptions import LargeQueryWarning, NoResultsWarning
16+
from astroquery.exceptions import NoResultsWarning
1717

1818

1919
GALACTIC_COORDS = SkyCoord(l=-67.02084 * u.deg, b=-29.75447 * u.deg, frame="galactic")
@@ -33,6 +33,7 @@ def _mock_simbad_class(monkeypatch):
3333
# >>> options = Simbad.list_votable_fields()
3434
# >>> options.write("simbad_output_options.xml", format="votable")
3535
monkeypatch.setattr(simbad.SimbadClass, "hardlimit", 2000000)
36+
monkeypatch.setattr(simbad.SimbadClass, "uploadlimit", 200000)
3637
monkeypatch.setattr(simbad.SimbadClass, "list_votable_fields", lambda self: table)
3738

3839

@@ -151,6 +152,8 @@ def test_mocked_simbad():
151152
assert len(options) >= 115
152153
# this mocks the hardlimit
153154
assert simbad_instance.hardlimit == 2000000
155+
# and the uploadlimit
156+
assert simbad_instance.uploadlimit == 200000
154157

155158
# ----------------------------
156159
# Test output options settings
@@ -448,11 +451,9 @@ def test_query_region_with_criteria():
448451
adql = simbad.core.Simbad.query_region(ICRS_COORDS, radius="0.1s",
449452
criteria="galdim_majaxis>0.2",
450453
get_query_payload=True)["QUERY"]
451-
assert adql.endswith("AND (galdim_majaxis>0.2)")
454+
assert "(galdim_majaxis>0.2)" in adql
452455

453456

454-
# transform large query warning into error to save execution time
455-
@pytest.mark.filterwarnings("error:For very large queries")
456457
@pytest.mark.usefixtures("_mock_simbad_class")
457458
def test_query_region_errors():
458459
with pytest.raises(u.UnitsError):
@@ -462,9 +463,24 @@ def test_query_region_errors():
462463
with pytest.raises(ValueError, match="Mismatch between radii of length 3 "
463464
"and center coordinates of length 2."):
464465
simbad.SimbadClass().query_region(multicoords, radius=[1, 2, 3] * u.deg)
465-
centers = SkyCoord([0] * 10001, [0] * 10001, unit="deg", frame="icrs")
466-
with pytest.raises(LargeQueryWarning, match="For very large queries, you may receive a timeout error.*"):
467-
simbad.core.Simbad.query_region(centers, radius="2m", get_query_payload=True)["QUERY"]
466+
467+
468+
@pytest.mark.usefixtures("_mock_simbad_class")
469+
def test_query_region_error_on_long_list_of_centers(monkeypatch):
470+
# initiating a SkyCoord longer than 200000 takes a few seconds
471+
monkeypatch.setattr(SkyCoord, "__len__", lambda self: 200001)
472+
centers = SkyCoord([0, 0], [0, 0], unit="deg", frame="icrs")
473+
with pytest.raises(ValueError, match="'query_region' can process up to 200000 centers.*"):
474+
simbad.core.Simbad.query_region(centers, radius="2m")
475+
476+
477+
@pytest.mark.usefixtures("_mock_simbad_class")
478+
def test_query_region_upload():
479+
centers = SkyCoord([0] * 301, [0] * 301, unit="deg", frame="icrs")
480+
adql = simbad.core.Simbad.query_region(centers, radius=["2m"] * 301,
481+
get_query_payload=True)["QUERY"]
482+
assert adql.endswith("WHERE CONTAINS(POINT('ICRS', basic.ra, basic.dec), CIRCLE"
483+
"('ICRS', centers.ra, centers.dec, centers.radius)) = 1 ")
468484

469485

470486
@pytest.mark.usefixtures("_mock_simbad_class")

astroquery/simbad/tests/test_simbad_remote.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2+
import numpy as np
23
import pytest
34

45
from astropy.coordinates import SkyCoord
@@ -79,6 +80,13 @@ def test_query_regions(self):
7980
# filtering on main_id to retrieve the two cone centers
8081
assert {"M 81", "M 10"} == set(result["main_id"].data.data)
8182

83+
def test_query_regions_long_list(self):
84+
self.simbad.ROW_LIMIT = -1
85+
# we create a list of centers longer than 300 to trigger the TAP upload case
86+
centers = SkyCoord(np.arange(0, 360, 1), np.arange(0, 180, 0.5) - 90, unit="deg")
87+
result = self.simbad.query_region(centers, radius="1m")
88+
assert len(result) > 90
89+
8290
def test_query_object_ids(self):
8391
self.simbad.ROW_LIMIT = -1
8492
result = self.simbad.query_objectids("Polaris")

docs/simbad/simbad.rst

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -621,29 +621,29 @@ with:
621621
>>> from astroquery.simbad import Simbad
622622
>>> Simbad.list_votable_fields()[["name", "description"]]
623623
<Table length=...>
624-
name description
625-
object object
626-
----------- -------------------------------------------------------
627-
mesDiameter Collection of stellar diameters.
628-
mesPM Collection of proper motions.
629-
mesISO Infrared Space Observatory (ISO) observing log.
630-
mesSpT Collection of spectral types.
631-
allfluxes all flux/magnitudes U,B,V,I,J,H,K,u_,g_,r_,i_,z_
632-
ident Identifiers of an astronomical object
633-
flux Magnitude/Flux information about an astronomical object
634-
mesPLX Collection of trigonometric parallaxes.
635-
otypedef all names and definitions for the object types
636-
... ...
637-
K Magnitude K
638-
u Magnitude SDSS u
639-
g Magnitude SDSS g
640-
r Magnitude SDSS r
641-
i Magnitude SDSS i
642-
z Magnitude SDSS z
643-
G Magnitude Gaia G
644-
F150W JWST NIRCam F150W
645-
F200W JWST NIRCam F200W
646-
F444W JWST NIRCan F444W
624+
name description
625+
object object
626+
----------- ---------------------------------------------------------
627+
mesDiameter Collection of stellar diameters.
628+
mesPM Collection of proper motions.
629+
mesISO Infrared Space Observatory (ISO) observing log.
630+
mesSpT Collection of spectral types.
631+
allfluxes all flux/magnitudes U,B,V,I,J,H,K,u_,g_,r_,i_,z_
632+
ident Identifiers of an astronomical object
633+
flux Magnitude/Flux information about an astronomical object
634+
mesOtype Other object types associated with an object with origins
635+
mesPLX Collection of trigonometric parallaxes.
636+
... ...
637+
K Magnitude K
638+
u Magnitude SDSS u
639+
g Magnitude SDSS g
640+
r Magnitude SDSS r
641+
i Magnitude SDSS i
642+
z Magnitude SDSS z
643+
G Magnitude Gaia G
644+
F150W JWST NIRCam F150W
645+
F200W JWST NIRCam F200W
646+
F444W JWST NIRCan F444W
647647

648648
You can also access a single field description with
649649
`~astroquery.simbad.SimbadClass.get_field_description`

0 commit comments

Comments
 (0)