Skip to content

Commit a021803

Browse files
committed
perf: improve query_region for large number of centers
1 parent b6c9612 commit a021803

File tree

4 files changed

+64
-28
lines changed

4 files changed

+64
-28
lines changed

CHANGES.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ simbad
5656
empty for an object, there will now be a line with masked values instead of no line in
5757
the result [#3199]
5858

59+
- perf: use a TAP upload in ``query_region`` when there are more than 300 coordinates.
60+
This prevents timeouts. [#3235]
61+
5962
xmatch
6063
^^^^^^
6164

astroquery/simbad/core.py

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from astroquery.query import BaseVOQuery
2020
from astroquery.utils import commons
21-
from astroquery.exceptions import LargeQueryWarning, NoResultsWarning
21+
from astroquery.exceptions import NoResultsWarning
2222
from astroquery.simbad.utils import (_catch_deprecated_fields_with_arguments,
2323
_wildcard_to_regexp, CriteriaTranslator,
2424
query_criteria_fields)
@@ -698,7 +698,7 @@ def query_objects(self, object_names, *, wildcard=False, criteria=None,
698698

699699
@deprecated_renamed_argument(["equinox", "epoch", "cache"],
700700
new_name=[None]*3,
701-
alternative=["astropy.coordinates.SkyCoord"]*3,
701+
alternative=["astropy.coordinates.SkyCoord"]*2 + [None],
702702
since=['0.4.8']*3, relax=True)
703703
def query_region(self, coordinates, radius=2*u.arcmin, *,
704704
criteria=None, get_query_payload=False,
@@ -768,42 +768,54 @@ def query_region(self, coordinates, radius=2*u.arcmin, *,
768768
center = center.transform_to("icrs")
769769

770770
top, columns, joins, instance_criteria = self._get_query_parameters()
771+
if criteria:
772+
instance_criteria.append(f"({criteria})")
771773

772774
if center.isscalar:
773775
radius = coord.Angle(radius)
774776
instance_criteria.append(f"CONTAINS(POINT('ICRS', basic.ra, basic.dec), "
775777
f"CIRCLE('ICRS', {center.ra.deg}, {center.dec.deg}, "
776778
f"{radius.to(u.deg).value})) = 1")
779+
return self._query(top, columns, joins, instance_criteria,
780+
get_query_payload=get_query_payload)
777781

782+
# from uploadLimit in SIMBAD's capabilities
783+
# http://simbad.cds.unistra.fr/simbad/sim-tap/capabilities
784+
if len(center) > 200000:
785+
raise ValueError("'query_region' can process up to 200000 centers. For "
786+
"larger queries, split your centers list.")
787+
788+
# `radius` as `str` is iterable, but contains only one value.
789+
if isiterable(radius) and not isinstance(radius, str):
790+
if len(radius) != len(center):
791+
raise ValueError(f"Mismatch between radii of length {len(radius)}"
792+
f" and center coordinates of length {len(center)}.")
793+
radius = [coord.Angle(item).to(u.deg).value for item in radius]
778794
else:
779-
if len(center) > 10000:
780-
warnings.warn(
781-
"For very large queries, you may receive a timeout error. SIMBAD suggests"
782-
" splitting queries with >10000 entries into multiple threads",
783-
LargeQueryWarning, stacklevel=2
784-
)
785-
786-
# `radius` as `str` is iterable, but contains only one value.
787-
if isiterable(radius) and not isinstance(radius, str):
788-
if len(radius) != len(center):
789-
raise ValueError(f"Mismatch between radii of length {len(radius)}"
790-
f" and center coordinates of length {len(center)}.")
791-
radius = [coord.Angle(item) for item in radius]
792-
else:
793-
radius = [coord.Angle(radius)] * len(center)
795+
radius = [coord.Angle(radius).to(u.deg).value] * len(center)
794796

797+
# for small number of centers, not using the upload method is faster
798+
if len(center) <= 300:
795799
cone_criteria = [(f"CONTAINS(POINT('ICRS', basic.ra, basic.dec), CIRCLE('ICRS', "
796-
f"{center.ra.deg}, {center.dec.deg}, {radius.to(u.deg).value})) = 1 ")
800+
f"{center.ra.deg}, {center.dec.deg}, {radius})) = 1 ")
797801
for center, radius in zip(center, radius)]
798802

799803
cone_criteria = f" ({'OR '.join(cone_criteria)})"
800804
instance_criteria.append(cone_criteria)
801805

802-
if criteria:
803-
instance_criteria.append(f"({criteria})")
806+
return self._query(top, columns, joins, instance_criteria,
807+
get_query_payload=get_query_payload)
808+
809+
# for longer centers list, we use a TAP upload
810+
upload_centers = Table({"ra": center.ra.deg, "dec": center.dec.deg,
811+
"radius": radius})
812+
sub_query = "(SELECT ra, dec, radius FROM TAP_UPLOAD.centers) AS centers"
813+
instance_criteria.append("CONTAINS(POINT('ICRS', basic.ra, basic.dec), CIRCLE"
814+
"('ICRS', centers.ra, centers.dec, centers.radius)) = 1 ")
804815

805816
return self._query(top, columns, joins, instance_criteria,
806-
get_query_payload=get_query_payload)
817+
from_table=f"{sub_query}, basic",
818+
get_query_payload=get_query_payload, centers=upload_centers)
807819

808820
@deprecated_renamed_argument(["verbose", "cache"], new_name=[None, None],
809821
since=['0.4.8', '0.4.8'], relax=True)

astroquery/simbad/tests/test_simbad.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from ... import simbad
1515
from .test_simbad_remote import multicoords
16-
from astroquery.exceptions import LargeQueryWarning, NoResultsWarning
16+
from astroquery.exceptions import NoResultsWarning
1717

1818

1919
GALACTIC_COORDS = SkyCoord(l=-67.02084 * u.deg, b=-29.75447 * u.deg, frame="galactic")
@@ -448,11 +448,9 @@ def test_query_region_with_criteria():
448448
adql = simbad.core.Simbad.query_region(ICRS_COORDS, radius="0.1s",
449449
criteria="galdim_majaxis>0.2",
450450
get_query_payload=True)["QUERY"]
451-
assert adql.endswith("AND (galdim_majaxis>0.2)")
451+
assert "(galdim_majaxis>0.2)" in adql
452452

453453

454-
# transform large query warning into error to save execution time
455-
@pytest.mark.filterwarnings("error:For very large queries")
456454
@pytest.mark.usefixtures("_mock_simbad_class")
457455
def test_query_region_errors():
458456
with pytest.raises(u.UnitsError):
@@ -462,9 +460,24 @@ def test_query_region_errors():
462460
with pytest.raises(ValueError, match="Mismatch between radii of length 3 "
463461
"and center coordinates of length 2."):
464462
simbad.SimbadClass().query_region(multicoords, radius=[1, 2, 3] * u.deg)
465-
centers = SkyCoord([0] * 10001, [0] * 10001, unit="deg", frame="icrs")
466-
with pytest.raises(LargeQueryWarning, match="For very large queries, you may receive a timeout error.*"):
467-
simbad.core.Simbad.query_region(centers, radius="2m", get_query_payload=True)["QUERY"]
463+
464+
465+
@pytest.mark.usefixtures("_mock_simbad_class")
466+
def test_query_region_error_on_long_list_of_centers(monkeypatch):
467+
# initiating a SkyCoord longer than 200000 takes a few seconds
468+
monkeypatch.setattr(SkyCoord, "__len__", lambda self: 200001)
469+
centers = SkyCoord([0, 0], [0, 0], unit="deg", frame="icrs")
470+
with pytest.raises(ValueError, match="'query_region' can process up to 200000 centers.*"):
471+
simbad.core.Simbad.query_region(centers, radius="2m")
472+
473+
474+
@pytest.mark.usefixtures("_mock_simbad_class")
475+
def test_query_region_upload():
476+
centers = SkyCoord([0] * 301, [0] * 301, unit="deg", frame="icrs")
477+
adql = simbad.core.Simbad.query_region(centers, radius=["2m"] * 301,
478+
get_query_payload=True)["QUERY"]
479+
assert adql.endswith("WHERE CONTAINS(POINT('ICRS', basic.ra, basic.dec), CIRCLE"
480+
"('ICRS', centers.ra, centers.dec, centers.radius)) = 1 ")
468481

469482

470483
@pytest.mark.usefixtures("_mock_simbad_class")

astroquery/simbad/tests/test_simbad_remote.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2+
import numpy as np
23
import pytest
34

45
from astropy.coordinates import SkyCoord
@@ -79,6 +80,13 @@ def test_query_regions(self):
7980
# filtering on main_id to retrieve the two cone centers
8081
assert {"M 81", "M 10"} == set(result["main_id"].data.data)
8182

83+
def test_query_regions_long_list(self):
84+
self.simbad.ROW_LIMIT = -1
85+
# we create a list of centers longer than 300 to trigger the TAP upload case
86+
centers = SkyCoord(np.arange(0, 360, 1), np.arange(0, 180, 0.5) - 90, unit="deg")
87+
result = self.simbad.query_region(centers, radius="1m")
88+
assert len(result) > 90
89+
8290
def test_query_object_ids(self):
8391
self.simbad.ROW_LIMIT = -1
8492
result = self.simbad.query_objectids("Polaris")

0 commit comments

Comments
 (0)