Skip to content

Commit 8b57bef

Browse files
authored
Merge pull request #3320 from esdc-esac-esa-int/ESA_gaia_GAIASWRQ-25_improve_cross-match
GAIA: new simplified cross match method
2 parents 5b4ffe6 + 2676412 commit 8b57bef

File tree

8 files changed

+476
-63
lines changed

8 files changed

+476
-63
lines changed

CHANGES.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ alma
2323

2424
- Bug fix in ``footprint_to_reg`` that did not allow regions to be plotted. [#3285]
2525

26+
27+
gaia
28+
^^^^
29+
30+
- New method ``cross_match_basic`` that simplifies the positional cross-match method. [#3320]
31+
2632
linelists.cdms
2733
^^^^^^^^^^^^^^
2834

astroquery/esa/euclid/tests/test_euclidtap.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ def test_load_table():
266266
responseLaunchJob = DummyResponse(200)
267267
responseLaunchJob.set_data(method='GET', context=None, body=TABLE_DATA, headers=None)
268268

269-
table = 'my_table'
269+
table = 'schema.my_table'
270270
conn_handler.set_response(f"tables?tables={table}", responseLaunchJob)
271271
tap = EuclidClass(tap_plus_conn_handler=conn_handler, datalink_handler=tap_plus, show_server_messages=False)
272272

astroquery/gaia/core.py

Lines changed: 177 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,9 @@
44
Gaia TAP plus
55
=============
66
7-
@author: Juan Carlos Segovia
8-
9-
107
European Space Astronomy Centre (ESAC)
118
European Space Agency (ESA)
129
13-
Created on 30 jun. 2016
14-
Modified on 18 Ene. 2022 by mhsarmiento
1510
"""
1611
import datetime
1712
import json
@@ -795,7 +790,7 @@ def __getQuantityInput(self, value, msg):
795790
if value is None:
796791
raise ValueError(f"Missing required argument: {msg}")
797792
if not (isinstance(value, str) or isinstance(value, units.Quantity)):
798-
raise ValueError(f"{msg} must be either a string or astropy.coordinates")
793+
raise ValueError(f"{msg} must be either a string or astropy.coordinates: {type(value)}")
799794

800795
if isinstance(value, str):
801796
return Quantity(value)
@@ -853,15 +848,170 @@ def load_user(self, user_id, *, verbose=False):
853848

854849
return self.is_valid_user(user_id=user_id, verbose=verbose)
855850

851+
def cross_match_basic(self, *, table_a_full_qualified_name, table_a_column_ra, table_a_column_dec,
                      table_b_full_qualified_name=MAIN_GAIA_TABLE, table_b_column_ra=MAIN_GAIA_TABLE_RA,
                      table_b_column_dec=MAIN_GAIA_TABLE_DEC, results_name=None,
                      radius=1.0, background=False, verbose=False):
    """Performs a positional cross-match between the specified tables.

    This method simplifies the execution of the method `cross_match` since it carries out the following steps
    in a single call:

    1. updates the user table metadata to flag the positional RA/Dec columns;
    2. launches a positional cross-match as an asynchronous query;
    3. returns all the columns from both tables plus the angular distance (deg) for the cross-matched sources.

    The result is a join table with the columns of both tables and the distance (degrees), which is returned
    without metadata units. If desired, units can be added using the Units package of Astropy as follows:
    ``results['separation'].unit = u.degree``. To speed up the cross-match, pass the biggest table to the
    ``table_b_full_qualified_name`` parameter.
    TAP+ only

    Parameters
    ----------
    table_a_full_qualified_name : str, mandatory
        a full qualified table name (i.e. schema name and table name)
    table_a_column_ra : str, mandatory
        the 'ra' column in the table table_a_full_qualified_name
    table_a_column_dec : str, mandatory
        the 'dec' column in the table table_a_full_qualified_name
    table_b_full_qualified_name : str, optional, default MAIN_GAIA_TABLE
        a full qualified table name (i.e. schema name and table name)
    table_b_column_ra : str, optional, default MAIN_GAIA_TABLE_RA
        the 'ra' column in the table table_b_full_qualified_name
    table_b_column_dec : str, optional, default MAIN_GAIA_TABLE_DEC
        the 'dec' column in the table table_b_full_qualified_name
    results_name : str, optional, default None
        custom name defined by the user for the job that is going to be created
    radius : float (arc. seconds), str or astropy.coordinate, optional, default 1.0
        radius (valid range: 0.1-10.0). For an astropy.coordinate any angular unit is valid, but its value in arc
        sec must be contained within the valid range.
    background : bool, optional, default 'False'
        when the job is executed in asynchronous mode, this flag specifies
        whether the execution will wait until results are available
    verbose : bool, optional, default 'False'
        flag to display information about the process

    Returns
    -------
    A Job object

    Raises
    ------
    ValueError
        if the radius is outside the valid range, a schema name is missing or empty, a table cannot be loaded,
        or one of the RA/Dec columns is not present in its table
    """
    radius_quantity = self.__get_radius_as_quantity_arcsec(radius)
    radius_arc_sec = radius_quantity.value

    if radius_arc_sec < 0.1 or radius_arc_sec > 10.0:
        raise ValueError(f"Invalid radius value. Found {radius_quantity}, valid range is: 0.1 to 10.0")

    schema_a = self.__get_schema_name(table_a_full_qualified_name)
    if not schema_a:
        raise ValueError(f"Schema name is empty in full qualified table: '{table_a_full_qualified_name}'")

    table_b_full_qualified_name = table_b_full_qualified_name or self.MAIN_GAIA_TABLE or conf.MAIN_GAIA_TABLE

    schema_b = self.__get_schema_name(table_b_full_qualified_name)
    if not schema_b:
        raise ValueError(f"Schema name is empty in full qualified table: '{table_b_full_qualified_name}'")

    table_metadata_a = self.__get_table_metadata(table_a_full_qualified_name, verbose)
    table_metadata_b = self.__get_table_metadata(table_b_full_qualified_name, verbose)

    # Validate both tables before touching any metadata, so that a wrong column name in table B cannot leave
    # table A already modified when the error is raised.
    self.__check_columns_exist(table_metadata_a, table_a_full_qualified_name, table_a_column_ra, table_a_column_dec)
    self.__check_columns_exist(table_metadata_b, table_b_full_qualified_name, table_b_column_ra, table_b_column_dec)

    self.__update_ra_dec_columns(table_a_full_qualified_name, table_a_column_ra, table_a_column_dec,
                                 table_metadata_a, verbose)
    self.__update_ra_dec_columns(table_b_full_qualified_name, table_b_column_ra, table_b_column_dec,
                                 table_metadata_b, verbose)

    # The same DISTANCE expression is used both as an output column and as the join condition.
    distance = (f"DISTANCE(a.{table_a_column_ra}, a.{table_a_column_dec}, b.{table_b_column_ra}, "
                f"b.{table_b_column_dec})")

    # The radius is validated in arc seconds but compared in degrees inside the query.
    query = (
        f"SELECT a.*, {distance} AS separation, b.* "
        f"FROM {table_a_full_qualified_name} AS a JOIN {table_b_full_qualified_name} AS b "
        f"ON {distance} < {radius_quantity.to(u.deg).value}")

    return self.launch_job_async(query=query,
                                 name=results_name,
                                 output_file=None,
                                 output_format="votable_gzip",
                                 verbose=verbose,
                                 dump_to_file=False,
                                 background=background,
                                 upload_resource=None,
                                 upload_table_name=None)
947+
948+
def __get_radius_as_quantity_arcsec(self, radius):
    """
    Transform the input radius into an astropy Quantity expressed in arc seconds.

    Parameters
    ----------
    radius : float, str or astropy Quantity
        a value that is not already a Quantity is handed to ``Quantity`` with arc seconds as the unit;
        a Quantity is converted to arc seconds

    Returns
    -------
    The radius as a Quantity in arc seconds
    """
    if isinstance(radius, units.Quantity):
        return radius.to(u.arcsec)
    return Quantity(value=radius, unit=u.arcsec)
957+
958+
def __update_ra_dec_columns(self, full_qualified_table_name, column_ra, column_dec, table_metadata, verbose):
    """
    Update the table metadata for the 'ra' and the 'dec' columns in the input table.

    Nothing is done unless the full qualified table name starts with ``user_``. For such a table, the 'ra'
    and 'dec' columns whose current flags differ from '1' and '2' respectively are flagged as "Ra" / "Dec"
    and marked as indexed.

    Parameters
    ----------
    full_qualified_table_name : str
        a full qualified table name (i.e. schema name and table name)
    column_ra : str
        the name of the 'ra' column
    column_dec : str
        the name of the 'dec' column
    table_metadata : object
        table metadata exposing a ``columns`` iterable whose items have ``name`` and ``flags`` attributes
    verbose : bool
        flag passed on to the metadata update call
    """
    if not full_qualified_table_name.startswith("user_"):
        return

    pending_changes = []
    for col in table_metadata.columns:
        # '1' and '2' are presumably the stored codes for the Ra and Dec flags -- TODO confirm
        if col.name == column_ra and col.flags != '1':
            pending_changes += [[column_ra, "flags", "Ra"], [column_ra, "indexed", True]]
        if col.name == column_dec and col.flags != '2':
            pending_changes += [[column_dec, "flags", "Dec"], [column_dec, "indexed", True]]

    if not pending_changes:
        return

    TapPlus.update_user_table(self, table_name=full_qualified_table_name, list_of_changes=pending_changes,
                              verbose=verbose)
975+
976+
def __check_columns_exist(self, table_metadata_a, full_qualified_table_name, column_ra, column_dec):
    """
    Check whether the 'ra' and the 'dec' columns exist in the input table.

    Parameters
    ----------
    table_metadata_a : object
        table metadata exposing a ``columns`` iterable whose items have a ``name`` attribute
    full_qualified_table_name : str
        the table name, used only in the error message
    column_ra : str
        the name of the 'ra' column
    column_dec : str
        the name of the 'dec' column

    Raises
    ------
    ValueError
        if at least one of the two columns is not among the column names of the table
    """
    known_names = tuple(col.name for col in table_metadata_a.columns)
    both_present = column_ra in known_names and column_dec in known_names
    if both_present:
        return

    raise ValueError(
        f"Please check: columns {column_ra} or {column_dec} not available in the table '"
        f"{full_qualified_table_name}'")
985+
986+
def __get_table_metadata(self, full_qualified_table_name, verbose):
    """
    Get the table metadata for the input table.

    Parameters
    ----------
    full_qualified_table_name : str
        a full qualified table name (i.e. schema name and table name)
    verbose : bool
        flag passed on to ``load_table``

    Returns
    -------
    The object returned by ``load_table`` for the requested table

    Raises
    ------
    ValueError
        if ``load_table`` fails for any reason; the original exception is attached as the cause
    """
    try:
        return self.load_table(table=full_qualified_table_name, verbose=verbose)
    except Exception as err:
        # Any failure is reported as "not found"; chaining keeps the real reason (e.g. a connection
        # problem) visible in the traceback instead of discarding it.
        raise ValueError(f"Not found table '{full_qualified_table_name}' in the archive") from err
995+
996+
def __get_schema_name(self, full_qualified_table_name):
    """
    Get the schema name from the full qualified table name.

    Parameters
    ----------
    full_qualified_table_name : str
        a full qualified table name (i.e. schema name and table name)

    Returns
    -------
    The schema name reported by ``taputils.get_schema_name``

    Raises
    ------
    ValueError
        if ``taputils.get_schema_name`` returns None for the input name
    """
    schema_name = taputils.get_schema_name(full_qualified_table_name)
    if schema_name is not None:
        return schema_name
    raise ValueError(f"Not found schema name in full qualified table: '{full_qualified_table_name}'")
1004+
8561005
def cross_match(self, *, full_qualified_table_name_a,
8571006
full_qualified_table_name_b,
8581007
results_table_name,
8591008
radius=1.0,
8601009
background=False,
8611010
verbose=False):
862-
"""Performs a cross-match between the specified tables
863-
The result is a join table (stored in the user storage area)
864-
with the identifies of both tables and the distance.
1011+
"""Performs a positional cross-match between the specified tables.
1012+
1013+
The result is a join table (stored in the user storage area) with the identifies of both tables and the
1014+
distance.
8651015
TAP+ only
8661016
8671017
Parameters
@@ -872,8 +1022,9 @@ def cross_match(self, *, full_qualified_table_name_a,
8721022
a full qualified table name (i.e. schema name and table name)
8731023
results_table_name : str, mandatory
8741024
a table name without schema. The schema is set to the user one
875-
radius : float (arc. seconds), optional, default 1.0
876-
radius (valid range: 0.1-10.0)
1025+
radius : float (arc. seconds), str or astropy.coordinate, optional, default 1.0
1026+
radius (valid range: 0.1-10.0). For an astropy.coordinate any angular unit is valid, but its value in arc
1027+
sec must be contained within the valid range.
8771028
background : bool, optional, default 'False'
8781029
when the job is executed in asynchronous mode, this flag specifies
8791030
whether the execution will wait until results are available
@@ -884,24 +1035,26 @@ def cross_match(self, *, full_qualified_table_name_a,
8841035
-------
8851036
A Job object
8861037
"""
887-
if radius < 0.1 or radius > 10.0:
888-
raise ValueError(f"Invalid radius value. Found {radius}, valid range is: 0.1 to 10.0")
8891038

890-
schemaA = taputils.get_schema_name(full_qualified_table_name_a)
891-
if schemaA is None:
892-
raise ValueError(f"Not found schema name in full qualified table A: '{full_qualified_table_name_a}'")
893-
tableA = taputils.get_table_name(full_qualified_table_name_a)
894-
schemaB = taputils.get_schema_name(full_qualified_table_name_b)
1039+
radius_quantity = self.__get_radius_as_quantity_arcsec(radius)
1040+
1041+
radius_arc_sec = radius_quantity.value
1042+
1043+
if radius_arc_sec < 0.1 or radius_arc_sec > 10.0:
1044+
raise ValueError(f"Invalid radius value. Found {radius_quantity}, valid range is: 0.1 to 10.0")
1045+
1046+
schema_a = self.__get_schema_name(full_qualified_table_name_a)
1047+
1048+
table_a = taputils.get_table_name(full_qualified_table_name_a)
8951049

896-
if schemaB is None:
897-
raise ValueError(f"Not found schema name in full qualified table B: '{full_qualified_table_name_b}'")
1050+
schema_b = self.__get_schema_name(full_qualified_table_name_b)
8981051

899-
tableB = taputils.get_table_name(full_qualified_table_name_b)
1052+
table_b = taputils.get_table_name(full_qualified_table_name_b)
9001053

9011054
if taputils.get_schema_name(results_table_name) is not None:
9021055
raise ValueError("Please, do not specify schema for 'results_table_name'")
9031056

904-
query = f"SELECT crossmatch_positional('{schemaA}','{tableA}','{schemaB}','{tableB}',{radius}, " \
1057+
query = f"SELECT crossmatch_positional('{schema_a}','{table_a}','{schema_b}','{table_b}',{radius_arc_sec}, " \
9051058
f"'{results_table_name}') FROM dual;"
9061059

9071060
name = str(results_table_name)
@@ -916,10 +1069,8 @@ def cross_match(self, *, full_qualified_table_name_a,
9161069
upload_resource=None,
9171070
upload_table_name=None)
9181071

919-
def launch_job(self, query, *, name=None, output_file=None,
920-
output_format="votable_gzip", verbose=False,
921-
dump_to_file=False, upload_resource=None,
922-
upload_table_name=None):
1072+
def launch_job(self, query, *, name=None, output_file=None, output_format="votable_gzip", verbose=False,
1073+
dump_to_file=False, upload_resource=None, upload_table_name=None):
9231074
"""Launches a synchronous job
9241075
9251076
Parameters

0 commit comments

Comments
 (0)