Skip to content

Commit cc6c7f5

Browse files
committed
magnitude difference crossmatch algorithm and tests
1 parent 5b62162 commit cc6c7f5

File tree

2 files changed

+222
-0
lines changed

2 files changed

+222
-0
lines changed
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
import numpy as np
6+
import pandas as pd
7+
import pyarrow as pa
8+
9+
from lsdb.core.crossmatch.kdtree_match import KdTreeCrossmatch
10+
from lsdb.core.crossmatch.kdtree_utils import _find_crossmatch_indices, _get_chord_distance
11+
12+
if TYPE_CHECKING:
13+
from lsdb.catalog import Catalog
14+
15+
16+
class MyCrossmatchAlgorithm(KdTreeCrossmatch):
17+
18+
extra_columns = pd.DataFrame({
19+
"_dist_arcsec": pd.Series(dtype=pd.ArrowDtype(pa.float64())),
20+
"magnitude_difference": pd.Series(dtype=pd.ArrowDtype(pa.float64())),
21+
})
22+
23+
@classmethod
24+
def validate(
25+
cls,
26+
left: Catalog,
27+
right: Catalog,
28+
left_mag_col: str,
29+
right_mag_col: str,
30+
radius_arcsec: float = 1,
31+
n_neighbors: int = 1,
32+
**kwargs
33+
):
34+
super().validate(left, right, n_neighbors=n_neighbors, radius_arcsec=radius_arcsec)
35+
36+
if left_mag_col not in left._ddf.columns:
37+
raise ValueError(f"Left catalog must have column '{left_mag_col}'")
38+
if right_mag_col not in right._ddf.columns:
39+
raise ValueError(f"Right catalog must have column '{right_mag_col}'")
40+
41+
def perform_crossmatch(
42+
self,
43+
left_mag_col: str,
44+
right_mag_col: str,
45+
radius_arcsec: float,
46+
n_neighbors: int = 1,
47+
**kwargs
48+
) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]:
49+
50+
max_d_chord = _get_chord_distance(radius_arcsec)
51+
52+
left_xyz, right_xyz = self._get_point_coordinates()
53+
54+
chord_distances_all, left_idx_all, right_idx_all = _find_crossmatch_indices(
55+
left_xyz=left_xyz,
56+
right_xyz=right_xyz,
57+
n_neighbors=n_neighbors,
58+
max_distance=max_d_chord,
59+
)
60+
61+
if len(left_idx_all) == 0:
62+
return np.array([], dtype=np.int64), np.array([], dtype=np.int64), pd.DataFrame({
63+
"_dist_arcsec": pd.Series(dtype=pd.ArrowDtype(pa.float64())),
64+
"magnitude_difference": pd.Series(dtype=pd.ArrowDtype(pa.float64())),
65+
})
66+
67+
arc_distances_all = np.degrees(2.0 * np.arcsin(0.5 * chord_distances_all)) * 3600
68+
69+
all_matches_df = pd.DataFrame({
70+
"left_idx": left_idx_all,
71+
"right_idx": right_idx_all,
72+
"arc_dist_arcsec": arc_distances_all,
73+
})
74+
75+
all_matches_df["left_mag"] = self.left.iloc[all_matches_df["left_idx"]][left_mag_col].to_numpy()
76+
all_matches_df["right_mag"] = self.right.iloc[all_matches_df["right_idx"]][right_mag_col].to_numpy()
77+
78+
all_matches_df["magnitude_difference"] = np.abs(all_matches_df["right_mag"] - all_matches_df["left_mag"])
79+
80+
best_match_indices_in_all_matches_df = all_matches_df.groupby("left_idx")["magnitude_difference"].idxmin()
81+
82+
final_matches_df = all_matches_df.loc[best_match_indices_in_all_matches_df].reset_index(drop=True)
83+
84+
final_left_indices = final_matches_df["left_idx"].to_numpy()
85+
final_right_indices = final_matches_df["right_idx"].to_numpy()
86+
final_distances = final_matches_df["arc_dist_arcsec"].to_numpy()
87+
final_magnitude_differences = final_matches_df["magnitude_difference"].to_numpy()
88+
89+
extra_columns = pd.DataFrame({
90+
"_dist_arcsec": pd.Series(final_distances, dtype=pd.ArrowDtype(pa.float64())),
91+
"magnitude_difference": pd.Series(final_magnitude_differences, dtype=pd.ArrowDtype(pa.float64())),
92+
})
93+
94+
return final_left_indices, final_right_indices, extra_columns
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import lsdb
2+
import nested_pandas as npd
3+
import numpy as np
4+
from lsdb.core.crossmatch.kdtree_match import KdTreeCrossmatch
5+
from lsdb_crossmatch.mag_difference_crossmatch import MyCrossmatchAlgorithm
6+
7+
def test_mag_difference_crossmatch(m67_delve_dir, m67_ps1_dir):
8+
left_data = lsdb.open_catalog(m67_ps1_dir)
9+
right_data = lsdb.open_catalog(m67_delve_dir)
10+
11+
id_col_left = "objID_left"
12+
id_col_right = "QUICK_OBJECT_ID_right"
13+
14+
left_mag_col = "rMeanPSFMag"
15+
right_mag_col = "MAG_PSF_R"
16+
17+
kdtree_result = lsdb.crossmatch(
18+
left_data,
19+
right_data,
20+
suffixes=["_left", "_right"],
21+
algorithm=KdTreeCrossmatch,
22+
radius_arcsec=0.01 * 3600,
23+
n_neighbors=5,
24+
).compute()
25+
"""
26+
print("\nKDTREE RESULTS")
27+
print("\nid col left")
28+
print(kdtree_result[id_col_left])
29+
print("\nid col right")
30+
print(kdtree_result[id_col_right])
31+
print("------------")
32+
"""
33+
"""
34+
target_left_id = 10493100048242
35+
count_in_left_catalog = left_data["objID"].value_counts().compute().get(target_left_id, 0)
36+
print(f"\nNumber of times {target_left_id} appears in the original left catalog: {count_in_left_catalog}")
37+
38+
target_right_id = 10493100048242
39+
count_in_right_catalog = right_data["QUICK_OBJECT_ID"].value_counts().compute().get(target_right_id, 0)
40+
41+
print(f"\nNumber of times {target_right_id} appears in the original right catalog: {count_in_right_catalog}")
42+
"""
43+
result = lsdb.crossmatch(
44+
left_data,
45+
right_data,
46+
suffixes=["_left", "_right"],
47+
algorithm=MyCrossmatchAlgorithm,
48+
radius_arcsec=0.01 * 3600,
49+
left_mag_col = left_mag_col,
50+
right_mag_col = right_mag_col,
51+
n_neighbors = 5
52+
).compute()
53+
"""
54+
print("\nMAG CROSSMATCH RESULTS")
55+
print("\nid col left")
56+
print(result[id_col_left])
57+
print("\nid col right")
58+
print(result[id_col_right])
59+
print("------------")
60+
"""
61+
62+
assert isinstance(result, npd.NestedFrame)
63+
64+
if not result.empty:
65+
assert len(result) == len(result[id_col_left].unique())
66+
assert result[id_col_left].value_counts().max() == 1
67+
68+
69+
if not kdtree_result.empty:
70+
kdtree_result["magnitude_difference"] = np.abs(
71+
kdtree_result[right_mag_col + "_right"] - kdtree_result[left_mag_col + "_left"]
72+
)
73+
74+
target_left_id =122650089529714672
75+
76+
if not kdtree_result.empty and target_left_id in kdtree_result[id_col_left].values:
77+
potential_matches_for_target = kdtree_result[kdtree_result[id_col_left] == target_left_id]
78+
79+
if not potential_matches_for_target.empty:
80+
potential_matches_for_target = potential_matches_for_target.sort_values(
81+
by="magnitude_difference"
82+
).reset_index(drop=True)
83+
84+
expected_best_right_id = potential_matches_for_target.iloc[0][id_col_right]
85+
86+
actual_match_from_my_algo = result[result[id_col_left] == target_left_id]
87+
88+
if not actual_match_from_my_algo.empty:
89+
actual_matched_right_id = actual_match_from_my_algo.iloc[0][id_col_right]
90+
91+
assert actual_matched_right_id == expected_best_right_id
92+
else:
93+
print(f"No potential matches found in kdtree_result for {target_left_id}.")
94+
else:
95+
print(f"Target left ID {target_left_id} not found in kdtree_result.")
96+
97+
"""
98+
for target_left_id in result[id_col_left].values:
99+
if not kdtree_result.empty and target_left_id in kdtree_result[id_col_left].values:
100+
potential_matches_for_target = kdtree_result[kdtree_result[id_col_left] == target_left_id].copy()
101+
102+
if not potential_matches_for_target.empty:
103+
potential_matches_for_target = potential_matches_for_target.sort_values(
104+
by="magnitude_difference"
105+
).reset_index(drop=True)
106+
107+
#print(f"\nAll potential matches for {target_left_id}:")
108+
#print(potential_matches_for_target[[id_col_left, id_col_right, "magnitude_difference"]])
109+
110+
expected_best_right_id = potential_matches_for_target.iloc[0][id_col_right]
111+
112+
#print(f"\nExpected best match for {target_left_id}:")
113+
#print(f" Right ID: {expected_best_right_id}")
114+
115+
actual_match_from_my_algo = result[result[id_col_left] == target_left_id]
116+
117+
if not actual_match_from_my_algo.empty:
118+
actual_matched_right_id = actual_match_from_my_algo.iloc[0][id_col_right]
119+
120+
#print(f"\nActual match from MyCrossmatchAlgorithm for {target_left_id}:")
121+
#print(f" Right ID: {actual_matched_right_id}")
122+
123+
assert actual_matched_right_id == expected_best_right_id
124+
else:
125+
print(f"No potential matches found in kdtree_result for {target_left_id}.")
126+
else:
127+
print(f"Target left ID {target_left_id} not found in kdtree_result.")
128+
"""

0 commit comments

Comments
 (0)