Skip to content

Commit 176e954

Browse files
committed
Update z-order query tests
1 parent 33c3bb3 commit 176e954

File tree

1 file changed

+65
-27
lines changed

1 file changed

+65
-27
lines changed

tests/test_sdata_points_zorder.py

Lines changed: 65 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import pytest
22
from os.path import join
3+
import numpy as np
34

45
from spatialdata import read_zarr
56

@@ -36,6 +37,7 @@ def test_zorder_sorting():
3637
assert is_sorted(morton_sorted)
3738

3839

40+
3941
def test_zorder_query():
4042
sdata = get_sdata()
4143

@@ -55,11 +57,18 @@ def test_zorder_query():
5557
assert df.shape[0] == 42638083
5658

5759
# Do the same query the "dumb" way, by checking all points
60+
61+
# The "dumb" query needs an epsilon because normalizing the coordinates
62+
# introduces rounding error at the rectangle boundary. Rather than expecting
63+
# an exact match, we verify that a slightly smaller rectangle is fully
64+
# contained in the rows estimated by the Morton-code query.
65+
EXACT_BOUNDARY_EPSILON = 1
66+
5867
in_rect = (
59-
(df["x"] >= orig_rect[0][0])
60-
& (df["x"] <= orig_rect[1][0])
61-
& (df["y"] >= orig_rect[0][1])
62-
& (df["y"] <= orig_rect[1][1])
68+
(df["x"] >= orig_rect[0][0] + EXACT_BOUNDARY_EPSILON)
69+
& (df["x"] <= orig_rect[1][0] - EXACT_BOUNDARY_EPSILON)
70+
& (df["y"] >= orig_rect[0][1] + EXACT_BOUNDARY_EPSILON)
71+
& (df["y"] <= orig_rect[1][1] - EXACT_BOUNDARY_EPSILON)
6372
)
6473
dumb_df_subset = df.loc[in_rect]
6574
# Get the row indices of the points in the rectangle
@@ -69,7 +78,7 @@ def test_zorder_query():
6978
# Check that the estimated rows fully contain the exact rows.
7079
# A.issubset(B) checks that all elements of A are in B ("A is a subset of B").
7180
assert set(exact_row_indices).issubset(set(estimated_row_indices))
72-
assert len(exact_row_indices) == 614
81+
assert len(exact_row_indices) == 552
7382
assert len(estimated_row_indices) <= 631
7483

7584
# Check that the number of rows checked is less than the total number of points
@@ -89,52 +98,81 @@ def test_zorder_query():
8998
orig_coord_to_norm_coord(orig_rect[1], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max)
9099
]
91100

101+
norm_rect_rounded = [
102+
# TODO: both corners are currently floored; should the upper corner use ceil instead?
103+
[np.floor(norm_rect[0][0]), np.floor(norm_rect[0][1])],
104+
[np.floor(norm_rect[1][0]), np.floor(norm_rect[1][1])]
105+
]
106+
92107
in_rect_norm = (
93-
(df["x_uint"] >= norm_rect[0][0])
94-
& (df["x_uint"] <= norm_rect[1][0])
95-
& (df["y_uint"] >= norm_rect[0][1])
96-
& (df["y_uint"] <= norm_rect[1][1])
108+
(df["x_uint"] >= norm_rect_rounded[0][0] + EXACT_BOUNDARY_EPSILON)
109+
& (df["x_uint"] < norm_rect_rounded[1][0] - EXACT_BOUNDARY_EPSILON)
110+
& (df["y_uint"] >= norm_rect_rounded[0][1] + EXACT_BOUNDARY_EPSILON)
111+
& (df["y_uint"] < norm_rect_rounded[1][1] - EXACT_BOUNDARY_EPSILON)
97112
)
98113
dumb_df_subset_norm = df.loc[in_rect_norm]
99114
# Get the row indices of the points in the rectangle
100115
# (these are the indices in the original dataframe)
101116
exact_row_indices_norm = dumb_df_subset_norm.index.tolist()
117+
118+
# A.issubset(B)
119+
# True if A is a subset of B and False otherwise.
102120
assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))
103-
assert len(exact_row_indices_norm) == 617
104-
assert len(estimated_row_indices) <= 631
105-
106121

122+
assert len(exact_row_indices_norm) == 609
123+
assert len(estimated_row_indices) <= 631
107124

108-
"""
109125
# ========= Another query ==========
110-
orig_rect = [[500, 500], [600, 600]] # x0, y0, x1, y1
126+
orig_rect = [[500.0, 500.0], [600.0, 600.0]] # x0, y0, x1, y1
111127

112128
# Query using z-order
113129
matching_row_ranges, rows_checked = sdata_morton_query_rect_debug(sdata, "transcripts", orig_rect)
114130
rect_row_indices = row_ranges_to_row_indices(matching_row_ranges)
115131
estimated_row_indices = df.iloc[rect_row_indices].index.tolist()
116132

117-
# Query the "dumb" way
133+
# Do the same query the "dumb" way, by checking all points
118134
in_rect = (
119-
(df["x"] >= orig_rect[0][0])
120-
& (df["x"] <= orig_rect[1][0])
121-
& (df["y"] >= orig_rect[0][1])
122-
& (df["y"] <= orig_rect[1][1])
135+
(df["x"] >= orig_rect[0][0] + EXACT_BOUNDARY_EPSILON)
136+
& (df["x"] <= orig_rect[1][0] - EXACT_BOUNDARY_EPSILON)
137+
& (df["y"] >= orig_rect[0][1] + EXACT_BOUNDARY_EPSILON)
138+
& (df["y"] <= orig_rect[1][1] - EXACT_BOUNDARY_EPSILON)
123139
)
124140
dumb_df_subset = df.loc[in_rect]
141+
# Get the row indices of the points in the rectangle
142+
# (these are the indices in the original dataframe)
125143
exact_row_indices = dumb_df_subset.index.tolist()
126144

127-
diff_rows = set(estimated_row_indices) - set(exact_row_indices)
128-
# print("Rows in estimated but not exact:", diff_rows)
129-
print(df.iloc[list(diff_rows)])
130-
raise NotImplementedError("Debugging")
145+
# Query 2: Do a second check, this time against x_uint/y_uint (the normalized coordinates)
146+
norm_rect = [
147+
orig_coord_to_norm_coord(orig_rect[0], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max),
148+
orig_coord_to_norm_coord(orig_rect[1], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max)
149+
]
150+
151+
norm_rect_rounded = [
152+
# TODO: both corners are currently floored; should the upper corner use ceil instead?
153+
[np.floor(norm_rect[0][0]), np.floor(norm_rect[0][1])],
154+
[np.floor(norm_rect[1][0]), np.floor(norm_rect[1][1])]
155+
]
156+
157+
in_rect_norm = (
158+
(df["x_uint"] >= norm_rect_rounded[0][0] + EXACT_BOUNDARY_EPSILON)
159+
& (df["x_uint"] < norm_rect_rounded[1][0] - EXACT_BOUNDARY_EPSILON)
160+
& (df["y_uint"] >= norm_rect_rounded[0][1] + EXACT_BOUNDARY_EPSILON)
161+
& (df["y_uint"] < norm_rect_rounded[1][1] - EXACT_BOUNDARY_EPSILON)
162+
)
163+
dumb_df_subset_norm = df.loc[in_rect_norm]
164+
# Get the row indices of the points in the rectangle
165+
# (these are the indices in the original dataframe)
166+
exact_row_indices_norm = dumb_df_subset_norm.index.tolist()
167+
168+
# A.issubset(B)
169+
# True if A is a subset of B and False otherwise.
170+
assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))
131171

132172
# Check that the estimated rows contain all of the exact rows.
133-
assert len(set(exact_row_indices).intersection(set(estimated_row_indices))) == 0
134-
assert len(exact_row_indices) <= 1123 # TODO: update
135-
assert len(estimated_row_indices) <= 1163 # TODO: update
173+
assert len(exact_row_indices) == 16678
174+
assert len(estimated_row_indices) <= 17643
136175

137-
"""
138176

139177

140178

0 commit comments

Comments
 (0)