Skip to content

Commit d3a3640

Browse files
committed
Update tests
1 parent 271869c commit d3a3640

File tree

2 files changed

+53
-26
lines changed

2 files changed

+53
-26
lines changed

src/vitessce/data_utils/spatialdata_points_zorder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ def orig_coord_to_norm_coord(orig_coord, orig_x_min, orig_x_max, orig_y_min, ori
238238
orig_x_range = orig_x_max - orig_x_min
239239
orig_y_range = orig_y_max - orig_y_min
240240
return [
241-
((orig_x - orig_x_min) / orig_x_range) * MORTON_CODE_VALUE_MAX,
242-
((orig_y - orig_y_min) / orig_y_range) * MORTON_CODE_VALUE_MAX,
241+
np.float64(((orig_x - orig_x_min) / orig_x_range) * MORTON_CODE_VALUE_MAX).astype(np.uint32),
242+
np.float64(((orig_y - orig_y_min) / orig_y_range) * MORTON_CODE_VALUE_MAX).astype(np.uint32),
243243
]
244244

245245
# --------------------------

tests/test_sdata_points_zorder.py

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
sdata_morton_query_rect_debug,
1212
row_ranges_to_row_indices,
1313
orig_coord_to_norm_coord,
14+
MORTON_CODE_VALUE_MAX,
1415
)
1516

1617
def is_sorted(l):
@@ -38,6 +39,21 @@ def test_zorder_sorting(sdata_with_points):
3839
assert is_sorted(morton_sorted)
3940

4041

42+
def norm_value_to_uint(value, v_min, v_max):
    """
    Map numeric values onto the unsigned range [0, MORTON_CODE_VALUE_MAX].

    The input is converted to float64, linearly rescaled so that ``v_min``
    maps to 0.0 and ``v_max`` maps to 1.0, clipped to that unit interval,
    multiplied by ``MORTON_CODE_VALUE_MAX`` and finally cast to ``uint32``
    (the cast discards the fractional part).

    :param value: Values to rescale. Must expose an ``astype`` method
        (presumably a NumPy array or pandas Series -- confirm with callers).
    :param v_min: Value that maps to 0.
    :param v_max: Value that maps to ``MORTON_CODE_VALUE_MAX``.
    :returns: The rescaled values with dtype ``uint32``.
    """
    # NOTE: v_max is expected to differ from v_min; an equal pair makes
    # the span zero and the division below ill-defined.
    span = v_max - v_min

    # Do the arithmetic in float64 regardless of the input dtype.
    unit_fraction = (value.astype("float64") - v_min) / span

    # Values outside [v_min, v_max] are pinned to the ends of the interval
    # before scaling up to the Morton-code coordinate range.
    scaled = np.clip(unit_fraction, 0.0, 1.0) * MORTON_CODE_VALUE_MAX
    return scaled.astype(np.uint32)
55+
56+
4157
def test_zorder_query(sdata_with_points):
4258
sdata = sdata_with_points
4359

@@ -82,7 +98,7 @@ def test_zorder_query(sdata_with_points):
8298
assert len(estimated_row_indices) <= 631
8399

84100
# Check that the number of rows checked is less than the total number of points
85-
assert len(rows_checked) <= 45237
101+
assert len(rows_checked) <= 85374
86102
assert len(matching_row_ranges) == 24 # Kind of an implementation detail.
87103

88104
# Do a second check, this time against x_uint/y_uint (the normalized coordinates)
@@ -98,17 +114,11 @@ def test_zorder_query(sdata_with_points):
98114
orig_coord_to_norm_coord(orig_rect[1], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max)
99115
]
100116

101-
norm_rect_rounded = [
102-
# TODO: should we use floor/ceil instead of round?
103-
[np.floor(norm_rect[0][0]), np.floor(norm_rect[0][1])],
104-
[np.floor(norm_rect[1][0]), np.floor(norm_rect[1][1])]
105-
]
106-
107117
in_rect_norm = (
108-
(df["x_uint"] >= norm_rect_rounded[0][0] + EXACT_BOUNDARY_EPSILON)
109-
& (df["x_uint"] < norm_rect_rounded[1][0] - EXACT_BOUNDARY_EPSILON)
110-
& (df["y_uint"] >= norm_rect_rounded[0][1] + EXACT_BOUNDARY_EPSILON)
111-
& (df["y_uint"] < norm_rect_rounded[1][1] - EXACT_BOUNDARY_EPSILON)
118+
(df["x_uint"] >= norm_rect[0][0])
119+
& (df["x_uint"] <= norm_rect[1][0])
120+
& (df["y_uint"] >= norm_rect[0][1])
121+
& (df["y_uint"] <= norm_rect[1][1])
112122
)
113123
dumb_df_subset_norm = df.loc[in_rect_norm]
114124
# Get the row indices of the points in the rectangle
@@ -119,9 +129,10 @@ def test_zorder_query(sdata_with_points):
119129
# True if A is a subset of B and False otherwise.
120130
assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))
121131

122-
assert len(exact_row_indices_norm) == 609
132+
assert len(exact_row_indices_norm) == 618
123133
assert len(estimated_row_indices) <= 631
124134

135+
125136
# ========= Another query ==========
126137
orig_rect = [[500.0, 500.0], [600.0, 600.0]] # x0, y0, x1, y1
127138

@@ -142,23 +153,39 @@ def test_zorder_query(sdata_with_points):
142153
# (these are the indices in the original dataframe)
143154
exact_row_indices = dumb_df_subset.index.tolist()
144155

156+
# Check that the estimated rows 100% contain the exact rows.
157+
# A.issubset(B) checks that all elements of A are in B ("A is a subset of B").
158+
assert set(exact_row_indices).issubset(set(estimated_row_indices))
159+
assert len(exact_row_indices) == 16678
160+
assert len(estimated_row_indices) <= 17681
161+
162+
# Check that the number of rows checked is less than the total number of points
163+
assert len(rows_checked) <= 124661
164+
assert len(matching_row_ranges) == 176 # Kind of an implementation detail.
165+
166+
# Do the same query the "dumb" way, by checking all points
167+
in_rect = (
168+
(df["x"] >= orig_rect[0][0] + EXACT_BOUNDARY_EPSILON)
169+
& (df["x"] <= orig_rect[1][0] - EXACT_BOUNDARY_EPSILON)
170+
& (df["y"] >= orig_rect[0][1] + EXACT_BOUNDARY_EPSILON)
171+
& (df["y"] <= orig_rect[1][1] - EXACT_BOUNDARY_EPSILON)
172+
)
173+
dumb_df_subset = df.loc[in_rect]
174+
# Get the row indices of the points in the rectangle
175+
# (these are the indices in the original dataframe)
176+
exact_row_indices = dumb_df_subset.index.tolist()
177+
145178
# Query 2: Do a second check, this time against x_uint/y_uint (the normalized coordinates)
146179
norm_rect = [
147180
orig_coord_to_norm_coord(orig_rect[0], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max),
148181
orig_coord_to_norm_coord(orig_rect[1], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max)
149182
]
150183

151-
norm_rect_rounded = [
152-
# TODO: should we use floor/ceil instead of round?
153-
[np.floor(norm_rect[0][0]), np.floor(norm_rect[0][1])],
154-
[np.floor(norm_rect[1][0]), np.floor(norm_rect[1][1])]
155-
]
156-
157184
in_rect_norm = (
158-
(df["x_uint"] >= norm_rect_rounded[0][0] + EXACT_BOUNDARY_EPSILON)
159-
& (df["x_uint"] < norm_rect_rounded[1][0] - EXACT_BOUNDARY_EPSILON)
160-
& (df["y_uint"] >= norm_rect_rounded[0][1] + EXACT_BOUNDARY_EPSILON)
161-
& (df["y_uint"] < norm_rect_rounded[1][1] - EXACT_BOUNDARY_EPSILON)
185+
(df["x_uint"] >= norm_rect[0][0])
186+
& (df["x_uint"] <= norm_rect[1][0])
187+
& (df["y_uint"] >= norm_rect[0][1])
188+
& (df["y_uint"] <= norm_rect[1][1])
162189
)
163190
dumb_df_subset_norm = df.loc[in_rect_norm]
164191
# Get the row indices of the points in the rectangle
@@ -170,7 +197,7 @@ def test_zorder_query(sdata_with_points):
170197
assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))
171198

172199
# Check that the estimated rows contain all of the exact rows.
173-
assert len(exact_row_indices) == 16678
174-
assert len(estimated_row_indices) <= 17643
200+
assert len(exact_row_indices_norm) == 17590
201+
assert len(estimated_row_indices) <= 17681
175202

176203

0 commit comments

Comments
 (0)