Update tests

keller-mark · keller-mark · commit d3a36400e208 · 2025-09-26T14:54:45.000-04:00
diff --git a/src/vitessce/data_utils/spatialdata_points_zorder.py b/src/vitessce/data_utils/spatialdata_points_zorder.py
@@ -238,8 +238,8 @@ def orig_coord_to_norm_coord(orig_coord, orig_x_min, orig_x_max, orig_y_min, ori
     orig_x_range = orig_x_max - orig_x_min
     orig_y_range = orig_y_max - orig_y_min
     return [
-        ((orig_x - orig_x_min) / orig_x_range) * MORTON_CODE_VALUE_MAX,
-        ((orig_y - orig_y_min) / orig_y_range) * MORTON_CODE_VALUE_MAX,
+        np.float64(((orig_x - orig_x_min) / orig_x_range) * MORTON_CODE_VALUE_MAX).astype(np.uint32),
+        np.float64(((orig_y - orig_y_min) / orig_y_range) * MORTON_CODE_VALUE_MAX).astype(np.uint32),
     ]
 
 # --------------------------
diff --git a/tests/test_sdata_points_zorder.py b/tests/test_sdata_points_zorder.py
@@ -11,6 +11,7 @@
     sdata_morton_query_rect_debug,
     row_ranges_to_row_indices,
     orig_coord_to_norm_coord,
+    MORTON_CODE_VALUE_MAX,
 )
 
 def is_sorted(l):
@@ -38,6 +39,21 @@ def test_zorder_sorting(sdata_with_points):
     assert is_sorted(morton_sorted)
 
 
+def norm_value_to_uint(value, v_min, v_max):
+    """
+    Scale numeric values (int or float) to unsigned integers in [0, MORTON_CODE_VALUE_MAX].
+    """
+    # Cast to float64
+    value_f64 = value.astype("float64")
+    # Normalize the array values to be between 0.0 and 1.0
+    norm_value_f64 = (value_f64 - v_min) / (v_max - v_min)
+    # Clip to ensure no values are outside 0/1 range
+    clipped_norm_value_f64 = np.clip(norm_value_f64, 0.0, 1.0)
+    # Multiply by the morton code max-value to scale from [0,1] to [0,65535]
+    out = (clipped_norm_value_f64 * MORTON_CODE_VALUE_MAX).astype(np.uint32)
+    return out
+
+
 def test_zorder_query(sdata_with_points):
     sdata = sdata_with_points
 
@@ -82,7 +98,7 @@ def test_zorder_query(sdata_with_points):
     assert len(estimated_row_indices) <= 631
 
     # Check that the number of rows checked is less than the total number of points
-    assert len(rows_checked) <= 45237
+    assert len(rows_checked) <= 85374
     assert len(matching_row_ranges) == 24 # Kind of an implementation detail.
 
     # Do a second check, this time against x_uint/y_uint (the normalized coordinates)
@@ -98,17 +114,11 @@ def test_zorder_query(sdata_with_points):
         orig_coord_to_norm_coord(orig_rect[1], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max)
     ]
 
-    norm_rect_rounded = [
-        # TODO: should we use floor/ceil instead of round?
-        [np.floor(norm_rect[0][0]), np.floor(norm_rect[0][1])],
-        [np.floor(norm_rect[1][0]), np.floor(norm_rect[1][1])]
-    ]
-
     in_rect_norm = (
-        (df["x_uint"] >= norm_rect_rounded[0][0] + EXACT_BOUNDARY_EPSILON)
-        & (df["x_uint"] < norm_rect_rounded[1][0] - EXACT_BOUNDARY_EPSILON)
-        & (df["y_uint"] >= norm_rect_rounded[0][1] + EXACT_BOUNDARY_EPSILON)
-        & (df["y_uint"] < norm_rect_rounded[1][1] - EXACT_BOUNDARY_EPSILON)
+        (df["x_uint"] >= norm_rect[0][0])
+        & (df["x_uint"] <= norm_rect[1][0])
+        & (df["y_uint"] >= norm_rect[0][1])
+        & (df["y_uint"] <= norm_rect[1][1])
     )
     dumb_df_subset_norm = df.loc[in_rect_norm]
     # Get the row indices of the points in the rectangle
@@ -119,9 +129,10 @@ def test_zorder_query(sdata_with_points):
     # True if A is a subset of B and False otherwise.
     assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))
 
-    assert len(exact_row_indices_norm) == 609
+    assert len(exact_row_indices_norm) == 618
     assert len(estimated_row_indices) <= 631
 
+    
     # ========= Another query ==========
     orig_rect = [[500.0, 500.0], [600.0, 600.0]] # x0, y0, x1, y1
 
@@ -142,23 +153,39 @@ def test_zorder_query(sdata_with_points):
     # (these are the indices in the original dataframe)
     exact_row_indices = dumb_df_subset.index.tolist()
 
+    # Check that the estimated rows 100% contain the exact rows.
+    # A.issubset(B) checks that all elements of A are in B ("A is a subset of B").
+    assert set(exact_row_indices).issubset(set(estimated_row_indices))
+    assert len(exact_row_indices) == 16678
+    assert len(estimated_row_indices) <= 17681
+
+    # Check that the number of rows checked is less than the total number of points
+    assert len(rows_checked) <= 124661
+    assert len(matching_row_ranges) == 176 # Kind of an implementation detail.
+
+    # Do the same query the "dumb" way, by checking all points
+    in_rect = (
+        (df["x"] >= orig_rect[0][0] + EXACT_BOUNDARY_EPSILON)
+        & (df["x"] <= orig_rect[1][0] - EXACT_BOUNDARY_EPSILON)
+        & (df["y"] >= orig_rect[0][1] + EXACT_BOUNDARY_EPSILON)
+        & (df["y"] <= orig_rect[1][1] - EXACT_BOUNDARY_EPSILON)
+    )
+    dumb_df_subset = df.loc[in_rect]
+    # Get the row indices of the points in the rectangle
+    # (these are the indices in the original dataframe)
+    exact_row_indices = dumb_df_subset.index.tolist()
+
     # Query 2: Do a second check, this time against x_uint/y_uint (the normalized coordinates)
     norm_rect = [
         orig_coord_to_norm_coord(orig_rect[0], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max),
         orig_coord_to_norm_coord(orig_rect[1], orig_x_min=x_min, orig_x_max=x_max, orig_y_min=y_min, orig_y_max=y_max)
     ]
 
-    norm_rect_rounded = [
-        # TODO: should we use floor/ceil instead of round?
-        [np.floor(norm_rect[0][0]), np.floor(norm_rect[0][1])],
-        [np.floor(norm_rect[1][0]), np.floor(norm_rect[1][1])]
-    ]
-
     in_rect_norm = (
-        (df["x_uint"] >= norm_rect_rounded[0][0] + EXACT_BOUNDARY_EPSILON)
-        & (df["x_uint"] < norm_rect_rounded[1][0] - EXACT_BOUNDARY_EPSILON)
-        & (df["y_uint"] >= norm_rect_rounded[0][1] + EXACT_BOUNDARY_EPSILON)
-        & (df["y_uint"] < norm_rect_rounded[1][1] - EXACT_BOUNDARY_EPSILON)
+        (df["x_uint"] >= norm_rect[0][0])
+        & (df["x_uint"] <= norm_rect[1][0])
+        & (df["y_uint"] >= norm_rect[0][1])
+        & (df["y_uint"] <= norm_rect[1][1])
     )
     dumb_df_subset_norm = df.loc[in_rect_norm]
     # Get the row indices of the points in the rectangle
@@ -170,7 +197,7 @@ def test_zorder_query(sdata_with_points):
     assert set(exact_row_indices_norm).issubset(set(estimated_row_indices))
 
     # Check that the estimated rows contain all of the exact rows.
-    assert len(exact_row_indices) == 16678
-    assert len(estimated_row_indices) <= 17643
+    assert len(exact_row_indices_norm) == 17590
+    assert len(estimated_row_indices) <= 17681