1111 sdata_morton_query_rect_debug ,
1212 row_ranges_to_row_indices ,
1313 orig_coord_to_norm_coord ,
14+ MORTON_CODE_VALUE_MAX ,
1415)
1516
1617def is_sorted (l ):
@@ -38,6 +39,21 @@ def test_zorder_sorting(sdata_with_points):
3839 assert is_sorted (morton_sorted )
3940
4041
def norm_value_to_uint(value, v_min, v_max):
    """
    Rescale values from the range [v_min, v_max] onto unsigned integers in
    [0, MORTON_CODE_VALUE_MAX].

    Parameters
    ----------
    value
        Array-like object exposing ``.astype`` (plain Python scalars are not
        supported because ``.astype`` is called on it directly).
    v_min, v_max
        Lower and upper bounds of the original value range. Inputs outside
        this range are clamped to the nearest bound.
        NOTE(review): ``v_max == v_min`` divides by zero; callers presumably
        never pass a degenerate range -- confirm.

    Returns
    -------
    The rescaled values cast to ``np.uint32``.
    """
    # Work in float64 so the normalization below is done in floating point.
    as_float = value.astype("float64")
    # Position of each value inside the [v_min, v_max] range, clamped to
    # [0.0, 1.0] so out-of-range inputs land on the boundary.
    span = v_max - v_min
    fraction = np.clip((as_float - v_min) / span, 0.0, 1.0)
    # Stretch the unit interval onto the Morton code value range
    # (presumably [0, 65535] -- verify against MORTON_CODE_VALUE_MAX) and
    # cast to an unsigned integer type.
    return (fraction * MORTON_CODE_VALUE_MAX).astype(np.uint32)
55+
56+
4157def test_zorder_query (sdata_with_points ):
4258 sdata = sdata_with_points
4359
@@ -82,7 +98,7 @@ def test_zorder_query(sdata_with_points):
8298 assert len (estimated_row_indices ) <= 631
8399
84100 # Check that the number of rows checked is less than the total number of points
85- assert len (rows_checked ) <= 45237
101+ assert len (rows_checked ) <= 85374
86102 assert len (matching_row_ranges ) == 24 # Kind of an implementation detail.
87103
88104 # Do a second check, this time against x_uint/y_uint (the normalized coordinates)
@@ -98,17 +114,11 @@ def test_zorder_query(sdata_with_points):
98114 orig_coord_to_norm_coord (orig_rect [1 ], orig_x_min = x_min , orig_x_max = x_max , orig_y_min = y_min , orig_y_max = y_max )
99115 ]
100116
101- norm_rect_rounded = [
102- # TODO: should we use floor/ceil instead of round?
103- [np .floor (norm_rect [0 ][0 ]), np .floor (norm_rect [0 ][1 ])],
104- [np .floor (norm_rect [1 ][0 ]), np .floor (norm_rect [1 ][1 ])]
105- ]
106-
107117 in_rect_norm = (
108- (df ["x_uint" ] >= norm_rect_rounded [0 ][0 ] + EXACT_BOUNDARY_EPSILON )
109- & (df ["x_uint" ] < norm_rect_rounded [1 ][0 ] - EXACT_BOUNDARY_EPSILON )
110- & (df ["y_uint" ] >= norm_rect_rounded [0 ][1 ] + EXACT_BOUNDARY_EPSILON )
111- & (df ["y_uint" ] < norm_rect_rounded [1 ][1 ] - EXACT_BOUNDARY_EPSILON )
118+ (df ["x_uint" ] >= norm_rect [0 ][0 ])
119+ & (df ["x_uint" ] <= norm_rect [1 ][0 ])
120+ & (df ["y_uint" ] >= norm_rect [0 ][1 ])
121+ & (df ["y_uint" ] <= norm_rect [1 ][1 ])
112122 )
113123 dumb_df_subset_norm = df .loc [in_rect_norm ]
114124 # Get the row indices of the points in the rectangle
@@ -119,9 +129,10 @@ def test_zorder_query(sdata_with_points):
119129 # True if A is a subset of B and False otherwise.
120130 assert set (exact_row_indices_norm ).issubset (set (estimated_row_indices ))
121131
122- assert len (exact_row_indices_norm ) == 609
132+ assert len (exact_row_indices_norm ) == 618
123133 assert len (estimated_row_indices ) <= 631
124134
135+
125136 # ========= Another query ==========
126137 orig_rect = [[500.0 , 500.0 ], [600.0 , 600.0 ]] # x0, y0, x1, y1
127138
@@ -142,23 +153,39 @@ def test_zorder_query(sdata_with_points):
142153 # (these are the indices in the original dataframe)
143154 exact_row_indices = dumb_df_subset .index .tolist ()
144155
156+ # Check that the estimated rows 100% contain the exact rows.
157+ # A.issubset(B) checks that all elements of A are in B ("A is a subset of B").
158+ assert set (exact_row_indices ).issubset (set (estimated_row_indices ))
159+ assert len (exact_row_indices ) == 16678
160+ assert len (estimated_row_indices ) <= 17681
161+
162+ # Check that the number of rows checked is less than the total number of points
163+ assert len (rows_checked ) <= 124661
164+ assert len (matching_row_ranges ) == 176 # Kind of an implementation detail.
165+
166+ # Do the same query the "dumb" way, by checking all points
167+ in_rect = (
168+ (df ["x" ] >= orig_rect [0 ][0 ] + EXACT_BOUNDARY_EPSILON )
169+ & (df ["x" ] <= orig_rect [1 ][0 ] - EXACT_BOUNDARY_EPSILON )
170+ & (df ["y" ] >= orig_rect [0 ][1 ] + EXACT_BOUNDARY_EPSILON )
171+ & (df ["y" ] <= orig_rect [1 ][1 ] - EXACT_BOUNDARY_EPSILON )
172+ )
173+ dumb_df_subset = df .loc [in_rect ]
174+ # Get the row indices of the points in the rectangle
175+ # (these are the indices in the original dataframe)
176+ exact_row_indices = dumb_df_subset .index .tolist ()
177+
145178 # Query 2: Do a second check, this time against x_uint/y_uint (the normalized coordinates)
146179 norm_rect = [
147180 orig_coord_to_norm_coord (orig_rect [0 ], orig_x_min = x_min , orig_x_max = x_max , orig_y_min = y_min , orig_y_max = y_max ),
148181 orig_coord_to_norm_coord (orig_rect [1 ], orig_x_min = x_min , orig_x_max = x_max , orig_y_min = y_min , orig_y_max = y_max )
149182 ]
150183
151- norm_rect_rounded = [
152- # TODO: should we use floor/ceil instead of round?
153- [np .floor (norm_rect [0 ][0 ]), np .floor (norm_rect [0 ][1 ])],
154- [np .floor (norm_rect [1 ][0 ]), np .floor (norm_rect [1 ][1 ])]
155- ]
156-
157184 in_rect_norm = (
158- (df ["x_uint" ] >= norm_rect_rounded [0 ][0 ] + EXACT_BOUNDARY_EPSILON )
159- & (df ["x_uint" ] < norm_rect_rounded [1 ][0 ] - EXACT_BOUNDARY_EPSILON )
160- & (df ["y_uint" ] >= norm_rect_rounded [0 ][1 ] + EXACT_BOUNDARY_EPSILON )
161- & (df ["y_uint" ] < norm_rect_rounded [1 ][1 ] - EXACT_BOUNDARY_EPSILON )
185+ (df ["x_uint" ] >= norm_rect [0 ][0 ])
186+ & (df ["x_uint" ] <= norm_rect [1 ][0 ])
187+ & (df ["y_uint" ] >= norm_rect [0 ][1 ])
188+ & (df ["y_uint" ] <= norm_rect [1 ][1 ])
162189 )
163190 dumb_df_subset_norm = df .loc [in_rect_norm ]
164191 # Get the row indices of the points in the rectangle
@@ -170,7 +197,7 @@ def test_zorder_query(sdata_with_points):
170197 assert set (exact_row_indices_norm ).issubset (set (estimated_row_indices ))
171198
172199 # Check that the estimated rows contain all of the exact rows.
173- assert len (exact_row_indices ) == 16678
174- assert len (estimated_row_indices ) <= 17643
200+ assert len (exact_row_indices_norm ) == 17590
201+ assert len (estimated_row_indices ) <= 17681
175202
176203
0 commit comments