Skip to content

Commit da0cd83

Browse files
authored
Add KDTreeMetadata to Python API (#101)
1 parent d26e2cd commit da0cd83

File tree

7 files changed

+187
-22
lines changed

7 files changed

+187
-22
lines changed

python/Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

python/DEVELOP.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
To run the docs locally:
2+
3+
```
4+
uv sync --no-install-package geoindex-rs
5+
uv run --no-project maturin develop
6+
uv run --no-project mkdocs serve
7+
```

python/python/geoindex_rs/kdtree.pyi

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,16 @@ def range(
3535
Results are the insertion indexes of items that match the query.
3636
3737
Args:
38-
index: the KDTree to search
39-
min_x: The `min_x` coordinate of the query bounding box
40-
min_y: The `min_y` coordinate of the query bounding box
41-
max_x: The `max_x` coordinate of the query bounding box
42-
max_y: The `max_y` coordinate of the query bounding box
38+
index: the KDTree to search.
39+
min_x: The `min_x` coordinate of the query bounding box.
40+
min_y: The `min_y` coordinate of the query bounding box.
41+
max_x: The `max_x` coordinate of the query bounding box.
42+
max_y: The `max_y` coordinate of the query bounding box.
4343
4444
Returns:
45-
An Arrow array with the insertion indexes of query results.
45+
A uint32-typed Arrow array with the insertion indexes of query results.
4646
"""
47+
4748
def within(
4849
index: IndexLike,
4950
qx: int | float,
@@ -55,13 +56,13 @@ def within(
5556
Results are the insertion indexes of items that match the query.
5657
5758
Args:
58-
index: the KDTree to search
59-
qx: The `x` coordinate of the query point
60-
qy: The `y` coordinate of the query point
59+
index: the KDTree to search.
60+
qx: The `x` coordinate of the query point.
61+
qy: The `y` coordinate of the query point.
6162
r: The radius from the query point to use for searching.
6263
6364
Returns:
64-
An Arrow array with the insertion indexes of query results.
65+
A uint32-typed Arrow array with the insertion indexes of query results.
6566
"""
6667

6768
class KDTreeBuilder:
@@ -149,3 +150,49 @@ class KDTree(Buffer):
149150
object.
150151
"""
151152
def __repr__(self) -> str: ...
153+
154+
class KDTreeMetadata:
155+
"""Common metadata to describe a KDTree.
156+
157+
This can be used to know the number of items, node information, or total byte size
158+
of a KDTree.
159+
160+
Additionally, this can be used to know how much memory a KDTree **would use** with
161+
the given number of items and node size. A KDTree with 1 million items and a node
162+
size of 64 (the default) would take up about 20 MB (20,000,008 bytes).
163+
164+
```py
165+
from geoindex_rs import kdtree as kd
166+
167+
metadata = kd.KDTreeMetadata(num_items=1_000_000, node_size=64)
168+
assert metadata.num_bytes == 20_000_008
169+
```
170+
"""
171+
172+
def __init__(
173+
self,
174+
num_items: int,
175+
node_size: int = 64,
176+
coord_type: Literal["float32", "float64"] = "float64",
177+
) -> None:
178+
"""Create a new KDTreeMetadata given a number of items and node size.
179+
180+
Args:
181+
num_items: The number of items in the tree.
182+
node_size: The node size of the tree. Defaults to 64.
183+
coord_type: The coordinate type to use in the tree. Currently only float32
184+
and float64 are permitted. Defaults to `"float64"`.
185+
"""
186+
@classmethod
187+
def from_index(cls, index: IndexLike) -> KDTreeMetadata:
188+
"""Create from an existing KDTree buffer."""
189+
def __repr__(self) -> str: ...
190+
@property
191+
def num_items(self) -> int:
192+
"""The number of items indexed in the tree."""
193+
@property
194+
def node_size(self) -> int:
195+
"""The maximum number of items per node."""
196+
@property
197+
def num_bytes(self) -> int:
198+
"""The number of bytes that a KDTree with this metadata would have."""

python/python/geoindex_rs/rtree.pyi

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def boxes_at_level(index: IndexLike, level: int) -> Array:
3636
The returned array is a zero-copy view from Rust. Note that it will keep
3737
the entire index memory alive until the returned array is garbage collected.
3838
"""
39+
3940
def tree_join(
4041
left: IndexLike,
4142
right: IndexLike,
@@ -150,6 +151,7 @@ def partitions(index: IndexLike) -> RecordBatch:
150151
index. Therefore, the `indices` array will have type `uint16` if the tree
151152
has fewer than 16,384 items; otherwise it will have type `uint32`.
152153
"""
154+
153155
def partition_boxes(index: IndexLike) -> RecordBatch:
154156
"""Extract the geometries of the spatial partitions from an RTree.
155157
@@ -174,6 +176,7 @@ def partition_boxes(index: IndexLike) -> RecordBatch:
174176
data. The `partition_id` column will be `uint16` type if there are fewer than
175177
65,536 partitions; otherwise it will be `uint32` type.
176178
"""
179+
177180
def search(
178181
index: IndexLike,
179182
min_x: int | float,
@@ -199,7 +202,7 @@ def search(
199202
class RTreeMetadata:
200203
"""Common metadata to describe an RTree.
201204
202-
This can be used to know the number of items, node informatino, or total byte size
205+
This can be used to know the number of items, node information, or total byte size
203206
of an RTree.
204207
205208
Additionally, this can be used to know how much memory an RTree **would use** with
@@ -335,7 +338,9 @@ class RTreeBuilder:
335338
It's important to add _arrays_ at a time. This should usually not be called in a loop.
336339
337340
Args:
338-
min_x: array-like input
341+
min_x: array-like input. If this is the only provided input, it should
342+
represent the entire bounding box, as described above. Otherwise, pass
343+
four separate parameters.
339344
min_y: array-like input. Defaults to None.
340345
max_x: array-like input. Defaults to None.
341346
max_y: array-like input. Defaults to None.
@@ -347,7 +352,10 @@ class RTreeBuilder:
347352
"""Sort the internal index and convert this class to an RTree instance.
348353
349354
Args:
350-
method: The method used for sorting the RTree. `"hilbert"` will use a [Hilbert Curve](https://en.wikipedia.org/wiki/Hilbert_R-tree#Packed_Hilbert_R-trees) for sorting; `"str"` will use the [Sort-Tile-Recursive](https://ia600900.us.archive.org/27/items/nasa_techdoc_19970016975/19970016975.pdf) algorithm. Defaults to `"hilbert"`.
355+
method: The method used for sorting the RTree. Defaults to `"hilbert"`.
356+
357+
- `"hilbert"` will use a [Hilbert Curve](https://en.wikipedia.org/wiki/Hilbert_R-tree#Packed_Hilbert_R-trees) for sorting.
358+
- `"str"` will use the [Sort-Tile-Recursive](https://ia600900.us.archive.org/27/items/nasa_techdoc_19970016975/19970016975.pdf) algorithm.
351359
352360
Returns:
353361
An immutable RTree instance, which can be used for spatial queries.

python/src/kdtree/metadata.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
use geo_index::kdtree::{KDTreeIndex, KDTreeMetadata, DEFAULT_KDTREE_NODE_SIZE};
2+
use pyo3::prelude::*;
3+
use pyo3::types::PyType;
4+
5+
use crate::coord_type::CoordType;
6+
use crate::kdtree::input::PyKDTreeRef;
7+
8+
pub(crate) enum PyKDTreeMetadataInner {
9+
Float32(KDTreeMetadata<f32>),
10+
Float64(KDTreeMetadata<f64>),
11+
}
12+
13+
impl PyKDTreeMetadataInner {
14+
fn node_size(&self) -> u16 {
15+
match self {
16+
Self::Float32(meta) => meta.node_size(),
17+
Self::Float64(meta) => meta.node_size(),
18+
}
19+
}
20+
21+
fn num_items(&self) -> u32 {
22+
match self {
23+
Self::Float32(meta) => meta.num_items(),
24+
Self::Float64(meta) => meta.num_items(),
25+
}
26+
}
27+
28+
fn num_bytes(&self) -> usize {
29+
match self {
30+
Self::Float32(meta) => meta.data_buffer_length(),
31+
Self::Float64(meta) => meta.data_buffer_length(),
32+
}
33+
}
34+
}
35+
36+
#[pyclass(name = "KDTreeMetadata")]
37+
pub struct PyKDTreeMetadata(PyKDTreeMetadataInner);
38+
39+
#[pymethods]
40+
impl PyKDTreeMetadata {
41+
#[new]
42+
#[pyo3(signature = (num_items, node_size = DEFAULT_KDTREE_NODE_SIZE, coord_type = None))]
43+
fn new(num_items: u32, node_size: u16, coord_type: Option<CoordType>) -> Self {
44+
let coord_type = coord_type.unwrap_or(CoordType::Float64);
45+
match coord_type {
46+
CoordType::Float32 => Self(PyKDTreeMetadataInner::Float32(KDTreeMetadata::<f32>::new(
47+
num_items, node_size,
48+
))),
49+
CoordType::Float64 => Self(PyKDTreeMetadataInner::Float64(KDTreeMetadata::<f64>::new(
50+
num_items, node_size,
51+
))),
52+
}
53+
}
54+
55+
#[classmethod]
56+
fn from_index(_cls: &Bound<PyType>, index: PyKDTreeRef) -> PyResult<Self> {
57+
match index {
58+
PyKDTreeRef::Float32(tree) => {
59+
Ok(Self(PyKDTreeMetadataInner::Float32(*tree.metadata())))
60+
}
61+
PyKDTreeRef::Float64(tree) => {
62+
Ok(Self(PyKDTreeMetadataInner::Float64(*tree.metadata())))
63+
}
64+
}
65+
}
66+
67+
fn __repr__(&self) -> String {
68+
format!(
69+
"KDTreeMetadata(num_items={}, node_size={})",
70+
self.0.num_items(),
71+
self.0.node_size()
72+
)
73+
}
74+
75+
#[getter]
76+
fn node_size(&self) -> u16 {
77+
self.0.node_size()
78+
}
79+
80+
#[getter]
81+
fn num_items(&self) -> u32 {
82+
self.0.num_items()
83+
}
84+
85+
#[getter]
86+
fn num_bytes(&self) -> usize {
87+
self.0.num_bytes()
88+
}
89+
}
90+
91+
impl From<KDTreeMetadata<f32>> for PyKDTreeMetadata {
92+
fn from(value: KDTreeMetadata<f32>) -> Self {
93+
Self(PyKDTreeMetadataInner::Float32(value))
94+
}
95+
}
96+
97+
impl From<KDTreeMetadata<f64>> for PyKDTreeMetadata {
98+
fn from(value: KDTreeMetadata<f64>) -> Self {
99+
Self(PyKDTreeMetadataInner::Float64(value))
100+
}
101+
}

python/src/kdtree/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
mod builder;
22
mod input;
3+
mod metadata;
34
mod range;
45
mod within;
56

@@ -19,6 +20,7 @@ pub fn register_kdtree_module(
1920

2021
child_module.add_class::<builder::PyKDTree>()?;
2122
child_module.add_class::<builder::PyKDTreeBuilder>()?;
23+
child_module.add_class::<metadata::PyKDTreeMetadata>()?;
2224
child_module.add_wrapped(wrap_pyfunction!(range::range))?;
2325
child_module.add_wrapped(wrap_pyfunction!(within::within))?;
2426

python/src/rtree/metadata.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ impl PyRTreeMetadataInner {
3939
}
4040
}
4141

42-
fn data_buffer_length(&self) -> usize {
42+
fn num_bytes(&self) -> usize {
4343
match self {
4444
Self::Float32(meta) => meta.data_buffer_length(),
4545
Self::Float64(meta) => meta.data_buffer_length(),
@@ -57,12 +57,12 @@ impl PyRTreeMetadata {
5757
fn new(num_items: u32, node_size: u16, coord_type: Option<CoordType>) -> Self {
5858
let coord_type = coord_type.unwrap_or(CoordType::Float64);
5959
match coord_type {
60-
CoordType::Float32 => Self(PyRTreeMetadataInner::Float32(
61-
geo_index::rtree::RTreeMetadata::<f32>::new(num_items, node_size),
62-
)),
63-
CoordType::Float64 => Self(PyRTreeMetadataInner::Float64(
64-
geo_index::rtree::RTreeMetadata::<f64>::new(num_items, node_size),
65-
)),
60+
CoordType::Float32 => Self(PyRTreeMetadataInner::Float32(RTreeMetadata::<f32>::new(
61+
num_items, node_size,
62+
))),
63+
CoordType::Float64 => Self(PyRTreeMetadataInner::Float64(RTreeMetadata::<f64>::new(
64+
num_items, node_size,
65+
))),
6666
}
6767
}
6868

@@ -113,7 +113,7 @@ impl PyRTreeMetadata {
113113

114114
#[getter]
115115
fn num_bytes(&self) -> usize {
116-
self.0.data_buffer_length()
116+
self.0.num_bytes()
117117
}
118118
}
119119

0 commit comments

Comments
 (0)