Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ tests = [
"pytest",
]

# Publishes `grid_indexing.distributed:sizeof_plugin` under the "dask.sizeof" entry-point group.
[project.entry-points."dask.sizeof"]
grid-indexing = "grid_indexing.distributed:sizeof_plugin"

[tool.maturin]
python-source = "python"
features = ["pyo3/extension-module"]
Expand Down
6 changes: 6 additions & 0 deletions python/grid_indexing/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,9 @@ def query_overlap(self, geoms):
output_chunks[indices] = chunk

return da.block(output_chunks.tolist())


def sizeof_plugin(sizeof):
    """Register a size handler for ``Index`` objects on the given dispatcher.

    Parameters
    ----------
    sizeof
        Dispatcher object exposing ``register(type)``; the handler for
        ``Index`` is attached to it.
    """

    def sizeof_index(index):
        # The index carries its own size estimate.
        return index.nbytes

    sizeof.register(Index)(sizeof_index)
85 changes: 79 additions & 6 deletions src/index.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use bincode::{deserialize, serialize};
use std::mem;
use std::ops::Deref;

use geo::{Polygon, Relate};
use geo::{CoordsIter, Polygon, Relate};
use geoarrow::array::{ArrayBase, PolygonArray};
use geoarrow::trait_::{ArrayAccessor, NativeScalar};
use pyo3::exceptions::PyRuntimeError;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{IntoPyDict, PyBytes, PyType};
use pyo3_arrow::PyArray;
use rstar::{primitives::CachedEnvelope, RTree, RTreeObject};
use rstar::{primitives::CachedEnvelope, ParentNode, RTree, RTreeNode, RTreeObject};
use serde::{Deserialize, Serialize};

use super::trait_::{AsPolygonArray, AsSparse};
Expand All @@ -31,6 +32,13 @@ impl NumberedCell {
/// Borrow the polygon stored in this cell's `envelope` field.
pub fn geometry(&self) -> &Polygon {
// `envelope` dereferences to the polygon it wraps.
self.envelope.deref()
}

/// Estimate the memory used by this cell, in bytes.
///
/// The estimate is the inline size of the struct plus the polygon's
/// coordinates, counted as two `f64` values per coordinate.
pub fn num_bytes(&self) -> usize {
    let coord_bytes = (*self.envelope).coords_count() * 2 * mem::size_of::<f64>();

    mem::size_of_val(self) + coord_bytes
}
}

impl RTreeObject for NumberedCell {
Expand All @@ -41,14 +49,76 @@ impl RTreeObject for NumberedCell {
}
}

/// Borrowed handle to a leaf entry (a `NumberedCell`) of the tree.
struct LeafReference<'a> {
// The borrowed cell.
reference: &'a NumberedCell,
}

/// Borrowed handle to an inner (parent) node of the tree.
struct ParentNodeReference<'a> {
// The borrowed parent node.
reference: &'a ParentNode<NumberedCell>,
}

/// A borrowed tree entry: either an inner node or a leaf.
enum NodeReference<'a> {
// Inner node.
Node(ParentNodeReference<'a>),
// Leaf entry holding a cell.
Leaf(LeafReference<'a>),
}

/// Estimate the memory footprint of `tree`, in bytes.
///
/// Starts from the shallow size of the tree itself, then walks every node
/// reachable from the root: each parent node contributes its shallow size
/// (`mem::size_of_val`) and each leaf contributes `NumberedCell::num_bytes`.
///
/// NOTE(review): if `RTree` stores its root node inline, the root's shallow
/// size is included both in `size_of_val(tree)` and in the traversal below —
/// confirm whether that small overcount matters.
fn estimate_tree_size(tree: &RTree<NumberedCell>) -> usize {
    let mut nbytes = mem::size_of_val(tree);

    // Explicit work list instead of recursion; the visiting order does not
    // matter because the sizes are only summed.
    let mut to_visit: Vec<NodeReference> = vec![NodeReference::Node(ParentNodeReference {
        reference: tree.root(),
    })];

    while let Some(item) = to_visit.pop() {
        match item {
            NodeReference::Node(parent) => {
                nbytes += mem::size_of_val(parent.reference);

                // Feed the children straight into the work list; no
                // intermediate vector is needed.
                to_visit.extend(parent.reference.children().iter().map(|child| match child {
                    RTreeNode::Parent(p) => {
                        NodeReference::Node(ParentNodeReference { reference: p })
                    }
                    RTreeNode::Leaf(l) => NodeReference::Leaf(LeafReference { reference: l }),
                }));
            }
            NodeReference::Leaf(leaf) => {
                nbytes += leaf.reference.num_bytes();
            }
        }
    }

    nbytes
}

// Index over `NumberedCell`s backed by an R-tree, exported as a Python class
// in the `grid_indexing` module. The derives make both fields part of the
// serialized form.
#[derive(Serialize, Deserialize, Debug)]
#[pyclass]
#[pyo3(module = "grid_indexing")]
pub struct Index {
// R-tree holding the cells.
tree: RTree<NumberedCell>,
// Size estimate in bytes, as produced by `estimate_tree_size` in `from_tree`.
num_bytes: usize,
}

impl Index {
/// Wrap an already-built tree, recording its estimated size alongside it.
///
/// The size is computed once here, before the tree is moved into the index.
fn from_tree(tree: RTree<NumberedCell>) -> Self {
    let num_bytes = estimate_tree_size(&tree);

    Index { tree, num_bytes }
}

pub fn create(cell_geoms: PolygonArray) -> Self {
let cells: Vec<_> = cell_geoms
.iter()
Expand All @@ -57,9 +127,7 @@ impl Index {
.map(|c| NumberedCell::new(c.0, c.1.to_geo()))
.collect();

Index {
tree: RTree::bulk_load_with_params(cells),
}
Self::from_tree(RTree::bulk_load_with_params(cells))
}

fn overlaps_one(&self, cell: Polygon) -> Vec<usize> {
Expand Down Expand Up @@ -88,7 +156,7 @@ impl Index {

/// Create an index that contains no cells.
#[pyfunction]
pub fn create_empty() -> Index {
    // Build through `from_tree` so that `num_bytes` is initialised the same
    // way as for every other index; a bare `Index { tree: .. }` literal would
    // leave that field unset.
    Index::from_tree(RTree::new())
}

#[pymethods]
Expand Down Expand Up @@ -128,6 +196,11 @@ impl Index {
))
}

// Getter exposing the stored size estimate (`num_bytes`), in bytes.
#[getter]
pub fn nbytes(&self) -> PyResult<usize> {
// Always succeeds; the value is read from the field, not recomputed.
Ok(self.num_bytes)
}

#[classmethod]
pub fn from_shapely(_cls: &Bound<'_, PyType>, geoms: &Bound<PyAny>) -> PyResult<Self> {
let array = Python::with_gil(|py| {
Expand Down
Loading