Skip to content

Commit 08b448b

Browse files
authored
Split out WKB crate (redux) (#856)
TODO:
- Fix handling of "maybe_multi" that was removed. Add `from_geometries` to capacity counters and to geometry builders.
- Restore tests from deleted reading files

Closes #825, closes #843
1 parent 157a46c commit 08b448b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+460
-2582
lines changed

Cargo.lock

Lines changed: 12 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

python/Cargo.lock

Lines changed: 12 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/geoarrow/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ arrow-ipc = "53"
5454
arrow-schema = "53"
5555
async-stream = { version = "0.3", optional = true }
5656
async-trait = { version = "0.1", optional = true }
57-
byteorder = "1"
5857
bytes = { version = "1.5.0", optional = true }
5958
chrono = { version = "0.4" }
6059
dbase = "0.5.0"
@@ -72,7 +71,6 @@ half = { version = "2.4.1" }
7271
http-range-client = { version = "0.8", optional = true }
7372
indexmap = { version = "2" }
7473
lexical-core = { version = "0.8.5" }
75-
num_enum = "0.7"
7674
object_store = { version = "0.11", optional = true }
7775
parquet = { version = "53", optional = true, default-features = false, features = [
7876
"arrow",
@@ -99,6 +97,7 @@ thiserror = "1"
9997
tokio = { version = "1", default-features = false, optional = true }
10098
# wkt = "0.11"
10199
wkt = { git = "https://github.com/georust/wkt", branch = "kyle/geo-traits-writer" }
100+
wkb = { git = "https://github.com/kylebarron/wkb", rev = "7d58a2327fe21cf250dab5ac6860b6cf0fddb838" }
102101

103102

104103
[dev-dependencies]

rust/geoarrow/src/algorithm/native/type_id.rs

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::array::*;
22
use crate::trait_::ArrayAccessor;
33
use arrow::array::Int16Builder;
4-
use arrow_array::{Int16Array, OffsetSizeTrait};
4+
use arrow_array::Int16Array;
55
use std::collections::HashSet;
66

77
/// Calculation of the geometry types within a GeometryArray
@@ -115,26 +115,28 @@ impl TypeIds for MixedGeometryArray<2> {
115115
}
116116
}
117117

118-
impl<O: OffsetSizeTrait> TypeIds for WKBArray<O> {
119-
fn get_type_ids(&self) -> Int16Array {
120-
let mut output_array = Int16Builder::with_capacity(self.len());
121-
self.iter().for_each(|maybe_wkb| {
122-
output_array.append_option(maybe_wkb.map(|wkb| {
123-
let type_id = u32::from(wkb.wkb_type().unwrap());
124-
type_id.try_into().unwrap()
125-
}))
126-
});
127-
128-
output_array.finish()
129-
}
130-
131-
fn get_unique_type_ids(&self) -> HashSet<i16> {
132-
let mut values = HashSet::new();
133-
self.iter().flatten().for_each(|wkb| {
134-
let type_id = u32::from(wkb.wkb_type().unwrap());
135-
values.insert(type_id.try_into().unwrap());
136-
});
137-
138-
values
139-
}
140-
}
118+
// Impl removed when `wkb` was refactored into a standalone crate.
119+
//
120+
// impl<O: OffsetSizeTrait> TypeIds for WKBArray<O> {
121+
// fn get_type_ids(&self) -> Int16Array {
122+
// let mut output_array = Int16Builder::with_capacity(self.len());
123+
// self.iter().for_each(|maybe_wkb| {
124+
// output_array.append_option(maybe_wkb.map(|wkb| {
125+
// let type_id = u32::from(wkb.wkb_type().unwrap());
126+
// type_id.try_into().unwrap()
127+
// }))
128+
// });
129+
130+
// output_array.finish()
131+
// }
132+
133+
// fn get_unique_type_ids(&self) -> HashSet<i16> {
134+
// let mut values = HashSet::new();
135+
// self.iter().flatten().for_each(|wkb| {
136+
// let type_id = u32::from(wkb.wkb_type().unwrap());
137+
// values.insert(type_id.try_into().unwrap());
138+
// });
139+
140+
// values
141+
// }
142+
// }

rust/geoarrow/src/array/binary/array.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,10 @@ impl<O: OffsetSizeTrait> WKBArray<O> {
5454
/// Infer the minimal NativeType that this WKBArray can be casted to.
5555
#[allow(dead_code)]
5656
// TODO: is this obsolete with new from_wkb approach that uses downcasting?
57-
pub(crate) fn infer_geo_data_type(&self, coord_type: CoordType) -> Result<NativeType> {
58-
use crate::io::wkb::reader::r#type::infer_geometry_type;
59-
infer_geometry_type(self.iter().flatten(), coord_type)
57+
pub(crate) fn infer_geo_data_type(&self, _coord_type: CoordType) -> Result<NativeType> {
58+
todo!()
59+
// use crate::io::wkb::reader::r#type::infer_geometry_type;
60+
// infer_geometry_type(self.iter().flatten(), coord_type)
6061
}
6162

6263
/// The lengths of each buffer contained in this array.

rust/geoarrow/src/array/binary/builder.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,19 @@ use std::sync::Arc;
33
use crate::array::binary::WKBCapacity;
44
use crate::array::metadata::ArrayMetadata;
55
use crate::error::{GeoArrowError, Result};
6-
use crate::io::wkb::writer::{
7-
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
8-
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
9-
write_geometry_collection_as_wkb, write_line_string_as_wkb, write_multi_line_string_as_wkb,
10-
write_multi_point_as_wkb, write_multi_polygon_as_wkb, write_point_as_wkb, write_polygon_as_wkb,
11-
};
126
use arrow_array::builder::GenericBinaryBuilder;
137
use arrow_array::OffsetSizeTrait;
148
use geo_traits::{
159
GeometryCollectionTrait, GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait,
1610
MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait,
1711
};
12+
use wkb::writer::{
13+
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
14+
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
15+
write_geometry_collection, write_line_string, write_multi_line_string, write_multi_point,
16+
write_multi_polygon, write_point, write_polygon,
17+
};
18+
use wkb::Endianness;
1819

1920
use super::array::WKBArray;
2021

@@ -82,7 +83,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
8283
if let Some(geom) = geom {
8384
// TODO: figure out how to write directly to the underlying vec without a copy
8485
let mut buf = Vec::with_capacity(point_wkb_size(geom.dim()));
85-
write_point_as_wkb(&mut buf, geom).unwrap();
86+
write_point(&mut buf, geom, Endianness::LittleEndian).unwrap();
8687
self.0.append_value(&buf)
8788
} else {
8889
self.0.append_null();
@@ -95,7 +96,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
9596
if let Some(geom) = geom {
9697
// TODO: figure out how to write directly to the underlying vec without a copy
9798
let mut buf = Vec::with_capacity(line_string_wkb_size(geom));
98-
write_line_string_as_wkb(&mut buf, geom).unwrap();
99+
write_line_string(&mut buf, geom, Endianness::LittleEndian).unwrap();
99100
self.0.append_value(&buf)
100101
} else {
101102
self.0.append_null()
@@ -108,7 +109,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
108109
if let Some(geom) = geom {
109110
// TODO: figure out how to write directly to the underlying vec without a copy
110111
let mut buf = Vec::with_capacity(polygon_wkb_size(geom));
111-
write_polygon_as_wkb(&mut buf, geom).unwrap();
112+
write_polygon(&mut buf, geom, Endianness::LittleEndian).unwrap();
112113
self.0.append_value(&buf)
113114
} else {
114115
self.0.append_null()
@@ -121,7 +122,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
121122
if let Some(geom) = geom {
122123
// TODO: figure out how to write directly to the underlying vec without a copy
123124
let mut buf = Vec::with_capacity(multi_point_wkb_size(geom));
124-
write_multi_point_as_wkb(&mut buf, geom).unwrap();
125+
write_multi_point(&mut buf, geom, Endianness::LittleEndian).unwrap();
125126
self.0.append_value(&buf)
126127
} else {
127128
self.0.append_null()
@@ -134,7 +135,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
134135
if let Some(geom) = geom {
135136
// TODO: figure out how to write directly to the underlying vec without a copy
136137
let mut buf = Vec::with_capacity(multi_line_string_wkb_size(geom));
137-
write_multi_line_string_as_wkb(&mut buf, geom).unwrap();
138+
write_multi_line_string(&mut buf, geom, Endianness::LittleEndian).unwrap();
138139
self.0.append_value(&buf)
139140
} else {
140141
self.0.append_null()
@@ -147,7 +148,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
147148
if let Some(geom) = geom {
148149
// TODO: figure out how to write directly to the underlying vec without a copy
149150
let mut buf = Vec::with_capacity(multi_polygon_wkb_size(geom));
150-
write_multi_polygon_as_wkb(&mut buf, geom).unwrap();
151+
write_multi_polygon(&mut buf, geom, Endianness::LittleEndian).unwrap();
151152
self.0.append_value(&buf)
152153
} else {
153154
self.0.append_null()
@@ -159,6 +160,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
159160
pub fn push_geometry(&mut self, geom: Option<&impl GeometryTrait<T = f64>>) {
160161
use GeometryType::*;
161162

163+
// TODO: call wkb::write_geometry directly
162164
if let Some(geom) = geom {
163165
match geom.as_type() {
164166
Point(point) => self.push_point(Some(point)),
@@ -188,7 +190,7 @@ impl<O: OffsetSizeTrait> WKBBuilder<O> {
188190
if let Some(geom) = geom {
189191
// TODO: figure out how to write directly to the underlying vec without a copy
190192
let mut buf = Vec::with_capacity(geometry_collection_wkb_size(geom));
191-
write_geometry_collection_as_wkb(&mut buf, geom).unwrap();
193+
write_geometry_collection(&mut buf, geom, Endianness::LittleEndian).unwrap();
192194
self.0.append_value(&buf)
193195
} else {
194196
self.0.append_null()

rust/geoarrow/src/array/binary/capacity.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@ use std::ops::Add;
22

33
use arrow_array::OffsetSizeTrait;
44

5-
use crate::io::wkb::writer::{
6-
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
7-
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
8-
};
95
use geo_traits::{
106
GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait,
117
MultiPolygonTrait, PointTrait, PolygonTrait,
128
};
9+
use wkb::writer::{
10+
geometry_collection_wkb_size, line_string_wkb_size, multi_line_string_wkb_size,
11+
multi_point_wkb_size, multi_polygon_wkb_size, point_wkb_size, polygon_wkb_size,
12+
};
1313

1414
/// A counter for the buffer sizes of a [`WKBArray`][crate::array::WKBArray].
1515
///

rust/geoarrow/src/array/geometrycollection/builder.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use crate::array::mixed::builder::DEFAULT_PREFER_MULTI;
99
use crate::array::offset_builder::OffsetsBuilder;
1010
use crate::array::{CoordType, GeometryCollectionArray, MixedGeometryBuilder, WKBArray};
1111
use crate::error::{GeoArrowError, Result};
12-
use crate::io::wkb::reader::WKBGeometry;
1312
use crate::scalar::WKB;
1413
use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow};
1514
use geo_traits::{
@@ -369,10 +368,10 @@ impl<'a, const D: usize> GeometryCollectionBuilder<D> {
369368
metadata: Arc<ArrayMetadata>,
370369
prefer_multi: bool,
371370
) -> Result<Self> {
372-
let wkb_objects2: Vec<Option<WKBGeometry>> = wkb_objects
371+
let wkb_objects2 = wkb_objects
373372
.iter()
374-
.map(|maybe_wkb| maybe_wkb.as_ref().map(|wkb| wkb.to_wkb_object()))
375-
.collect();
373+
.map(|maybe_wkb| maybe_wkb.as_ref().map(|wkb| wkb.parse()).transpose())
374+
.collect::<Result<Vec<_>>>()?;
376375
Self::from_nullable_geometries(&wkb_objects2, coord_type, metadata, prefer_multi)
377376
}
378377
}

rust/geoarrow/src/array/linestring/builder.rs

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use crate::array::{
77
MultiPointBuilder, SeparatedCoordBufferBuilder, WKBArray,
88
};
99
use crate::error::{GeoArrowError, Result};
10-
use crate::io::wkb::reader::WKBLineString;
1110
use crate::scalar::WKB;
1211
use crate::trait_::{ArrayAccessor, GeometryArrayBuilder, IntoArrow};
1312
use arrow_array::{Array, GenericListArray, OffsetSizeTrait};
@@ -251,6 +250,14 @@ impl<const D: usize> LineStringBuilder<D> {
251250
.unwrap();
252251
}
253252

253+
pub fn extend_from_geometry_iter<'a>(
254+
&mut self,
255+
geoms: impl Iterator<Item = Option<&'a (impl GeometryTrait<T = f64> + 'a)>>,
256+
) -> Result<()> {
257+
geoms.into_iter().try_for_each(|g| self.push_geometry(g))?;
258+
Ok(())
259+
}
260+
254261
/// Push a raw coordinate to the underlying coordinate array.
255262
///
256263
/// # Safety
@@ -282,24 +289,28 @@ impl<const D: usize> LineStringBuilder<D> {
282289
Ok(())
283290
}
284291

292+
pub fn from_nullable_geometries(
293+
geoms: &[Option<impl GeometryTrait<T = f64>>],
294+
coord_type: Option<CoordType>,
295+
metadata: Arc<ArrayMetadata>,
296+
) -> Result<Self> {
297+
let capacity = LineStringCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?;
298+
let mut array =
299+
Self::with_capacity_and_options(capacity, coord_type.unwrap_or_default(), metadata);
300+
array.extend_from_geometry_iter(geoms.iter().map(|x| x.as_ref()))?;
301+
Ok(array)
302+
}
303+
285304
pub(crate) fn from_wkb<W: OffsetSizeTrait>(
286305
wkb_objects: &[Option<WKB<'_, W>>],
287306
coord_type: Option<CoordType>,
288307
metadata: Arc<ArrayMetadata>,
289308
) -> Result<Self> {
290-
let wkb_objects2: Vec<Option<WKBLineString>> = wkb_objects
309+
let wkb_objects2 = wkb_objects
291310
.iter()
292-
.map(|maybe_wkb| {
293-
maybe_wkb
294-
.as_ref()
295-
.map(|wkb| wkb.to_wkb_object().into_line_string())
296-
})
297-
.collect();
298-
Ok(Self::from_nullable_line_strings(
299-
&wkb_objects2,
300-
coord_type,
301-
metadata,
302-
))
311+
.map(|maybe_wkb| maybe_wkb.as_ref().map(|wkb| wkb.parse()).transpose())
312+
.collect::<Result<Vec<_>>>()?;
313+
Self::from_nullable_geometries(&wkb_objects2, coord_type, metadata)
303314
}
304315
}
305316

0 commit comments

Comments
 (0)