Skip to content

Commit 5620d4b

Browse files
authored
fix: wkb proj:geometry w/o setting geo metadata (#808)
For stac-utils/rustac-py#160
1 parent ad8d8c2 commit 5620d4b

File tree

6 files changed

+108
-26
lines changed

6 files changed

+108
-26
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,4 @@ tracing-subscriber = { version = "0.3.18", features = [
111111
tracing-indicatif = "0.3.9"
112112
url = "2.3"
113113
webpki-roots = "1.0.0"
114+
wkb = "0.9.0"

crates/core/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ geoarrow = [
2222
"dep:arrow-schema",
2323
"dep:geo-traits",
2424
"dep:geo-types",
25+
"dep:wkb",
2526
]
2627
geoparquet = ["geoarrow", "dep:geoparquet", "dep:parquet"]
2728

@@ -49,6 +50,7 @@ stac-derive.workspace = true
4950
thiserror.workspace = true
5051
tracing.workspace = true
5152
url = { workspace = true, features = ["serde"] }
53+
wkb = { workspace = true, optional = true }
5254

5355
[dev-dependencies]
5456
assert-json-diff.workspace = true

crates/core/src/error.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,9 @@ pub enum Error {
103103
/// [url::ParseError]
104104
#[error(transparent)]
105105
UrlParse(#[from] url::ParseError),
106+
107+
/// [wkb::error::WkbError]
108+
#[error(transparent)]
109+
#[cfg(feature = "geoarrow")]
110+
Wkb(#[from] wkb::error::WkbError),
106111
}

crates/core/src/geoarrow/json.rs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -419,10 +419,35 @@ fn set_column_for_json_rows(
419419
}
420420
}
421421
_ => {
422-
return Err(ArrowError::JsonError(format!(
423-
"data type {:?} not supported in nested map for json writer",
424-
array.data_type()
425-
)));
422+
if col_name == "proj:geometry" {
423+
let binary_array = as_generic_binary_array::<i32>(array);
424+
rows.iter_mut()
425+
.zip(binary_array.iter())
426+
.filter_map(|(maybe_row, maybe_value)| {
427+
maybe_row.as_mut().map(|row| (row, maybe_value))
428+
})
429+
.try_for_each(|(row, maybe_value)| -> Result<(), ArrowError> {
430+
let maybe_value = maybe_value
431+
.map(|value| -> Result<_, ArrowError> {
432+
let wkb = wkb::reader::read_wkb(value)
433+
.map_err(|err| ArrowError::ExternalError(Box::new(err)))?;
434+
let value = geojson::Value::from(&wkb.to_geometry());
435+
Ok(value)
436+
})
437+
.transpose()?;
438+
if let Some(j) = maybe_value {
439+
row.insert(col_name.to_string(), Value::from(&j));
440+
} else if explicit_nulls {
441+
row.insert(col_name.to_string(), Value::Null);
442+
}
443+
Ok(())
444+
})?;
445+
} else {
446+
return Err(ArrowError::JsonError(format!(
447+
"data type {:?} not supported in nested map for json writer",
448+
array.data_type()
449+
)));
450+
}
426451
}
427452
}
428453
Ok(())

crates/core/src/geoarrow/mod.rs

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
pub mod json;
44

55
use crate::{Error, ItemCollection, Result};
6-
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader, cast::AsArray};
6+
use arrow_array::{
7+
Array, RecordBatch, RecordBatchIterator, RecordBatchReader, builder::BinaryBuilder,
8+
cast::AsArray,
9+
};
710
use arrow_json::ReaderBuilder;
811
use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef, TimeUnit};
912
use geo_types::Geometry;
@@ -14,17 +17,14 @@ use geoarrow_array::{
1417
};
1518
use geoarrow_schema::{GeoArrowType, GeometryType, Metadata};
1619
use serde_json::{Value, json};
17-
use std::{collections::HashMap, sync::Arc};
20+
use std::{io::Cursor, sync::Arc};
1821

1922
/// The stac-geoparquet version metadata key.
2023
pub const VERSION_KEY: &str = "stac:geoparquet_version";
2124

2225
/// The stac-geoparquet version.
2326
pub const VERSION: &str = "1.0.0";
2427

25-
/// Geometry columns.
26-
pub const GEOMETRY_COLUMNS: [&str; 2] = ["geometry", "proj:geometry"];
27-
2828
/// Datetime columns.
2929
pub const DATETIME_COLUMNS: [&str; 8] = [
3030
"datetime",
@@ -79,7 +79,8 @@ impl TableBuilder {
7979
/// ```
8080
pub fn build(self) -> Result<Table> {
8181
let mut values = Vec::with_capacity(self.item_collection.items.len());
82-
let mut geometry_builders = HashMap::new();
82+
let mut geometry_builder = GeometryBuilder::new(GeometryType::new(Default::default()));
83+
let mut proj_geometry_builder = BinaryBuilder::new();
8384

8485
for item in self.item_collection.items {
8586
let mut value =
@@ -88,18 +89,20 @@ impl TableBuilder {
8889
let value = value
8990
.as_object_mut()
9091
.expect("a flat item should serialize to an object");
91-
for key in GEOMETRY_COLUMNS {
92-
if let Some(value) = value.remove(key) {
93-
let entry = geometry_builders.entry(key).or_insert_with(|| {
94-
let geometry_type = GeometryType::new(Default::default());
95-
GeometryBuilder::new(geometry_type)
96-
});
97-
let geometry =
98-
geojson::Geometry::from_json_value(value).map_err(Box::new)?;
99-
entry.push_geometry(Some(
100-
&(Geometry::try_from(geometry).map_err(Box::new)?),
101-
))?;
102-
}
92+
if let Some(value) = value.remove("geometry") {
93+
let geometry = geojson::Geometry::from_json_value(value).map_err(Box::new)?;
94+
geometry_builder
95+
.push_geometry(Some(&(Geometry::try_from(geometry).map_err(Box::new)?)))?;
96+
}
97+
if let Some(value) = value.remove("proj:geometry") {
98+
let geometry = geojson::Geometry::from_json_value(value).map_err(Box::new)?;
99+
let mut cursor = Cursor::new(Vec::new());
100+
wkb::writer::write_geometry(
101+
&mut cursor,
102+
&Geometry::try_from(geometry).map_err(Box::new)?,
103+
&Default::default(),
104+
)?;
105+
proj_geometry_builder.append_value(cursor.into_inner());
103106
}
104107
if let Some(bbox) = value.remove("bbox") {
105108
let bbox = convert_bbox(bbox)?;
@@ -132,10 +135,14 @@ impl TableBuilder {
132135
// Add the geometries back in.
133136
let mut schema_builder = SchemaBuilder::from(schema.fields());
134137
let mut columns = record_batch.columns().to_vec();
135-
for (key, geometry_builder) in geometry_builders {
136-
let geometry_array = geometry_builder.finish();
137-
columns.push(geometry_array.to_array_ref());
138-
schema_builder.push(geometry_array.data_type().to_field(key, true));
138+
let geometry_array = geometry_builder.finish();
139+
columns.push(geometry_array.to_array_ref());
140+
schema_builder.push(geometry_array.data_type().to_field("geometry", true));
141+
let proj_geometry_array = proj_geometry_builder.finish();
142+
if !proj_geometry_array.is_empty() {
143+
let data_type = proj_geometry_array.data_type().clone();
144+
columns.push(Arc::new(proj_geometry_array));
145+
schema_builder.push(Field::new("proj:geometry", data_type, true));
139146
}
140147
let _ = schema_builder
141148
.metadata_mut()
@@ -350,4 +357,20 @@ mod tests {
350357
let record_batch = record_batches.pop().unwrap();
351358
let _ = super::with_wkb_geometry(record_batch, "geometry").unwrap();
352359
}
360+
361+
/// Checks that a table built from the projection example item has a
/// `proj:geometry` column in its record batch schema.
#[test]
fn has_proj_geometry() {
    // Build a table from the single example item.
    let item: Item =
        crate::read("examples/extensions-collection/proj-example/proj-example.json").unwrap();
    let (mut batches, _) = Table::from_item_collection(vec![item])
        .unwrap()
        .into_inner();
    assert_eq!(batches.len(), 1);

    // The one resulting batch must list `proj:geometry` among its schema columns.
    let schema = batches.pop().unwrap().schema();
    assert!(schema.column_with_name("proj:geometry").is_some());
}
353376
}

crates/core/src/geoparquet.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,4 +372,30 @@ mod tests {
372372
let cursor = Cursor::new(Vec::new());
373373
super::into_writer(cursor, items).unwrap();
374374
}
375+
376+
/// Writes the projection example item through `into_writer`, reads the
/// resulting bytes back, and checks that the file-level `geo` key-value
/// metadata entry does not list `proj:geometry` under `columns`.
#[test]
fn no_proj_geometry_metadata() {
    let item: Item =
        crate::read("examples/extensions-collection/proj-example/proj-example.json").unwrap();

    // Serialize into an in-memory buffer, then reopen those bytes for reading.
    let mut buffer = Cursor::new(Vec::new());
    super::into_writer(&mut buffer, vec![item]).unwrap();
    let reader = SerializedFileReader::new(Bytes::from(buffer.into_inner())).unwrap();

    // Locate the `geo` entry among the file's key-value metadata.
    let entries = reader
        .metadata()
        .file_metadata()
        .key_value_metadata()
        .unwrap();
    let geo_entry = entries.iter().find(|entry| entry.key == "geo").unwrap();

    // Parse its JSON payload and inspect the `columns` object.
    let geo: serde_json::Value =
        serde_json::from_str(geo_entry.value.as_deref().unwrap()).unwrap();
    let columns = geo["columns"].as_object().unwrap();
    assert!(!columns.contains_key("proj:geometry"));
}
375401
}

0 commit comments

Comments
 (0)