Skip to content

Commit 972cd6f

Browse files
authored
feat: Set GeoArrow WKT extension type and WGS84 on Zarr arrays named "bbox" (#20)
1 parent b1907b1 commit 972cd6f

File tree

1 file changed

+77
-41
lines changed

1 file changed

+77
-41
lines changed

src/schema.rs

Lines changed: 77 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Generate an Arrow schema from a Zarr array schema.
22
3-
use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit};
3+
use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef, TimeUnit};
4+
use geoarrow_schema::{Crs, WktType};
45
use std::sync::Arc;
56
use zarrs::array::Array;
67
use zarrs::array::data_type::DataType as ZarrDataType;
@@ -31,9 +32,8 @@ fn arrays_to_schema<TStorage: ?Sized>(
3132
) -> ZarrDataFusionResult<SchemaRef> {
3233
let mut fields = vec![];
3334
for array in arrays.iter() {
34-
let arrow_dtype = zarr_to_arrow_dtype(array.data_type())?;
35-
let field = Field::new(field_name(group_root, array.path()), arrow_dtype, false);
36-
fields.push(field);
35+
let name = field_name(group_root, array.path());
36+
fields.push(zarr_to_arrow_field(name, array.data_type())?);
3737
}
3838
// Sort fields by name for consistent ordering
3939
fields.sort_by(|f1, f2| f1.name().cmp(f2.name()));
@@ -53,47 +53,75 @@ fn field_name(group_root: &NodePath, array_path: &NodePath) -> String {
5353
}
5454

5555
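/// Builds a non-nullable Arrow field named `name` from a Zarr data type.
///
/// An array named "bbox" must have the Zarr string data type; it becomes a
/// `Utf8View` field tagged with the GeoArrow WKT extension type and an
/// EPSG:4326 (WGS84) CRS. Every other array is mapped to the corresponding
/// Arrow data type; unsupported Zarr data types return an error.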
56-
fn zarr_to_arrow_dtype(zarr_dtype: &ZarrDataType) -> ZarrDataFusionResult<DataType> {
57-
match zarr_dtype {
58-
ZarrDataType::Bool => Ok(DataType::Boolean),
59-
ZarrDataType::Int8 => Ok(DataType::Int8),
60-
ZarrDataType::Int16 => Ok(DataType::Int16),
61-
ZarrDataType::Int32 => Ok(DataType::Int32),
62-
ZarrDataType::Int64 => Ok(DataType::Int64),
63-
ZarrDataType::UInt8 => Ok(DataType::UInt8),
64-
ZarrDataType::UInt16 => Ok(DataType::UInt16),
65-
ZarrDataType::UInt32 => Ok(DataType::UInt32),
66-
ZarrDataType::UInt64 => Ok(DataType::UInt64),
67-
ZarrDataType::Float16 => Ok(DataType::Float16),
68-
ZarrDataType::Float32 => Ok(DataType::Float32),
69-
ZarrDataType::Float64 => Ok(DataType::Float64),
70-
ZarrDataType::Complex64 | ZarrDataType::Complex128 => Err(ZarrDataFusionError::Custom(
71-
"Complex64/Complex128 not yet supported.".to_string(),
72-
)),
73-
ZarrDataType::RawBits(_size) => Ok(DataType::BinaryView),
74-
ZarrDataType::String => Ok(DataType::Utf8View),
56+
fn zarr_to_arrow_field(name: String, zarr_dtype: &ZarrDataType) -> ZarrDataFusionResult<FieldRef> {
57+
if name == "bbox" {
58+
match zarr_dtype {
59+
ZarrDataType::String => {
60+
let crs = Crs::from_authority_code("EPSG:4326".to_string());
61+
let geoarrow_metadata = Arc::new(geoarrow_schema::Metadata::new(crs, None));
62+
63+
return Ok(Arc::new(
64+
Field::new(&name, DataType::Utf8View, false)
65+
.with_extension_type(WktType::new(geoarrow_metadata)),
66+
));
67+
}
68+
_ => {
69+
return Err(ZarrDataFusionError::Custom(
70+
"Expected 'bbox' field to be of Zarr string data type.".to_string(),
71+
));
72+
}
73+
}
74+
}
75+
76+
let data_type = match zarr_dtype {
77+
ZarrDataType::Bool => DataType::Boolean,
78+
ZarrDataType::Int8 => DataType::Int8,
79+
ZarrDataType::Int16 => DataType::Int16,
80+
ZarrDataType::Int32 => DataType::Int32,
81+
ZarrDataType::Int64 => DataType::Int64,
82+
ZarrDataType::UInt8 => DataType::UInt8,
83+
ZarrDataType::UInt16 => DataType::UInt16,
84+
ZarrDataType::UInt32 => DataType::UInt32,
85+
ZarrDataType::UInt64 => DataType::UInt64,
86+
ZarrDataType::Float16 => DataType::Float16,
87+
ZarrDataType::Float32 => DataType::Float32,
88+
ZarrDataType::Float64 => DataType::Float64,
89+
ZarrDataType::Complex64 | ZarrDataType::Complex128 => {
90+
return Err(ZarrDataFusionError::Custom(
91+
"Complex64/Complex128 not yet supported.".to_string(),
92+
));
93+
}
94+
ZarrDataType::RawBits(_size) => DataType::BinaryView,
95+
ZarrDataType::String => DataType::Utf8View,
7596
ZarrDataType::NumpyDateTime64 {
7697
unit,
7798
scale_factor: _,
7899
} => match unit {
79-
NumpyTimeUnit::Millisecond => Ok(DataType::Timestamp(TimeUnit::Millisecond, None)),
80-
NumpyTimeUnit::Microsecond => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)),
81-
NumpyTimeUnit::Nanosecond => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
82-
NumpyTimeUnit::Second => Ok(DataType::Timestamp(TimeUnit::Second, None)),
83-
_ => Err(ZarrDataFusionError::Custom(format!(
84-
"Unsupported Numpy datetime64 time unit: {:?}",
85-
unit
86-
))),
100+
NumpyTimeUnit::Millisecond => DataType::Timestamp(TimeUnit::Millisecond, None),
101+
NumpyTimeUnit::Microsecond => DataType::Timestamp(TimeUnit::Microsecond, None),
102+
NumpyTimeUnit::Nanosecond => DataType::Timestamp(TimeUnit::Nanosecond, None),
103+
NumpyTimeUnit::Second => DataType::Timestamp(TimeUnit::Second, None),
104+
_ => {
105+
return Err(ZarrDataFusionError::Custom(format!(
106+
"Unsupported Numpy datetime64 time unit: {:?}",
107+
unit
108+
)));
109+
}
87110
},
88-
ZarrDataType::Extension(ext) => Err(ZarrDataFusionError::Custom(format!(
89-
"Unsupported Zarr extension type: {}",
90-
ext.name()
91-
))),
92-
_ => Err(ZarrDataFusionError::Custom(format!(
93-
"Unsupported Zarr data type: {:?}",
94-
zarr_dtype
95-
))),
96-
}
111+
ZarrDataType::Extension(ext) => {
112+
return Err(ZarrDataFusionError::Custom(format!(
113+
"Unsupported Zarr extension type: {}",
114+
ext.name()
115+
)));
116+
}
117+
_ => {
118+
return Err(ZarrDataFusionError::Custom(format!(
119+
"Unsupported Zarr data type: {:?}",
120+
zarr_dtype
121+
)));
122+
}
123+
};
124+
Ok(Arc::new(Field::new(&name, data_type, false)))
97125
}
98126

99127
#[cfg(test)]
@@ -110,8 +138,16 @@ mod tests {
110138
let group = Group::open(storage.clone(), "/meta").unwrap();
111139
let schema = group_arrays_schema(&group).unwrap();
112140

141+
let geoarrow_metadata = Arc::new(geoarrow_schema::Metadata::new(
142+
Crs::from_authority_code("EPSG:4326".to_string()),
143+
None,
144+
));
145+
113146
let expected_fields = vec![
114-
Arc::new(Field::new("bbox", DataType::Utf8View, false)),
147+
Arc::new(
148+
Field::new("bbox", DataType::Utf8View, false)
149+
.with_extension_type(WktType::new(geoarrow_metadata)),
150+
),
115151
Arc::new(Field::new("collection", DataType::Utf8View, false)),
116152
Arc::new(Field::new(
117153
"date",

0 commit comments

Comments
 (0)