11//! Generate an Arrow schema from a Zarr array schema.
22
3- use arrow_schema:: { DataType , Field , Schema , SchemaRef , TimeUnit } ;
3+ use arrow_schema:: { DataType , Field , FieldRef , Schema , SchemaRef , TimeUnit } ;
4+ use geoarrow_schema:: { Crs , WktType } ;
45use std:: sync:: Arc ;
56use zarrs:: array:: Array ;
67use zarrs:: array:: data_type:: DataType as ZarrDataType ;
@@ -31,9 +32,8 @@ fn arrays_to_schema<TStorage: ?Sized>(
3132) -> ZarrDataFusionResult < SchemaRef > {
3233 let mut fields = vec ! [ ] ;
3334 for array in arrays. iter ( ) {
34- let arrow_dtype = zarr_to_arrow_dtype ( array. data_type ( ) ) ?;
35- let field = Field :: new ( field_name ( group_root, array. path ( ) ) , arrow_dtype, false ) ;
36- fields. push ( field) ;
35+ let name = field_name ( group_root, array. path ( ) ) ;
36+ fields. push ( zarr_to_arrow_field ( name, array. data_type ( ) ) ?) ;
3737 }
3838 // Sort fields by name for consistent ordering
3939 fields. sort_by ( |f1, f2| f1. name ( ) . cmp ( f2. name ( ) ) ) ;
@@ -53,47 +53,75 @@ fn field_name(group_root: &NodePath, array_path: &NodePath) -> String {
5353}
5454
5555/// Maps a Zarr data type to an Arrow data type
56- fn zarr_to_arrow_dtype ( zarr_dtype : & ZarrDataType ) -> ZarrDataFusionResult < DataType > {
57- match zarr_dtype {
58- ZarrDataType :: Bool => Ok ( DataType :: Boolean ) ,
59- ZarrDataType :: Int8 => Ok ( DataType :: Int8 ) ,
60- ZarrDataType :: Int16 => Ok ( DataType :: Int16 ) ,
61- ZarrDataType :: Int32 => Ok ( DataType :: Int32 ) ,
62- ZarrDataType :: Int64 => Ok ( DataType :: Int64 ) ,
63- ZarrDataType :: UInt8 => Ok ( DataType :: UInt8 ) ,
64- ZarrDataType :: UInt16 => Ok ( DataType :: UInt16 ) ,
65- ZarrDataType :: UInt32 => Ok ( DataType :: UInt32 ) ,
66- ZarrDataType :: UInt64 => Ok ( DataType :: UInt64 ) ,
67- ZarrDataType :: Float16 => Ok ( DataType :: Float16 ) ,
68- ZarrDataType :: Float32 => Ok ( DataType :: Float32 ) ,
69- ZarrDataType :: Float64 => Ok ( DataType :: Float64 ) ,
70- ZarrDataType :: Complex64 | ZarrDataType :: Complex128 => Err ( ZarrDataFusionError :: Custom (
71- "Complex64/Complex128 not yet supported." . to_string ( ) ,
72- ) ) ,
73- ZarrDataType :: RawBits ( _size) => Ok ( DataType :: BinaryView ) ,
74- ZarrDataType :: String => Ok ( DataType :: Utf8View ) ,
56+ fn zarr_to_arrow_field ( name : String , zarr_dtype : & ZarrDataType ) -> ZarrDataFusionResult < FieldRef > {
57+ if name == "bbox" {
58+ match zarr_dtype {
59+ ZarrDataType :: String => {
60+ let crs = Crs :: from_authority_code ( "EPSG:4326" . to_string ( ) ) ;
61+ let geoarrow_metadata = Arc :: new ( geoarrow_schema:: Metadata :: new ( crs, None ) ) ;
62+
63+ return Ok ( Arc :: new (
64+ Field :: new ( & name, DataType :: Utf8View , false )
65+ . with_extension_type ( WktType :: new ( geoarrow_metadata) ) ,
66+ ) ) ;
67+ }
68+ _ => {
69+ return Err ( ZarrDataFusionError :: Custom (
70+ "Expected 'bbox' field to be of Zarr string data type." . to_string ( ) ,
71+ ) ) ;
72+ }
73+ }
74+ }
75+
76+ let data_type = match zarr_dtype {
77+ ZarrDataType :: Bool => DataType :: Boolean ,
78+ ZarrDataType :: Int8 => DataType :: Int8 ,
79+ ZarrDataType :: Int16 => DataType :: Int16 ,
80+ ZarrDataType :: Int32 => DataType :: Int32 ,
81+ ZarrDataType :: Int64 => DataType :: Int64 ,
82+ ZarrDataType :: UInt8 => DataType :: UInt8 ,
83+ ZarrDataType :: UInt16 => DataType :: UInt16 ,
84+ ZarrDataType :: UInt32 => DataType :: UInt32 ,
85+ ZarrDataType :: UInt64 => DataType :: UInt64 ,
86+ ZarrDataType :: Float16 => DataType :: Float16 ,
87+ ZarrDataType :: Float32 => DataType :: Float32 ,
88+ ZarrDataType :: Float64 => DataType :: Float64 ,
89+ ZarrDataType :: Complex64 | ZarrDataType :: Complex128 => {
90+ return Err ( ZarrDataFusionError :: Custom (
91+ "Complex64/Complex128 not yet supported." . to_string ( ) ,
92+ ) ) ;
93+ }
94+ ZarrDataType :: RawBits ( _size) => DataType :: BinaryView ,
95+ ZarrDataType :: String => DataType :: Utf8View ,
7596 ZarrDataType :: NumpyDateTime64 {
7697 unit,
7798 scale_factor : _,
7899 } => match unit {
79- NumpyTimeUnit :: Millisecond => Ok ( DataType :: Timestamp ( TimeUnit :: Millisecond , None ) ) ,
80- NumpyTimeUnit :: Microsecond => Ok ( DataType :: Timestamp ( TimeUnit :: Microsecond , None ) ) ,
81- NumpyTimeUnit :: Nanosecond => Ok ( DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) ) ,
82- NumpyTimeUnit :: Second => Ok ( DataType :: Timestamp ( TimeUnit :: Second , None ) ) ,
83- _ => Err ( ZarrDataFusionError :: Custom ( format ! (
84- "Unsupported Numpy datetime64 time unit: {:?}" ,
85- unit
86- ) ) ) ,
100+ NumpyTimeUnit :: Millisecond => DataType :: Timestamp ( TimeUnit :: Millisecond , None ) ,
101+ NumpyTimeUnit :: Microsecond => DataType :: Timestamp ( TimeUnit :: Microsecond , None ) ,
102+ NumpyTimeUnit :: Nanosecond => DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) ,
103+ NumpyTimeUnit :: Second => DataType :: Timestamp ( TimeUnit :: Second , None ) ,
104+ _ => {
105+ return Err ( ZarrDataFusionError :: Custom ( format ! (
106+ "Unsupported Numpy datetime64 time unit: {:?}" ,
107+ unit
108+ ) ) ) ;
109+ }
87110 } ,
88- ZarrDataType :: Extension ( ext) => Err ( ZarrDataFusionError :: Custom ( format ! (
89- "Unsupported Zarr extension type: {}" ,
90- ext. name( )
91- ) ) ) ,
92- _ => Err ( ZarrDataFusionError :: Custom ( format ! (
93- "Unsupported Zarr data type: {:?}" ,
94- zarr_dtype
95- ) ) ) ,
96- }
111+ ZarrDataType :: Extension ( ext) => {
112+ return Err ( ZarrDataFusionError :: Custom ( format ! (
113+ "Unsupported Zarr extension type: {}" ,
114+ ext. name( )
115+ ) ) ) ;
116+ }
117+ _ => {
118+ return Err ( ZarrDataFusionError :: Custom ( format ! (
119+ "Unsupported Zarr data type: {:?}" ,
120+ zarr_dtype
121+ ) ) ) ;
122+ }
123+ } ;
124+ Ok ( Arc :: new ( Field :: new ( & name, data_type, false ) ) )
97125}
98126
99127#[ cfg( test) ]
@@ -110,8 +138,16 @@ mod tests {
110138 let group = Group :: open ( storage. clone ( ) , "/meta" ) . unwrap ( ) ;
111139 let schema = group_arrays_schema ( & group) . unwrap ( ) ;
112140
141+ let geoarrow_metadata = Arc :: new ( geoarrow_schema:: Metadata :: new (
142+ Crs :: from_authority_code ( "EPSG:4326" . to_string ( ) ) ,
143+ None ,
144+ ) ) ;
145+
113146 let expected_fields = vec ! [
114- Arc :: new( Field :: new( "bbox" , DataType :: Utf8View , false ) ) ,
147+ Arc :: new(
148+ Field :: new( "bbox" , DataType :: Utf8View , false )
149+ . with_extension_type( WktType :: new( geoarrow_metadata) ) ,
150+ ) ,
115151 Arc :: new( Field :: new( "collection" , DataType :: Utf8View , false ) ) ,
116152 Arc :: new( Field :: new(
117153 "date" ,
0 commit comments