diff --git a/Cargo.lock b/Cargo.lock index 5f6eaa2f3e..05b779f629 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2352,6 +2352,10 @@ dependencies = [ "daft-text", "daft-warc", "daft-writers", + "geoarrow-array", + "geoarrow-cast", + "geoarrow-expr-geo", + "geoarrow-schema", "libc", "log", "lzma-sys", @@ -2452,6 +2456,7 @@ version = "0.3.0-dev0" dependencies = [ "arrow", "arrow-row", + "arrow-schema 57.3.0", "bincode", "bytemuck", "chrono", @@ -2472,6 +2477,9 @@ dependencies = [ "derive_more", "fastrand 2.3.0", "fnv", + "geoarrow-array", + "geoarrow-cast", + "geoarrow-schema", "html-escape", "hyperloglog", "image", @@ -3588,6 +3596,16 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools 0.11.0", + "num-traits", +] + [[package]] name = "ecdsa" version = "0.14.8" @@ -3879,6 +3897,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "fnv" version = "1.0.7" @@ -4044,6 +4068,133 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + +[[package]] +name = "geo-traits" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" +dependencies = [ + "approx", + "num-traits", + "rayon", + "rstar", + "serde", +] + +[[package]] +name = "geoarrow-array" +version = "0.7.0" +dependencies = [ + "arrow-array", + "arrow-buffer 57.3.0", + "arrow-json", + "arrow-schema 57.3.0", + "geo", + "geo-traits", + "geo-types", + "geoarrow-schema", + "geozero", + "num-traits", + "wkb", + "wkt 0.14.0", +] + +[[package]] +name = "geoarrow-cast" +version = "0.7.0" +dependencies = [ + "arrow-schema 57.3.0", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", + "wkt 0.14.0", +] + +[[package]] +name = "geoarrow-expr-geo" +version = "0.7.0" +dependencies = [ + "arrow-array", + "arrow-buffer 57.3.0", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", +] + +[[package]] +name = "geoarrow-schema" +version = "0.7.0" +dependencies = [ + "arrow-schema 57.3.0", + "geo-traits", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "geographiclib-rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +dependencies = [ + "libm", +] + +[[package]] +name = "geojson" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e26f3c45b36fccc9cf2805e61d4da6bc4bbd5a3a9589b01afa3a40eff703bd79" +dependencies = [ + "log", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "geozero" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5f28f34864745eb2f123c990c6ffd92c1584bd39439b3f27ff2a0f4ea5b309b" +dependencies = [ + "geo-types", + "geojson", + "log", + "serde_json", + "thiserror 1.0.69", + "wkt 0.11.1", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -4300,6 +4451,15 @@ dependencies = [ "serde", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -4316,6 +4476,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.1.5", ] @@ -4330,6 +4492,16 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.5.0" @@ -4693,6 +4865,49 @@ dependencies = [ name = "hyperloglog" version = "0.3.0-dev0" +[[package]] +name = "i_float" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" +dependencies = [ + "libm", +] + +[[package]] +name = "i_key_sort" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" + +[[package]] +name = "i_overlay" +version = "4.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413183068e6e0289e18d7d0a1f661b81546e6918d5453a44570b9ab30cbed1b3" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" +dependencies = [ + "i_float", +] + +[[package]] +name = "i_tree" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -4980,6 +5195,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -5580,6 +5804,28 @@ dependencies = [ "libm", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "numpy" version = "0.28.0" @@ -6884,6 +7130,23 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "robust" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" + +[[package]] +name = "rstar" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rstest" version = "0.26.1" @@ -7452,6 +7715,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "spade" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990" +dependencies = [ + "hashbrown 0.15.5", + "num-traits", + "robust", + "smallvec", +] + [[package]] name = "spin" version = "0.10.0" @@ -9265,6 +9540,43 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wkb" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a120b336c7ad17749026d50427c23d838ecb50cd64aaea6254b5030152f890a9" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d" +dependencies = [ + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 98cff325b5..f29b9eec88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,10 @@ daft-sql = {path = "src/daft-sql", default-features = false} daft-stats = {path = "src/daft-stats", default-features = false} daft-warc = {path = "src/daft-warc", default-features = false} daft-writers = {path = "src/daft-writers", default-features = false} +geoarrow-array = {path = "src/geoarrow/geoarrow-array", default-features = false} +geoarrow-cast = {path = "src/geoarrow/geoarrow-cast", default-features = false} +geoarrow-expr-geo = {path = "src/geoarrow/geoarrow-expr-geo", default-features = false} +geoarrow-schema = {path = "src/geoarrow/geoarrow-schema", default-features = false} log = {workspace = true} lzma-sys = {version = "*", features = ["static"]} pyo3 = {workspace = true, optional = true} @@ -222,7 +226,11 @@ members = [ "src/daft-writers", "src/hyperloglog", "src/daft-cli", - "src/daft-text" + "src/daft-text", + "src/geoarrow/geoarrow-array", + "src/geoarrow/geoarrow-cast", + "src/geoarrow/geoarrow-expr-geo", + "src/geoarrow/geoarrow-schema" ] exclude = [ "examples/hello" @@ -232,6 +240,7 @@ exclude = [ arrow = "57.1.0" arrow-array = {version = "57.1.0", features = ["chrono-tz"]} arrow-buffer = "57.1.0" +arrow-cast = "57.1.0" arrow-csv = "57.1.0" arrow-data = "57.1.0" arrow-flight = "57.1.0" @@ -299,6 +308,16 @@ dashmap = "6.1.0" educe = "0.6.0" futures = "0.3.30" hashbrown = "0.16" +geo = "0.31.0" +geo-traits = "0.3.0" +geo-types = "0.7.16" +geozero = "0.14" +geoarrow-array = {path = "src/geoarrow/geoarrow-array", default-features = false} +geoarrow-cast = {path = "src/geoarrow/geoarrow-cast", default-features = false} +geoarrow-expr-geo = {path = "src/geoarrow/geoarrow-expr-geo", default-features = false} +geoarrow-schema = {path = "src/geoarrow/geoarrow-schema", default-features = false} +wkb = "0.9.1" +wkt = "0.14" html-escape = "0.2.13" image = {version = "0.25.10", default-features = false} indexmap = "2.9.0" diff --git a/daft/__init__.py b/daft/__init__.py index 168ba22d80..e556a7c0bf 100644 --- a/daft/__init__.py +++ b/daft/__init__.py @@ -75,7 +75,7 @@ def refresh_logger() -> None: from_pylist, from_ray_dataset, ) -from daft.daft import ImageFormat, ImageMode, ImageProperty, ResourceRequest +from daft.daft import GeospatialMode, ImageFormat, ImageMode, ImageProperty, ResourceRequest from daft.dataframe import DataFrame from daft.schema import Schema from daft.datatype import DataType, TimeUnit, MediaType @@ -178,6 +178,7 @@ def __getattr__(name: str) -> object: "DataType", "Expression", "File", + "GeospatialMode", "IOConfig", "Identifier", "ImageFormat", diff --git a/daft/daft/__init__.pyi b/daft/daft/__init__.pyi index 6b4e98b704..1df2370801 100644 --- a/daft/daft/__init__.pyi +++ b/daft/daft/__init__.pyi @@ -89,6 +89,10 @@ class ImageProperty(Enum): @staticmethod def from_property_string(attr: str) -> ImageProperty: ... +class GeospatialMode: + @staticmethod + def from_user_defined_mode(dimension: str, coord_type: str) -> GeospatialMode: ... + class PyWindowBoundary: """Represents a window frame boundary in window functions.""" @@ -1370,6 +1374,30 @@ class PyDataType: def python() -> PyDataType: ... @staticmethod def file(media_type: PyMediaType) -> PyDataType: ... + @staticmethod + def wkt(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def wkb(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def point(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def linestring(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def polygon(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def multipoint(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def multilinestring(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def multipolygon(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def geometry_collection(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def geometry(mode: GeospatialMode | None = None) -> PyDataType: ... + @staticmethod + def geography() -> PyDataType: ... + @staticmethod + def rect(mode: GeospatialMode | None = None) -> PyDataType: ... def to_arrow(self, cast_tensor_type_for_ray: builtins.bool | None = None) -> pa.DataType: ... def is_null(self) -> builtins.bool: ... def is_boolean(self) -> builtins.bool: ... diff --git a/daft/datatype.py b/daft/datatype.py index 1c5f641c94..cf34ca7dd7 100644 --- a/daft/datatype.py +++ b/daft/datatype.py @@ -8,7 +8,7 @@ from packaging.version import parse -from daft.daft import ImageMode, PyDataType, PyMediaType, PyTimeUnit, sql_datatype +from daft.daft import GeospatialMode, ImageMode, PyDataType, PyMediaType, PyTimeUnit, sql_datatype from daft.dependencies import np, pa from daft.runners import get_or_create_runner @@ -727,6 +727,111 @@ def image( return cls._from_pydatatype(PyDataType.image(mode, height, width)) @datatype_constructor + @classmethod + def wkt( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.wkt(mode)) + + @classmethod + def wkb( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.wkb(mode)) + + @classmethod + def point( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.point(mode)) + + @classmethod + def linestring( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.linestring(mode)) + + @classmethod + def polygon( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.polygon(mode)) + + @classmethod + def multipoint( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.multipoint(mode)) + + @classmethod + def multilinestring( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.multilinestring(mode)) + + @classmethod + def multipolygon( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.multipolygon(mode)) + + @classmethod + def geometry_collection( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.geometry_collection(mode)) + + @classmethod + def geometry( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.geometry(mode)) + + @classmethod + def geography( + cls, + ) -> DataType: + return cls._from_pydatatype(PyDataType.geography()) + + @classmethod + def rect( + cls, + mode: GeospatialMode | None = None, + ) -> DataType: + if not isinstance(mode, GeospatialMode) and mode is not None: + raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}") + return cls._from_pydatatype(PyDataType.rect(mode)) + @classmethod def tensor( cls, diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml index 6131134f8b..c349ab97b1 100644 --- a/src/daft-core/Cargo.toml +++ b/src/daft-core/Cargo.toml @@ -1,6 +1,7 @@ [dependencies] arrow = {workspace = true} arrow-row = {workspace = true} +arrow-schema = {workspace = true} bincode = {workspace = true} bytemuck = {version = "1", features = ["derive"]} chrono = {workspace = true} @@ -21,6 +22,9 @@ daft-sketch = {path = "../daft-sketch", default-features = false} derive_more = {workspace = true} fastrand = "2.1.0" fnv = "1.0.7" +geoarrow-array = {workspace = true, default-features = false} +geoarrow-cast = {workspace = true, default-features = false} +geoarrow-schema = {workspace = true, default-features = false} html-escape = {workspace = true} hyperloglog = {path = "../hyperloglog"} image = {workspace = true, features = [ diff --git a/src/daft-core/src/array/growable/arrow_growable.rs b/src/daft-core/src/array/growable/arrow_growable.rs index f9d0664572..6df498e2bf 100644 --- a/src/daft-core/src/array/growable/arrow_growable.rs +++ b/src/daft-core/src/array/growable/arrow_growable.rs @@ -242,6 +242,10 @@ where let field = Arc::new(Field::new(self.name.clone(), self.dtype.clone())); Ok(DataArray::::from_arrow(field, arrow_array)?.into_series()) } + + fn len(&self) -> usize { + self.len + } } /// Simplified null growable — just tracks a length counter. @@ -279,6 +283,10 @@ impl Growable for ArrowNullGrowable { self.len = 0; Ok(NullArray::full_null(&self.name, &self.dtype, len).into_series()) } + + fn len(&self) -> usize { + self.len + } } #[cfg(test)] diff --git a/src/daft-core/src/array/growable/fixed_size_list_growable.rs b/src/daft-core/src/array/growable/fixed_size_list_growable.rs index 6c2b0ba170..6fe2f2cd48 100644 --- a/src/daft-core/src/array/growable/fixed_size_list_growable.rs +++ b/src/daft-core/src/array/growable/fixed_size_list_growable.rs @@ -88,4 +88,8 @@ impl Growable for FixedSizeListGrowable<'_> { ) .into_series()) } + + fn len(&self) -> usize { + self.child_growable.len() / self.element_fixed_len + } } diff --git a/src/daft-core/src/array/growable/list_growable.rs b/src/daft-core/src/array/growable/list_growable.rs index 51a9c82d86..18131bae37 100644 --- a/src/daft-core/src/array/growable/list_growable.rs +++ b/src/daft-core/src/array/growable/list_growable.rs @@ -105,4 +105,8 @@ impl Growable for ListGrowable<'_> { ) .into_series()) } + + fn len(&self) -> usize { + self.growable_offsets.len() - 1 + } } diff --git a/src/daft-core/src/array/growable/logical_growable.rs b/src/daft-core/src/array/growable/logical_growable.rs index 72d769db51..52f63be5e8 100644 --- a/src/daft-core/src/array/growable/logical_growable.rs +++ b/src/daft-core/src/array/growable/logical_growable.rs @@ -44,6 +44,10 @@ where ); Ok(arr.into_series()) } + + fn len(&self) -> usize { + self.physical_growable.len() + } } macro_rules! impl_logical_growable { @@ -115,3 +119,15 @@ where } } } +impl_logical_growable!(LogicalWKTGrowable, WKTType); +impl_logical_growable!(LogicalWKBGrowable, WKBType); +impl_logical_growable!(LogicalPointGrowable, PointType); +impl_logical_growable!(LogicalLineStringGrowable, LineStringType); +impl_logical_growable!(LogicalPolygonGrowable, PolygonType); +impl_logical_growable!(LogicalMultiPointGrowable, MultiPointType); +impl_logical_growable!(LogicalMultiLineStringGrowable, MultiLineStringType); +impl_logical_growable!(LogicalMultiPolygonGrowable, MultiPolygonType); +impl_logical_growable!(LogicalGeometryCollectionGrowable, GeometryCollectionType); +impl_logical_growable!(LogicalGeometryGrowable, GeometryType); +impl_logical_growable!(LogicalGeographyGrowable, GeographyType); +impl_logical_growable!(LogicalRectGrowable, RectType); diff --git a/src/daft-core/src/array/growable/map_growable.rs b/src/daft-core/src/array/growable/map_growable.rs index 89d78d726c..dd00cb276d 100644 --- a/src/daft-core/src/array/growable/map_growable.rs +++ b/src/daft-core/src/array/growable/map_growable.rs @@ -70,6 +70,10 @@ impl Growable for MapGrowable<'_> { ); Ok(map_array.into_series()) } + + fn len(&self) -> usize { + self.list_growable.len() + } } #[cfg(test)] diff --git a/src/daft-core/src/array/growable/mod.rs b/src/daft-core/src/array/growable/mod.rs index b2712c7dfe..fe16d357cc 100644 --- a/src/daft-core/src/array/growable/mod.rs +++ b/src/daft-core/src/array/growable/mod.rs @@ -1,7 +1,7 @@ use common_error::DaftResult; use crate::{ - array::{FixedSizeListArray, ListArray, StructArray, prelude::*}, + array::{FixedSizeListArray, ListArray, StructArray, UnionArray, prelude::*}, datatypes::{FileArray, prelude::*}, file::DaftMediaType, series::Series, @@ -15,6 +15,7 @@ mod list_growable; mod logical_growable; mod map_growable; mod struct_growable; +mod union_growable; #[cfg(feature = "python")] mod python_growable; @@ -58,6 +59,7 @@ pub fn make_growable<'a>( /// Describes a struct that can be extended from slices of other pre-existing Series. /// This is very useful for abstracting many "physical" operations such as takes, broadcasts, /// filters and more. +#[allow(clippy::len_without_is_empty)] pub trait Growable { /// Extends this [`Growable`] with elements from the bounded [`Array`] at index `index` from /// a slice starting at `start` and length `len`. @@ -75,6 +77,8 @@ pub trait Growable { /// Builds an array from the [`Growable`] fn build(&mut self) -> DaftResult; + + fn len(&self) -> usize; } /// Trait that an Array type can implement to provide a Growable factory method @@ -189,6 +193,7 @@ impl_growable_array!( IntervalArray, arrow_growable::ArrowGrowable<'a, IntervalType> ); +impl_growable_array!(UnionArray, union_growable::UnionGrowable<'a>); impl_growable_array!(DateArray, logical_growable::LogicalDateGrowable<'a>); impl_growable_array!(TimeArray, logical_growable::LogicalTimeGrowable<'a>); @@ -233,3 +238,33 @@ where #[cfg(feature = "python")] impl_growable_array!(PythonArray, python_growable::PythonGrowable<'a>); +impl_growable_array!(WktArray, logical_growable::LogicalWKTGrowable<'a>); +impl_growable_array!(WkbArray, logical_growable::LogicalWKBGrowable<'a>); +impl_growable_array!(PointArray, logical_growable::LogicalPointGrowable<'a>); +impl_growable_array!( + LineStringArray, + logical_growable::LogicalLineStringGrowable<'a> +); +impl_growable_array!(PolygonArray, logical_growable::LogicalPolygonGrowable<'a>); +impl_growable_array!( + MultiPointArray, + logical_growable::LogicalMultiPointGrowable<'a> +); +impl_growable_array!( + MultiLineStringArray, + logical_growable::LogicalMultiLineStringGrowable<'a> +); +impl_growable_array!( + MultiPolygonArray, + logical_growable::LogicalMultiPolygonGrowable<'a> +); +impl_growable_array!( + GeometryCollectionArray, + logical_growable::LogicalGeometryCollectionGrowable<'a> +); +impl_growable_array!(GeometryArray, logical_growable::LogicalGeometryGrowable<'a>); +impl_growable_array!( + GeographyArray, + logical_growable::LogicalGeographyGrowable<'a> +); +impl_growable_array!(RectArray, logical_growable::LogicalRectGrowable<'a>); diff --git a/src/daft-core/src/array/growable/python_growable.rs b/src/daft-core/src/array/growable/python_growable.rs index 043a329636..8fe1b3c332 100644 --- a/src/daft-core/src/array/growable/python_growable.rs +++ b/src/daft-core/src/array/growable/python_growable.rs @@ -74,4 +74,8 @@ impl Growable for PythonGrowable<'_> { Ok(PythonArray::new(field, buffer.into(), built_validity).into_series()) } + + fn len(&self) -> usize { + self.buffer.len() + } } diff --git a/src/daft-core/src/array/growable/struct_growable.rs b/src/daft-core/src/array/growable/struct_growable.rs index dfb1a9c5f3..bc8f2e44fd 100644 --- a/src/daft-core/src/array/growable/struct_growable.rs +++ b/src/daft-core/src/array/growable/struct_growable.rs @@ -98,4 +98,12 @@ impl Growable for StructGrowable<'_> { ) .into_series()) } + + fn len(&self) -> usize { + if let Some(child) = self.children_growables.first() { + child.len() + } else { + 0 + } + } } diff --git a/src/daft-core/src/array/growable/union_growable.rs b/src/daft-core/src/array/growable/union_growable.rs new file mode 100644 index 0000000000..b986886505 --- /dev/null +++ b/src/daft-core/src/array/growable/union_growable.rs @@ -0,0 +1,122 @@ +use arrow::buffer::ScalarBuffer; +use common_error::DaftResult; + +use super::Growable; +use crate::{ + array::{UnionArray, growable::make_growable}, + datatypes::{DataType, Field}, + series::{IntoSeries, Series}, +}; + +pub struct UnionGrowable<'a> { + name: String, + dtype: DataType, + ids: Vec, + arrays: Vec<&'a UnionArray>, + children_growables: Vec>, + offsets: Option>, +} + +impl<'a> UnionGrowable<'a> { + pub fn new( + name: &str, + dtype: &DataType, + children: Vec<&'a UnionArray>, + _use_validity: bool, + capacity: usize, + ) -> Self { + match dtype { + DataType::Union(fields, _, _) => { + let has_offsets = children[0].offsets().is_some(); + + let children_growables: Vec> = fields + .iter() + .enumerate() + .map(|(i, f)| { + make_growable( + f.name.as_ref(), + &f.dtype, + children + .iter() + .map(|a| a.children.get(i).unwrap()) + .collect::>(), + false, + capacity, + ) + }) + .collect::>(); + + Self { + name: name.to_string(), + dtype: dtype.clone(), + ids: Vec::with_capacity(capacity), + arrays: children, + children_growables, + offsets: if has_offsets { + Some(Vec::with_capacity(capacity)) + } else { + None + }, + } + } + _ => panic!("Cannot create UnionGrowable from dtype: {}", dtype), + } + } +} + +impl Growable for UnionGrowable<'_> { + fn extend(&mut self, index: usize, start: usize, len: usize) { + let array = self.arrays[index]; + + let ids = &array.ids()[start..start + len]; + self.ids.extend(ids); + + if let Some(x) = self.offsets.as_mut() { + let offsets = &array.offsets().clone().unwrap()[start..start + len]; + + for (&type_, &offset) in ids.iter().zip(offsets.iter()) { + let field = &mut self.children_growables[type_ as usize]; + + x.push(field.len() as i32); + field.extend(index, offset as usize, 1); + } + } else { + self.children_growables + .iter_mut() + .for_each(|field| field.extend(index, start, len)); + } + } + + fn add_nulls(&mut self, _additional: usize) {} + + fn build(&mut self) -> DaftResult { + let build_children = self + .children_growables + .iter_mut() + .map(|growable| growable.build()) + .collect::>>()?; + + let ids = std::mem::take(&mut self.ids); + let offsets = std::mem::take(&mut self.offsets); + + let ids_buffer = ScalarBuffer::from(ids); + let offsets_buffer = if let Some(offsets) = offsets { + let offsets_buffer = ScalarBuffer::from(offsets); + Some(offsets_buffer) + } else { + None + }; + + Ok(UnionArray::new( + Field::new(self.name.clone(), self.dtype.clone()), + ids_buffer, + build_children, + offsets_buffer, + ) + .into_series()) + } + + fn len(&self) -> usize { + self.ids.len() + } +} diff --git a/src/daft-core/src/array/mod.rs b/src/daft-core/src/array/mod.rs index a8b2f55825..2e944df0ff 100644 --- a/src/daft-core/src/array/mod.rs +++ b/src/daft-core/src/array/mod.rs @@ -8,6 +8,7 @@ mod list_array; pub mod ops; mod serdes; mod struct_array; +mod union_array; pub mod values; use arrow::{ @@ -18,6 +19,7 @@ use arrow::{ pub use fixed_size_list_array::FixedSizeListArray; pub use list_array::ListArray; pub use struct_array::StructArray; +pub use union_array::UnionArray; mod boolean; pub mod prelude; use std::{marker::PhantomData, sync::Arc}; diff --git a/src/daft-core/src/array/ops/as_arrow.rs b/src/daft-core/src/array/ops/as_arrow.rs index d36f4fd93c..ea9e7b347e 100644 --- a/src/daft-core/src/array/ops/as_arrow.rs +++ b/src/daft-core/src/array/ops/as_arrow.rs @@ -1,5 +1,5 @@ use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{ BinaryArray, BooleanArray, DaftPrimitiveType, FixedSizeBinaryArray, IntervalArray, NullArray, NumericNative, Utf8Array, @@ -97,6 +97,7 @@ impl_asarrow_logicalarray!(TimestampArray, arrow::array::Int64Array); impl_asarrow_nested!(ListArray, arrow::array::LargeListArray); impl_asarrow_nested!(FixedSizeListArray, arrow::array::FixedSizeListArray); impl_asarrow_nested!(StructArray, arrow::array::StructArray); +impl_asarrow_nested!(UnionArray, arrow::array::UnionArray); #[cfg(test)] mod test { diff --git a/src/daft-core/src/array/ops/broadcast.rs b/src/daft-core/src/array/ops/broadcast.rs index 8cf8488a3c..268648b885 100644 --- a/src/daft-core/src/array/ops/broadcast.rs +++ b/src/daft-core/src/array/ops/broadcast.rs @@ -10,7 +10,7 @@ use super::full::FullNull; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{DaftPrimitiveType, Field, NumericNative}, prelude::{ AsArrow, BinaryType, BooleanArray, ExtensionArray, FixedSizeBinaryType, IntervalType, @@ -201,6 +201,13 @@ macro_rules! impl_broadcast_via_concat { impl_broadcast_via_concat!(FixedSizeListArray); impl_broadcast_via_concat!(ListArray); impl_broadcast_via_concat!(ExtensionArray); + +impl Broadcastable for UnionArray { + fn broadcast(&self, _num: usize) -> DaftResult { + todo!("implement broadcast for UnionArray") + } +} + #[cfg(feature = "python")] impl_broadcast_via_concat!(PythonArray); diff --git a/src/daft-core/src/array/ops/cast.rs b/src/daft-core/src/array/ops/cast.rs index d45e25e07e..7af1992206 100644 --- a/src/daft-core/src/array/ops/cast.rs +++ b/src/daft-core/src/array/ops/cast.rs @@ -1,13 +1,28 @@ use std::{ + collections::HashMap, ops::{Div, Mul}, sync::Arc, }; use arrow::{ - array::Array as ArrowArray, + array::{Array as ArrowArray, AsArray}, buffer::{OffsetBuffer, ScalarBuffer}, }; use common_error::{DaftError, DaftResult}; +use daft_schema::geospatial_mode::{ + Crs as DaftCrs, CrsType as DaftCrsType, Dimension as DaftDimension, Edges as DaftEdges, + Metadata as DaftMetadata, +}; +use geoarrow_array::{ + GeoArrowArray, + array::{PointArray as GeoarrowPointArray, from_arrow_array}, +}; +use geoarrow_cast::cast::cast as geoarrow_cast; +use geoarrow_schema::{ + CoordType as GeoarrowCoordType, Dimension as GeoarrowDimension, Edges as GeoarrowEdges, + GeoArrowType, Metadata as GeoarrowMetadata, PointType as GeoarrowPointType, + crs::{Crs as GeoarrowCrs, CrsType as GeoarrowCrsType}, +}; use indexmap::IndexMap; #[cfg(feature = "python")] use pyo3::prelude::*; @@ -18,18 +33,20 @@ use crate::lit::Literal; use crate::prelude::PythonArray; use crate::{ array::{ - DataArray, FixedSizeListArray, ListArray, StructArray, + DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray, growable::make_growable, image_array::ImageArraySidecarData, ops::{DaftCompare, full::FullNull}, }, datatypes::{ - DaftArrayType, DaftArrowBackedType, DataType, Field, FileArray, ImageMode, Int64Array, - NullArray, TimeUnit, UInt64Array, Utf8Array, + DaftArrayType, DaftArrowBackedType, DataType, Field, FileArray, GeospatialMode, ImageMode, + Int64Array, NullArray, TimeUnit, UInt64Array, Utf8Array, logical::{ DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeSparseTensorArray, FixedShapeTensorArray, ImageArray, MapArray, - SparseTensorArray, TensorArray, TimeArray, TimestampArray, + FixedShapeSparseTensorArray, FixedShapeTensorArray, GeographyArray, GeometryArray, + GeometryCollectionArray, ImageArray, LineStringArray, MapArray, MultiLineStringArray, + MultiPointArray, MultiPolygonArray, PointArray, PolygonArray, RectArray, + SparseTensorArray, TensorArray, TimeArray, TimestampArray, WkbArray, WktArray, }, }, file::{DaftMediaType, MediaTypeAudio, MediaTypeUnknown, MediaTypeVideo}, @@ -79,6 +96,11 @@ where // Cast from DataArray to the target DataType // by using Arrow's casting mechanisms. + if self.data_type().is_string() && dtype.is_wkt() { + let new_field = Arc::new(Field::new(self.name(), dtype.clone())); + return Series::from_arrow(new_field, self.to_arrow()); + } + if !dtype.is_arrow() || !self.data_type().is_arrow() { return Err(DaftError::TypeError(format!( "Can not cast {:?} to type: {:?}: not convertible to Arrow", @@ -1768,6 +1790,338 @@ impl StructArray { } } +impl UnionArray { + pub fn cast(&self, dtype: &DataType) -> DaftResult { + unimplemented!("Union casting not implemented for dtype: {}", dtype) + } +} + +fn geoarrow_crs_type_from_crs_type(crs_type: &DaftCrsType) -> GeoarrowCrsType { + match crs_type { + DaftCrsType::Projjson => GeoarrowCrsType::Projjson, + DaftCrsType::Wkt2_2019 => GeoarrowCrsType::Wkt2_2019, + DaftCrsType::AuthorityCode => GeoarrowCrsType::AuthorityCode, + DaftCrsType::Srid => GeoarrowCrsType::Srid, + } +} + +fn geoarrow_crs_from_crs(crs: &DaftCrs) -> GeoarrowCrs { + match (crs.crs.as_ref(), crs.crs_type.as_ref()) { + (Some(value), Some(crs_type)) => { + let geoarrow_crs_type = geoarrow_crs_type_from_crs_type(crs_type); + match geoarrow_crs_type { + GeoarrowCrsType::Projjson => GeoarrowCrs::from_projjson(value.clone()), + GeoarrowCrsType::Wkt2_2019 => GeoarrowCrs::from_wkt2_2019(value.to_string()), + GeoarrowCrsType::AuthorityCode => { + GeoarrowCrs::from_authority_code(value.to_string()) + } + GeoarrowCrsType::Srid => GeoarrowCrs::from_srid(value.to_string()), + } + } + (Some(value), None) => GeoarrowCrs::from_unknown_crs_type(value.to_string()), + (None, _) => GeoarrowCrs::default(), + } +} + +fn geoarrow_edges_from_edges(edges: &DaftEdges) -> GeoarrowEdges { + match edges { + DaftEdges::Andoyer => GeoarrowEdges::Andoyer, + DaftEdges::Karney => GeoarrowEdges::Karney, + DaftEdges::Spherical => GeoarrowEdges::Spherical, + DaftEdges::Thomas => GeoarrowEdges::Thomas, + DaftEdges::Vincenty => GeoarrowEdges::Vincenty, + } +} + +fn geoarrow_metadata_from_metadata(metadata: &DaftMetadata) -> Arc { + let geoarrow_crs = geoarrow_crs_from_crs(&metadata.crs); + let geoarrow_edges = metadata.edges.as_ref().map(geoarrow_edges_from_edges); + Arc::new(GeoarrowMetadata::new(geoarrow_crs, geoarrow_edges)) +} + +fn geoarrow_dimension_from_dimension(dimension: &DaftDimension) -> GeoarrowDimension { + match dimension { + DaftDimension::XY => GeoarrowDimension::XY, + DaftDimension::XYZ => GeoarrowDimension::XYZ, + DaftDimension::XYM => GeoarrowDimension::XYM, + DaftDimension::XYZM => GeoarrowDimension::XYZM, + } +} + +fn geoarrow_wkt_type_from_mode(mode: &GeospatialMode) -> geoarrow_schema::WktType { + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + geoarrow_schema::WktType::new(metadata) +} + +fn geoarrow_point_type_from_mode(mode: &GeospatialMode) -> GeoarrowPointType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + GeoarrowPointType::new(dimension, metadata).with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_linestring_type_from_mode(mode: &GeospatialMode) -> geoarrow_schema::LineStringType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + geoarrow_schema::LineStringType::new(dimension, metadata) + .with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_polygon_type_from_mode(mode: &GeospatialMode) -> geoarrow_schema::PolygonType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + geoarrow_schema::PolygonType::new(dimension, metadata) + .with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_multipoint_type_from_mode(mode: &GeospatialMode) -> geoarrow_schema::MultiPointType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + geoarrow_schema::MultiPointType::new(dimension, metadata) + .with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_multilinestring_type_from_mode( + mode: &GeospatialMode, +) -> geoarrow_schema::MultiLineStringType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + geoarrow_schema::MultiLineStringType::new(dimension, metadata) + .with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_multipolygon_type_from_mode( + mode: &GeospatialMode, +) -> geoarrow_schema::MultiPolygonType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + geoarrow_schema::MultiPolygonType::new(dimension, metadata) + .with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_geometry_collection_type_from_mode( + mode: &GeospatialMode, +) -> geoarrow_schema::GeometryCollectionType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + + geoarrow_schema::GeometryCollectionType::new(dimension, metadata) + .with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_geometry_type_from_mode(mode: &GeospatialMode) -> geoarrow_schema::GeometryType { + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + geoarrow_schema::GeometryType::new(metadata).with_coord_type(GeoarrowCoordType::Separated) +} + +fn geoarrow_rect_type_from_mode(mode: &GeospatialMode) -> geoarrow_schema::RectType { + let dimension = geoarrow_dimension_from_dimension(&mode.dimension); + let metadata = geoarrow_metadata_from_metadata(&mode.metadata); + geoarrow_schema::RectType::new(dimension, metadata) +} + +impl WktArray { + pub fn cast(&self, dtype: &DataType) -> DaftResult { + if dtype.is_geospatial() { + let metadata = self.field.metadata.as_ref().clone(); + let metadata_hashmap = metadata.into_iter().collect::>(); + let arrow_field = arrow_schema::Field::new( + self.field.name.as_ref(), + arrow_schema::DataType::LargeUtf8, + true, + ) + .with_metadata(metadata_hashmap); + + let arrow_array_ref = self.physical.to_arrow(); + let geoarrow_array = from_arrow_array( + arrow_array_ref.as_string_opt::().unwrap(), + &arrow_field, + ) + .unwrap(); + + let geoarrow_type = match dtype { + DataType::Point(mode) => GeoArrowType::Point(geoarrow_point_type_from_mode(mode)), + DataType::LineString(mode) => { + GeoArrowType::LineString(geoarrow_linestring_type_from_mode(mode)) + } + DataType::Polygon(mode) => { + GeoArrowType::Polygon(geoarrow_polygon_type_from_mode(mode)) + } + DataType::MultiPoint(mode) => { + GeoArrowType::MultiPoint(geoarrow_multipoint_type_from_mode(mode)) + } + DataType::MultiLineString(mode) => { + GeoArrowType::MultiLineString(geoarrow_multilinestring_type_from_mode(mode)) + } + DataType::MultiPolygon(mode) => { + GeoArrowType::MultiPolygon(geoarrow_multipolygon_type_from_mode(mode)) + } + DataType::GeometryCollection(mode) => GeoArrowType::GeometryCollection( + geoarrow_geometry_collection_type_from_mode(mode), + ), + _ => unimplemented!( + "Unsupported cast from {:?} to {:?}", + self.data_type(), + dtype + ), + }; + + let geoarrow_casted = geoarrow_cast(&geoarrow_array, &geoarrow_type); + let casted_array = match geoarrow_casted { + Ok(casted) => casted, + Err(e) => { + panic!("Error casting geoarrow array: {}", e); + } + }; + + let casted_arrow_array = casted_array.to_array_ref(); + let new_field = Arc::new(Field::new(self.name(), dtype.clone())); + return Series::from_arrow(new_field, casted_arrow_array); + } else if dtype.is_string() { + let new_field = Arc::new(Field::new(self.name(), dtype.clone())); + return Series::from_arrow(new_field, self.physical.to_arrow()); + } + unimplemented!("Wkt casting not implemented for dtype: {}", dtype) + } +} + +impl WkbArray { + pub fn cast(&self, dtype: &DataType) -> DaftResult { + unimplemented!("Wkb casting not implemented for dtype: {}", dtype) + } +} + +impl PointArray { + pub fn cast(&self, dtype: &DataType) -> DaftResult { + match dtype { + DataType::WKT(mode) => { + let curr_mode = match self.data_type() { + DataType::Point(mode) => mode.clone(), + _ => unreachable!(), + }; + + let geotype = geoarrow_point_type_from_mode(&curr_mode); + + let arrow_array_ref = self.physical.to_arrow()?; + let geoarrow_array = + GeoarrowPointArray::try_from((arrow_array_ref.as_struct(), geotype)).unwrap(); + let geoarrow_type = GeoArrowType::LargeWkt(geoarrow_wkt_type_from_mode(mode)); + let geoarrow_casted = geoarrow_cast(&geoarrow_array, &geoarrow_type); + let casted_array = match geoarrow_casted { + Ok(casted) => casted, + Err(e) => { + panic!("Error casting geoarrow array: {}", e); + } + }; + + let casted_arrow_array = casted_array.to_array_ref(); + let new_field = Arc::new(Field::new(self.name(), dtype.clone())); + Series::from_arrow(new_field, casted_arrow_array) + } + _ => unimplemented!( + "Unsupported cast from {:?} to {:?}", + self.data_type(), + dtype + ), + } + } +} + +macro_rules! impl_geo_to_wkt_cast { + ($array_ty:ty, $daft_variant:ident, $geo_type_fn:ident, $geoarrow_variant:ident) => { + impl $array_ty { + pub fn cast(&self, dtype: &DataType) -> DaftResult { + match dtype { + DataType::WKT(mode) => { + let curr_mode = match self.data_type() { + DataType::$daft_variant(m) => m.clone(), + _ => unreachable!(), + }; + let arrow_field = GeoArrowType::$geoarrow_variant($geo_type_fn(&curr_mode)) + .to_field(self.name(), true); + let arrow_array_ref = self.physical.to_arrow()?; + let geoarrow_array = + from_arrow_array(arrow_array_ref.as_ref(), &arrow_field) + .unwrap_or_else(|e| panic!("Error creating geoarrow array: {e}")); + let casted = geoarrow_cast( + geoarrow_array.as_ref(), + &GeoArrowType::LargeWkt(geoarrow_wkt_type_from_mode(mode)), + ) + .unwrap_or_else(|e| panic!("Error casting geoarrow array: {e}")); + let new_field = Arc::new(Field::new(self.name(), dtype.clone())); + Series::from_arrow(new_field, casted.to_array_ref()) + } + _ => unimplemented!( + "Daft casting from {} to {} not implemented", + self.data_type(), + dtype + ), + } + } + } + }; +} + +impl_geo_to_wkt_cast!( + LineStringArray, + LineString, + geoarrow_linestring_type_from_mode, + LineString +); +impl_geo_to_wkt_cast!( + PolygonArray, + Polygon, + geoarrow_polygon_type_from_mode, + Polygon +); +impl_geo_to_wkt_cast!( + MultiPointArray, + MultiPoint, + geoarrow_multipoint_type_from_mode, + MultiPoint +); +impl_geo_to_wkt_cast!( + MultiLineStringArray, + MultiLineString, + geoarrow_multilinestring_type_from_mode, + MultiLineString +); +impl_geo_to_wkt_cast!( + MultiPolygonArray, + MultiPolygon, + geoarrow_multipolygon_type_from_mode, + MultiPolygon +); +impl_geo_to_wkt_cast!( + GeometryCollectionArray, + GeometryCollection, + geoarrow_geometry_collection_type_from_mode, + GeometryCollection +); +impl_geo_to_wkt_cast!( + GeometryArray, + Geometry, + geoarrow_geometry_type_from_mode, + Geometry +); +impl_geo_to_wkt_cast!(RectArray, Rect, geoarrow_rect_type_from_mode, Rect); + +impl GeographyArray { + pub fn cast(&self, dtype: &DataType) -> DaftResult { + unimplemented!( + "Daft casting from {} to {} not implemented", + self.data_type(), + dtype + ) + } +} + #[cfg(test)] mod tests { use rand::{Rng, rng}; diff --git a/src/daft-core/src/array/ops/compare_agg.rs b/src/daft-core/src/array/ops/compare_agg.rs index fac39c4355..db6fd203aa 100644 --- a/src/daft-core/src/array/ops/compare_agg.rs +++ b/src/daft-core/src/array/ops/compare_agg.rs @@ -7,7 +7,7 @@ use super::{DaftCompareAggable, GroupIndices, full::FullNull}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{ListArray, StructArray}, + array::{ListArray, StructArray, UnionArray}, datatypes::*, }; @@ -407,6 +407,7 @@ impl_todo_daft_comparable!(FixedSizeListArray); impl_todo_daft_comparable!(ListArray); impl_todo_daft_comparable!(ExtensionArray); impl_todo_daft_comparable!(IntervalArray); +impl_todo_daft_comparable!(UnionArray); #[cfg(feature = "python")] impl_todo_daft_comparable!(PythonArray); diff --git a/src/daft-core/src/array/ops/comparison.rs b/src/daft-core/src/array/ops/comparison.rs index ce5dc98828..2c9839a32a 100644 --- a/src/daft-core/src/array/ops/comparison.rs +++ b/src/daft-core/src/array/ops/comparison.rs @@ -162,7 +162,6 @@ trait RowCompareFastPath { } arrow::datatypes::DataType::Dictionary(_, _) => true, arrow::datatypes::DataType::Map(_, _) => true, - arrow::datatypes::DataType::Union(_, _) => true, arrow::datatypes::DataType::RunEndEncoded(_, _) => true, _ => false, } diff --git a/src/daft-core/src/array/ops/count.rs b/src/daft-core/src/array/ops/count.rs index 76115925f2..a35de40a15 100644 --- a/src/daft-core/src/array/ops/count.rs +++ b/src/daft-core/src/array/ops/count.rs @@ -7,7 +7,7 @@ use super::{DaftCountAggable, GroupIndices}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{ListArray, StructArray}, + array::{ListArray, StructArray, UnionArray}, count_mode::CountMode, datatypes::*, }; @@ -122,6 +122,7 @@ macro_rules! impl_daft_count_aggable { impl_daft_count_aggable!(FixedSizeListArray); impl_daft_count_aggable!(ListArray); impl_daft_count_aggable!(StructArray); +impl_daft_count_aggable!(UnionArray); #[cfg(feature = "python")] impl_daft_count_aggable!(PythonArray); diff --git a/src/daft-core/src/array/ops/filter.rs b/src/daft-core/src/array/ops/filter.rs index a47863de36..20059ada91 100644 --- a/src/daft-core/src/array/ops/filter.rs +++ b/src/daft-core/src/array/ops/filter.rs @@ -4,7 +4,7 @@ use super::{as_arrow::AsArrow, from_arrow::FromArrow}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{BooleanArray, DaftArrowBackedType}, }; @@ -74,3 +74,10 @@ impl PythonArray { Ok(growable.build()?.downcast::()?.clone()) } } + +impl UnionArray { + pub fn filter(&self, mask: &BooleanArray) -> DaftResult { + let filtered = arrow::compute::filter(self.to_arrow()?.as_ref(), mask.as_arrow()?)?; + Self::from_arrow(self.field().clone(), filtered) + } +} diff --git a/src/daft-core/src/array/ops/from_arrow.rs b/src/daft-core/src/array/ops/from_arrow.rs index 93c25714cf..77327c8ddf 100644 --- a/src/daft-core/src/array/ops/from_arrow.rs +++ b/src/daft-core/src/array/ops/from_arrow.rs @@ -7,7 +7,7 @@ use arrow::{ use common_error::{DaftError, DaftResult}; use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{ DaftDataType, DaftLogicalType, DaftPhysicalType, DataType, Field, FieldRef, logical::LogicalArray, @@ -163,6 +163,68 @@ impl FromArrow for StructArray { } } +impl FromArrow for UnionArray { + fn from_arrow>(field: F, arrow_arr: ArrayRef) -> DaftResult { + let field: FieldRef = field.into(); + + match (&field.dtype, arrow_arr.data_type()) { + ( + DataType::Union(fields, ids, mode), + arrow::datatypes::DataType::Union(arrow_fields, arrow_mode), + ) => { + if fields.len() != arrow_fields.len() { + return Err(DaftError::ValueError(format!( + "Attempting to create Daft UnionArray with {} fields from Arrow array with {} fields: {} vs {:?}", + fields.len(), + arrow_fields.len(), + &field.dtype, + arrow_arr.data_type(), + ))); + } + + for (id, (arrow_id, _)) in ids.iter().zip(arrow_fields.iter()) { + if *id != arrow_id { + return Err(DaftError::ValueError(format!( + "Attempting to create Daft UnionArray with id {} from Arrow array with id {:?}", + id, arrow_id, + ))); + } + } + + let arrow_arr = arrow_arr + .as_any() + .downcast_ref::() + .unwrap(); + + if mode.is_dense() != arrow_arr.is_dense() { + return Err(DaftError::ValueError(format!( + "Attempting to create Daft UnionArray with mode {} from Arrow array with mode {:?}", + mode, arrow_mode, + ))); + } + + let child_series = fields + .iter() + .zip(ids.iter()) + .map(|(field, id)| { + let arrow_arr = arrow_arr.child(*id); + Series::from_arrow(Arc::new(field.clone()), arrow_arr.clone()) + }) + .collect::>>()?; + + let ids = arrow_arr.type_ids().clone(); + let offsets = arrow_arr.offsets().cloned(); + + Ok(Self::new(field, ids, child_series, offsets)) + } + (d, a) => Err(DaftError::TypeError(format!( + "Attempting to create Daft UnionArray with type {} from arrow array with type {:?}", + d, a + ))), + } + } +} + impl FromArrow for MapArray { fn from_arrow>(field: F, arrow_arr: ArrayRef) -> DaftResult { let field: FieldRef = field.into(); @@ -248,7 +310,6 @@ macro_rules! impl_logical_from_arrow { let field: FieldRef = field.into(); let target_convert = field.to_physical(); let target_convert_arrow = target_convert.dtype.to_arrow()?; - let physical_arrow_array = arrow::compute::cast( arrow_arr.as_ref(), &target_convert_arrow, @@ -329,6 +390,40 @@ where } } +impl_logical_from_arrow!(WKTType); +impl_logical_from_arrow!(WKBType); +impl_logical_from_arrow!(PointType); +impl_logical_from_arrow!(LineStringType); +impl_logical_from_arrow!(PolygonType); +impl_logical_from_arrow!(MultiPointType); +impl_logical_from_arrow!(MultiLineStringType); +impl_logical_from_arrow!(MultiPolygonType); + +impl FromArrow for LogicalArray { + fn from_arrow>(field: F, arrow_arr: ArrayRef) -> DaftResult { + let field: FieldRef = field.into(); + let target_convert = field.to_physical(); + // Skip arrow::compute::cast — it doesn't support Union child types. + // ListArray::from_arrow handles the Union child recursively via Series::from_arrow. + let physical = ListArray::from_arrow(Arc::new(target_convert), arrow_arr)?; + Ok(Self::new(field, physical)) + } +} + +impl FromArrow for LogicalArray { + fn from_arrow>(field: F, arrow_arr: ArrayRef) -> DaftResult { + let field: FieldRef = field.into(); + let target_convert = field.to_physical(); + // Skip arrow::compute::cast — it doesn't support Union child types. + // ListArray::from_arrow handles the Union child recursively via Series::from_arrow. + let physical = UnionArray::from_arrow(Arc::new(target_convert), arrow_arr)?; + Ok(Self::new(field, physical)) + } +} + +impl_logical_from_arrow!(GeographyType); +impl_logical_from_arrow!(RectType); + #[cfg(test)] mod tests { use common_error::DaftResult; diff --git a/src/daft-core/src/array/ops/full.rs b/src/daft-core/src/array/ops/full.rs index 08a80cf392..c0f3928344 100644 --- a/src/daft-core/src/array/ops/full.rs +++ b/src/daft-core/src/array/ops/full.rs @@ -7,7 +7,7 @@ use pyo3::Python; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{ DaftDataType, DaftLogicalType, DaftPhysicalType, DataType, Field, logical::LogicalArray, }, @@ -171,6 +171,55 @@ impl FullNull for StructArray { } } +impl FullNull for UnionArray { + fn full_null(name: &str, dtype: &DataType, length: usize) -> Self { + match dtype { + DataType::Union(fields, _, mode) => { + let field = Field::new(name, dtype.clone()); + let empty_children = fields + .iter() + .map(|f| Series::full_null(f.name.as_ref(), &f.dtype, length)) + .collect::>(); + + let offsets = if mode.is_dense() { + let offsets: arrow::buffer::ScalarBuffer = vec![0; length + 1].into(); + Some(offsets) + } else { + None + }; + + let types: arrow::buffer::ScalarBuffer = vec![0; length].into(); + + Self::new(field, types, empty_children, offsets) + } + _ => panic!("Cannot create empty UnionArray with dtype: {}", dtype), + } + } + + fn empty(name: &str, dtype: &DataType) -> Self { + match dtype { + DataType::Union(fields, _, mode) => { + let field = Field::new(name, dtype.clone()); + let empty_children = fields + .iter() + .map(|f| Series::empty(f.name.as_ref(), &f.dtype)) + .collect::>(); + + let offsets = if mode.is_dense() { + Some(arrow::buffer::ScalarBuffer::::default()) + } else { + None + }; + + let types: arrow::buffer::ScalarBuffer = Vec::new().into(); + + Self::new(field, types, empty_children, offsets) + } + _ => panic!("Cannot create empty UnionArray with dtype: {}", dtype), + } + } +} + #[cfg(feature = "python")] impl FullNull for PythonArray { fn full_null(name: &str, dtype: &DataType, length: usize) -> Self { diff --git a/src/daft-core/src/array/ops/get_lit.rs b/src/daft-core/src/array/ops/get_lit.rs index 5fd7fc4f5b..c762f8f852 100644 --- a/src/daft-core/src/array/ops/get_lit.rs +++ b/src/daft-core/src/array/ops/get_lit.rs @@ -29,6 +29,12 @@ impl NullArray { } } +impl UnionArray { + pub fn get_lit(&self, _idx: usize) -> Literal { + todo!("Implement UnionArray::get_lit") + } +} + impl StructArray { pub fn get_lit(&self, idx: usize) -> Literal { assert!( @@ -276,3 +282,75 @@ impl_array_get_lit!(FixedShapeTensorArray, DataType::FixedShapeTensor(_, shape) impl_image_array_get_lit!(ImageArray); impl_image_array_get_lit!(FixedShapeImageArray); + +impl WktArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl WkbArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl PointArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl LineStringArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl PolygonArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl MultiPointArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl MultiLineStringArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl MultiPolygonArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl GeometryCollectionArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl GeometryArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl GeographyArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} + +impl RectArray { + pub fn get_lit(&self, idx: usize) -> Literal { + self.physical.get_lit(idx) + } +} diff --git a/src/daft-core/src/array/ops/hash.rs b/src/daft-core/src/array/ops/hash.rs index 8f0a2415c9..8a864e7e9b 100644 --- a/src/daft-core/src/array/ops/hash.rs +++ b/src/daft-core/src/array/ops/hash.rs @@ -11,7 +11,7 @@ use xxhash_rust::{const_xxh3, xxh3::xxh3_64_with_seed, xxh32::xxh32, xxh64::xxh6 use super::as_arrow::AsArrow; use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{ BinaryArray, BooleanArray, DaftPrimitiveType, Decimal128Array, FixedSizeBinaryArray, Int8Array, Int16Array, Int32Array, Int64Array, NullArray, UInt8Array, UInt16Array, @@ -463,6 +463,12 @@ fn hash_list( } } +impl UnionArray { + pub fn hash(&self, _seed: Option<&UInt64Array>) -> DaftResult { + todo!("implement hash for UnionArray") + } +} + #[cfg(test)] mod tests { use std::hash::{BuildHasher, Hasher}; diff --git a/src/daft-core/src/array/ops/if_else.rs b/src/daft-core/src/array/ops/if_else.rs index a0db329b89..791c0f1b43 100644 --- a/src/daft-core/src/array/ops/if_else.rs +++ b/src/daft-core/src/array/ops/if_else.rs @@ -5,7 +5,7 @@ use super::as_arrow::AsArrow; use crate::prelude::PythonArray; use crate::{ array::{ - DataArray, FixedSizeListArray, ListArray, StructArray, + DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray, growable::{Growable, GrowableArray}, ops::full::FullNull, }, @@ -156,6 +156,7 @@ macro_rules! impl_if_else { impl_if_else!(ListArray); impl_if_else!(FixedSizeListArray); impl_if_else!(StructArray); +impl_if_else!(UnionArray); #[cfg(feature = "python")] impl_if_else!(PythonArray); diff --git a/src/daft-core/src/array/ops/len.rs b/src/daft-core/src/array/ops/len.rs index 4e6fe8d620..e285a97d6d 100644 --- a/src/daft-core/src/array/ops/len.rs +++ b/src/daft-core/src/array/ops/len.rs @@ -3,7 +3,7 @@ use arrow::buffer::{NullBuffer, OffsetBuffer}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{DaftArrowBackedType, FileArray}, file::DaftMediaType, }; @@ -115,6 +115,16 @@ impl StructArray { } } +impl UnionArray { + pub fn size_bytes(&self) -> usize { + let children_size_bytes: usize = self.children.iter().map(|s| s.size_bytes()).sum(); + let offset_bytes = + self.offsets().clone().map(|b| b.len()).unwrap_or(0) * std::mem::size_of::(); + let ids_bytes = self.ids().len() * std::mem::size_of::(); + children_size_bytes + offset_bytes + ids_bytes + } +} + impl FileArray where T: DaftMediaType, diff --git a/src/daft-core/src/array/ops/list_agg.rs b/src/daft-core/src/array/ops/list_agg.rs index 0063c6f287..17bda65e26 100644 --- a/src/daft-core/src/array/ops/list_agg.rs +++ b/src/daft-core/src/array/ops/list_agg.rs @@ -5,7 +5,7 @@ use super::{DaftListAggable, GroupIndices}; use crate::prelude::PythonArray; use crate::{ array::{ - DataArray, FixedSizeListArray, ListArray, StructArray, + DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray, growable::{Growable, GrowableArray}, }, datatypes::DaftArrowBackedType, @@ -84,3 +84,7 @@ impl DaftListAggable for StructArray { impl DaftListAggable for PythonArray { impl_daft_list_agg!(); } + +impl DaftListAggable for UnionArray { + impl_daft_list_agg!(); +} diff --git a/src/daft-core/src/array/ops/null.rs b/src/daft-core/src/array/ops/null.rs index f8841e2fba..d72c2f5356 100644 --- a/src/daft-core/src/array/ops/null.rs +++ b/src/daft-core/src/array/ops/null.rs @@ -6,7 +6,7 @@ use super::{DaftIsNull, DaftNotNull}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{ListArray, StructArray}, + array::{ListArray, StructArray, UnionArray}, datatypes::*, }; @@ -153,10 +153,12 @@ macro_rules! impl_not_null_nested_array { impl_is_null_nested_array!(ListArray); impl_is_null_nested_array!(FixedSizeListArray); impl_is_null_nested_array!(StructArray); +impl_is_null_nested_array!(UnionArray); impl_not_null_nested_array!(ListArray); impl_not_null_nested_array!(FixedSizeListArray); impl_not_null_nested_array!(StructArray); +impl_not_null_nested_array!(UnionArray); impl DataArray where @@ -200,3 +202,10 @@ impl StructArray { } } } + +impl UnionArray { + #[inline] + pub fn is_valid(&self, _idx: usize) -> bool { + true + } +} diff --git a/src/daft-core/src/array/ops/repr.rs b/src/daft-core/src/array/ops/repr.rs index b8c2a55f30..8309f6bd8b 100644 --- a/src/daft-core/src/array/ops/repr.rs +++ b/src/daft-core/src/array/ops/repr.rs @@ -4,15 +4,17 @@ use common_error::DaftResult; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{DataArray, FixedSizeListArray, ListArray, StructArray}, + array::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}, datatypes::{ BinaryArray, BooleanArray, DaftNumericType, DataType, Decimal128Array, ExtensionArray, FileArray, FixedSizeBinaryArray, IntervalArray, IntervalValue, NullArray, UInt64Array, Utf8Array, logical::{ DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeSparseTensorArray, FixedShapeTensorArray, ImageArray, MapArray, - SparseTensorArray, TensorArray, TimeArray, TimestampArray, + FixedShapeSparseTensorArray, FixedShapeTensorArray, GeographyArray, GeometryArray, + GeometryCollectionArray, ImageArray, LineStringArray, MapArray, MultiLineStringArray, + MultiPointArray, MultiPolygonArray, PointArray, PolygonArray, RectArray, + SparseTensorArray, TensorArray, TimeArray, TimestampArray, WkbArray, WktArray, }, }, file::DaftMediaType, @@ -473,6 +475,13 @@ impl StructArray { } } } + +impl UnionArray { + pub fn str_value(&self, _idx: usize) -> DaftResult { + todo!("Implement str_value for UnionArray") + } +} + impl FileArray where T: DaftMediaType, @@ -482,6 +491,126 @@ where } } +impl WktArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl WkbArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl PointArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl LineStringArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl PolygonArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl MultiPointArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl MultiLineStringArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl MultiPolygonArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl GeometryCollectionArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl GeometryArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl GeographyArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + +impl RectArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + if self.physical.is_valid(idx) { + Ok("".to_string()) + } else { + Ok("None".to_string()) + } + } +} + // Truncate strings so they do not crash the browser when rendering HTML fn truncate_for_html(s: &str) -> String { // Limit string length to 1MB to prevent browser crashes @@ -548,6 +677,19 @@ impl_array_html_value!(DurationArray); impl_array_html_value!(IntervalArray); impl_array_html_value!(TimestampArray); impl_array_html_value!(EmbeddingArray); +impl_array_html_value!(UnionArray); +impl_array_html_value!(WktArray); +impl_array_html_value!(WkbArray); +impl_array_html_value!(PointArray); +impl_array_html_value!(LineStringArray); +impl_array_html_value!(PolygonArray); +impl_array_html_value!(MultiPointArray); +impl_array_html_value!(MultiLineStringArray); +impl_array_html_value!(MultiPolygonArray); +impl_array_html_value!(GeometryCollectionArray); +impl_array_html_value!(GeometryArray); +impl_array_html_value!(RectArray); +impl_array_html_value!(GeographyArray); #[cfg(feature = "python")] impl PythonArray { diff --git a/src/daft-core/src/array/ops/sort.rs b/src/daft-core/src/array/ops/sort.rs index af85aaa1ed..986644afad 100644 --- a/src/daft-core/src/array/ops/sort.rs +++ b/src/daft-core/src/array/ops/sort.rs @@ -12,7 +12,7 @@ use super::as_arrow::AsArrow; use crate::prelude::PythonArray; use crate::{ array::{ - DataArray, FixedSizeListArray, ListArray, StructArray, + DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray, ops::arrow::sort::primitive::{ common::multi_column_idx_sort, indices::indices_sorted_unstable_by, sort::sort_by, }, @@ -23,8 +23,10 @@ use crate::{ IntervalArray, NullArray, NumericNative, Utf8Array, logical::{ DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeSparseTensorArray, FixedShapeTensorArray, ImageArray, MapArray, - SparseTensorArray, TensorArray, TimeArray, TimestampArray, + FixedShapeSparseTensorArray, FixedShapeTensorArray, GeographyArray, GeometryArray, + GeometryCollectionArray, ImageArray, LineStringArray, MapArray, MultiLineStringArray, + MultiPointArray, MultiPolygonArray, PointArray, PolygonArray, RectArray, + SparseTensorArray, TensorArray, TimeArray, TimestampArray, WkbArray, WktArray, }, }, file::DaftMediaType, @@ -671,6 +673,12 @@ impl StructArray { } } +impl UnionArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for UnionArray") + } +} + impl ExtensionArray { pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { todo!("impl sort for ExtensionArray") @@ -768,3 +776,75 @@ where todo!("impl sort for FileArray") } } + +impl WktArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for WKTArray") + } +} + +impl WkbArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for WKBArray") + } +} + +impl PointArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for PointArray") + } +} + +impl LineStringArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for LineStringArray") + } +} + +impl PolygonArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for PolygonArray") + } +} + +impl MultiPointArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for MultiPointArray") + } +} + +impl MultiLineStringArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for MultiLineStringArray") + } +} + +impl MultiPolygonArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for MultiPolygonArray") + } +} + +impl GeometryCollectionArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for GeometryCollectionArray") + } +} + +impl GeometryArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for GeometryArray") + } +} + +impl GeographyArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for GeographyArray") + } +} + +impl RectArray { + pub fn sort(&self, _descending: bool, _nulls_first: bool) -> DaftResult { + todo!("impl sort for RectArray") + } +} diff --git a/src/daft-core/src/array/ops/take.rs b/src/daft-core/src/array/ops/take.rs index e164d1aff4..66bfb554b8 100644 --- a/src/daft-core/src/array/ops/take.rs +++ b/src/daft-core/src/array/ops/take.rs @@ -47,6 +47,18 @@ impl_logicalarray_take!(SparseTensorArray); impl_logicalarray_take!(FixedShapeSparseTensorArray); impl_logicalarray_take!(FixedShapeTensorArray); impl_logicalarray_take!(MapArray); +impl_logicalarray_take!(WktArray); +impl_logicalarray_take!(WkbArray); +impl_logicalarray_take!(PointArray); +impl_logicalarray_take!(LineStringArray); +impl_logicalarray_take!(PolygonArray); +impl_logicalarray_take!(MultiPointArray); +impl_logicalarray_take!(MultiLineStringArray); +impl_logicalarray_take!(MultiPolygonArray); +impl_logicalarray_take!(GeometryCollectionArray); +impl_logicalarray_take!(GeometryArray); +impl_logicalarray_take!(RectArray); +impl_logicalarray_take!(GeographyArray); impl FixedSizeListArray { pub fn take(&self, idx: &UInt64Array) -> DaftResult { @@ -116,6 +128,13 @@ impl ListArray { )) } } + +impl UnionArray { + pub fn take(&self, _idx: &UInt64Array) -> DaftResult { + todo!("implement take for UnionArray") + } +} + impl StructArray { pub fn take(&self, idx: &UInt64Array) -> DaftResult { let nulls = self.nulls().map(|v| { diff --git a/src/daft-core/src/array/prelude.rs b/src/daft-core/src/array/prelude.rs index 8bd806a56f..e8307182cc 100644 --- a/src/daft-core/src/array/prelude.rs +++ b/src/daft-core/src/array/prelude.rs @@ -1,9 +1,11 @@ -pub use super::{DataArray, FixedSizeListArray, ListArray, StructArray}; +pub use super::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}; // Import logical array types pub use crate::datatypes::logical::{ DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeSparseTensorArray, - FixedShapeTensorArray, ImageArray, LogicalArray, MapArray, SparseTensorArray, TensorArray, - TimeArray, TimestampArray, + FixedShapeTensorArray, GeographyArray, GeometryArray, GeometryCollectionArray, ImageArray, + LineStringArray, LogicalArray, MapArray, MultiLineStringArray, MultiPointArray, + MultiPolygonArray, PointArray, PolygonArray, RectArray, SparseTensorArray, TensorArray, + TimeArray, TimestampArray, WkbArray, WktArray, }; #[cfg(feature = "python")] pub use crate::datatypes::python::PythonArray; diff --git a/src/daft-core/src/array/serdes.rs b/src/daft-core/src/array/serdes.rs index 3e5e12b1c7..beebe34a0c 100644 --- a/src/daft-core/src/array/serdes.rs +++ b/src/daft-core/src/array/serdes.rs @@ -2,14 +2,14 @@ use std::cell::RefCell; use serde::ser::SerializeMap; -use super::{DataArray, FixedSizeListArray, ListArray, StructArray}; +use super::{DataArray, FixedSizeListArray, ListArray, StructArray, UnionArray}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ datatypes::{ BinaryArray, BooleanArray, DaftLogicalType, DaftPrimitiveType, DataType, ExtensionArray, - Field, FixedSizeBinaryArray, Int64Array, IntervalArray, NullArray, Utf8Array, - logical::LogicalArray, + Field, FixedSizeBinaryArray, Int8Array, Int32Array, Int64Array, IntervalArray, NullArray, + Utf8Array, logical::LogicalArray, }, series::{IntoSeries, Series}, }; @@ -154,6 +154,33 @@ impl serde::Serialize for StructArray { } } +impl serde::Serialize for UnionArray { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut s = serializer.serialize_map(Some(2))?; + let mut values = Vec::with_capacity(self.children.len() + 2); + values.extend(self.children.iter().map(Some)); + + let ids = Int8Array::from_slice("ids", &self.ids).into_series(); + values.push(Some(&ids)); + + let offsets = if let Some(offsets) = self.offsets() { + let offsets = Int32Array::from_slice("offsets", offsets).into_series(); + Some(offsets) + } else { + None + }; + + values.push(offsets.as_ref()); + + s.serialize_entry("field", self.field.as_ref())?; + s.serialize_entry("values", &values)?; + s.end() + } +} + impl serde::Serialize for ListArray { fn serialize(&self, serializer: S) -> Result where diff --git a/src/daft-core/src/array/union_array.rs b/src/daft-core/src/array/union_array.rs new file mode 100644 index 0000000000..33f19ff38c --- /dev/null +++ b/src/daft-core/src/array/union_array.rs @@ -0,0 +1,232 @@ +use std::sync::Arc; + +use arrow::{ + array::ArrayRef, + buffer::{NullBuffer, ScalarBuffer}, +}; +use common_error::{DaftError, DaftResult}; + +use crate::{ + array::growable::{Growable, GrowableArray}, + datatypes::{DaftArrayType, DataType, Field}, + series::Series, +}; + +#[derive(Clone, Debug)] +pub struct UnionArray { + pub field: Arc, + pub ids: ScalarBuffer, + pub children: Vec, + pub offsets: Option>, +} + +impl DaftArrayType for UnionArray { + fn data_type(&self) -> &DataType { + &self.field.as_ref().dtype + } +} + +impl UnionArray { + pub fn new>>( + field: F, + ids: ScalarBuffer, + children: Vec, + offsets: Option>, + ) -> Self { + let field: Arc = field.into(); + match &field.as_ref().dtype { + DataType::Union(fields, _, mode) => { + assert!( + fields.len() == children.len(), + "StructArray::new received {} children arrays but expected {} for specified dtype: {}", + children.len(), + fields.len(), + &field.as_ref().dtype + ); + for (dtype_field, series) in fields.iter().zip(children.iter()) { + assert!( + !(&dtype_field.dtype != series.data_type()), + "StructArray::new received an array with dtype: {} but expected child field: {}", + series.data_type(), + dtype_field + ); + assert!( + dtype_field.name.as_ref() == series.name(), + "StructArray::new received a series with name: {} but expected name: {}", + series.name(), + &dtype_field.name + ); + } + + assert!( + offsets.is_some() == mode.is_dense(), + "UnionArray can only have offsets if mode is dense" + ); + + if let Some(offsets) = &offsets { + assert!( + offsets.len() == ids.len(), + "Type Ids and Offsets lengths must match" + ); + } else { + for child in &children { + assert!( + child.len() == ids.len(), + "Sparse union child arrays must be equal in length to the length of the union" + ); + } + } + + Self { + field, + ids, + children, + offsets, + } + } + _ => { + panic!( + "UnionArray::new expected Union datatype, but received field: {}", + field + ) + } + } + } + + pub fn ids(&self) -> &ScalarBuffer { + &self.ids + } + + pub fn concat(arrays: &[&Self]) -> DaftResult { + if arrays.is_empty() { + return Err(DaftError::ValueError( + "Need at least 1 UnionArray to concat".to_string(), + )); + } + + let first_array = arrays.first().unwrap(); + let mut growable = ::make_growable( + first_array.field.name.as_ref(), + &first_array.field.dtype, + arrays.to_vec(), + false, + arrays.iter().map(|a| a.len()).sum(), + ); + + for (i, arr) in arrays.iter().enumerate() { + growable.extend(i, 0, arr.len()); + } + + growable + .build() + .map(|s| s.downcast::().unwrap().clone()) + } + + pub fn nulls(&self) -> Option<&NullBuffer> { + None + } + + pub fn null_count(&self) -> usize { + 0 + } + + pub fn len(&self) -> usize { + self.ids.len() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn field(&self) -> &Arc { + &self.field + } + + pub fn offsets(&self) -> &Option> { + &self.offsets + } + + pub fn name(&self) -> &str { + &self.field.name + } + + pub fn data_type(&self) -> &DataType { + &self.field.dtype + } + + pub fn rename(&self, name: &str) -> Self { + Self { + field: Arc::new(Field::new(name, self.data_type().clone())), + ids: self.ids.clone(), + children: self.children.clone(), + offsets: self.offsets.clone(), + } + } + + pub fn slice(&self, start: usize, end: usize) -> DaftResult { + if start > end { + return Err(DaftError::ValueError(format!( + "Trying to slice array with negative length, start: {start} vs end: {end}" + ))); + } + + let (offsets, children) = match self.offsets.as_ref() { + // If dense union, slice offsets + Some(offsets) => { + let sliced_offsets = offsets.slice(start, end - start); + (Some(sliced_offsets), self.children.clone()) + } + // Otherwise need to slice sparse children + None => { + let children = self + .children + .iter() + .map(|x| x.slice(start, end)) + .collect::>>()?; + (None, children) + } + }; + + Ok(Self::new( + self.field.clone(), + self.ids.slice(start, end - start), + children, + offsets, + )) + } + + pub fn to_arrow(&self) -> DaftResult { + let field = self.field().to_arrow()?; + + let arrow::datatypes::DataType::Union(fields, _) = field.data_type() else { + return Err(DaftError::TypeError(format!( + "Expected UnionArray, got {:?}", + field.data_type() + ))); + }; + + let children = self + .children + .iter() + .map(|x| x.to_arrow()) + .collect::>>()?; + + Ok(Arc::new(unsafe { + arrow::array::UnionArray::new_unchecked( + fields.clone(), + self.ids.clone(), + self.offsets.clone(), + children, + ) + }) as _) + } + + pub fn with_nulls(&self, _nulls: Option) -> DaftResult { + Ok(Self::new( + self.field.clone(), + self.ids.clone(), + self.children.clone(), + self.offsets.clone(), + )) + } +} diff --git a/src/daft-core/src/datatypes/logical.rs b/src/daft-core/src/datatypes/logical.rs index 3c91550022..b03ebde7fe 100644 --- a/src/daft-core/src/datatypes/logical.rs +++ b/src/daft-core/src/datatypes/logical.rs @@ -6,10 +6,12 @@ use common_error::DaftResult; use super::{ DaftArrayType, DaftDataType, DataArray, DataType, DurationType, EmbeddingType, FixedShapeImageType, FixedShapeSparseTensorType, FixedShapeTensorType, FixedSizeListArray, - ImageType, MapType, SparseTensorType, TensorType, TimeType, TimestampType, + GeographyType, GeometryCollectionType, GeometryType, ImageType, LineStringType, MapType, + MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, RectType, + SparseTensorType, TensorType, TimeType, TimestampType, WKBType, WKTType, }; use crate::{ - array::{ListArray, StructArray}, + array::{ListArray, StructArray, UnionArray}, datatypes::{DaftLogicalType, DateType, Field}, }; @@ -132,6 +134,16 @@ impl LogicalArrayImpl { } } +/// Implementation for a LogicalArray that wraps a StructArray +impl LogicalArrayImpl { + impl_logical_type!(UnionArray); + + pub fn to_arrow(&self) -> DaftResult { + let union_arrow_array = self.physical.to_arrow()?; + Ok(union_arrow_array) + } +} + impl MapArray { impl_logical_type!(ListArray); @@ -179,6 +191,60 @@ impl MapArray { } } +impl LineStringArray { + impl_logical_type!(ListArray); + + pub fn to_arrow(&self) -> DaftResult { + let list_arrow_array = self.physical.to_arrow()?; + Ok(list_arrow_array) + } +} + +impl PolygonArray { + impl_logical_type!(ListArray); + + pub fn to_arrow(&self) -> DaftResult { + let list_arrow_array = self.physical.to_arrow()?; + Ok(list_arrow_array) + } +} + +impl MultiPointArray { + impl_logical_type!(ListArray); + + pub fn to_arrow(&self) -> DaftResult { + let list_arrow_array = self.physical.to_arrow()?; + Ok(list_arrow_array) + } +} + +impl MultiLineStringArray { + impl_logical_type!(ListArray); + + pub fn to_arrow(&self) -> DaftResult { + let list_arrow_array = self.physical.to_arrow()?; + Ok(list_arrow_array) + } +} + +impl MultiPolygonArray { + impl_logical_type!(ListArray); + + pub fn to_arrow(&self) -> DaftResult { + let list_arrow_array = self.physical.to_arrow()?; + Ok(list_arrow_array) + } +} + +impl GeometryCollectionArray { + impl_logical_type!(ListArray); + + pub fn to_arrow(&self) -> DaftResult { + let list_arrow_array = self.physical.to_arrow()?; + Ok(list_arrow_array) + } +} + pub type LogicalArray = LogicalArrayImpl::PhysicalType as DaftDataType>::ArrayType>; // pub type Decimal128Array = LogicalArray; @@ -194,8 +260,35 @@ pub type SparseTensorArray = LogicalArray; pub type FixedShapeSparseTensorArray = LogicalArray; pub type FixedShapeImageArray = LogicalArray; pub type MapArray = LogicalArray; +pub type WktArray = LogicalArray; +pub type WkbArray = LogicalArray; +pub type PointArray = LogicalArray; +pub type LineStringArray = LogicalArray; +pub type PolygonArray = LogicalArray; +pub type MultiPointArray = LogicalArray; +pub type MultiLineStringArray = LogicalArray; +pub type MultiPolygonArray = LogicalArray; +pub type GeometryCollectionArray = LogicalArray; +pub type GeometryArray = LogicalArray; +pub type RectArray = LogicalArray; +pub type GeographyArray = LogicalArray; pub trait DaftImageryType: DaftLogicalType {} impl DaftImageryType for ImageType {} impl DaftImageryType for FixedShapeImageType {} + +pub trait DaftGeometryType: DaftLogicalType {} + +impl DaftGeometryType for WKTType {} +impl DaftGeometryType for WKBType {} +impl DaftGeometryType for PointType {} +impl DaftGeometryType for LineStringType {} +impl DaftGeometryType for PolygonType {} +impl DaftGeometryType for MultiPointType {} +impl DaftGeometryType for MultiLineStringType {} +impl DaftGeometryType for MultiPolygonType {} +impl DaftGeometryType for GeometryCollectionType {} +impl DaftGeometryType for GeometryType {} +impl DaftGeometryType for RectType {} +impl DaftGeometryType for GeographyType {} diff --git a/src/daft-core/src/datatypes/matching.rs b/src/daft-core/src/datatypes/matching.rs index a4a452da24..f1d59ba396 100644 --- a/src/daft-core/src/datatypes/matching.rs +++ b/src/daft-core/src/datatypes/matching.rs @@ -51,7 +51,19 @@ macro_rules! with_match_daft_types { DataType::File(MediaType::Unknown) => __with_ty__! { UnknownFileType }, DataType::File(MediaType::Video) => __with_ty__! { VideoFileType }, DataType::File(MediaType::Audio) => __with_ty__! { AudioFileType }, - + DataType::Union(..) => __with_ty__! { UnionType }, + DataType::WKT(_) => __with_ty__! { WKTType }, + DataType::WKB(_) => __with_ty__! { WKBType }, + DataType::Point(_) => __with_ty__! { PointType }, + DataType::LineString(_) => __with_ty__! { LineStringType }, + DataType::Polygon(_) => __with_ty__! { PolygonType }, + DataType::MultiPoint(_) => __with_ty__! { MultiPointType }, + DataType::MultiLineString(_) => __with_ty__! { MultiLineStringType }, + DataType::MultiPolygon(_) => __with_ty__! { MultiPolygonType }, + DataType::GeometryCollection(_) => __with_ty__! { GeometryCollectionType }, + DataType::Geometry(_) => __with_ty__! { GeometryType }, + DataType::Rect(_) => __with_ty__! { RectType }, + DataType::Geography => __with_ty__! { GeographyType }, // NOTE: We should not implement a default for match here, because this is meant to be // an exhaustive match across **all** Daft types. @@ -95,6 +107,7 @@ macro_rules! with_match_physical_daft_types { DataType::Interval => __with_ty__! { IntervalType }, #[cfg(feature = "python")] DataType::Python => __with_ty__! { PythonType }, + DataType::Union(_, _, _) => __with_ty__! { UnionType }, _ => panic!("{:?} not implemented for with_match_physical_daft_types", $key_type) } diff --git a/src/daft-core/src/datatypes/mod.rs b/src/daft-core/src/datatypes/mod.rs index 1833268c50..6aa43b4ea4 100644 --- a/src/daft-core/src/datatypes/mod.rs +++ b/src/daft-core/src/datatypes/mod.rs @@ -19,6 +19,7 @@ pub use agg_ops::{ pub use daft_schema::dtype::DataType; pub use daft_schema::{ field::{Field, FieldID, FieldRef}, + geospatial_mode::{CoordType, Crs, CrsType, Dimension, Edges, GeospatialMode, Metadata}, image_format::ImageFormat, image_mode::ImageMode, time_unit::{TimeUnit, format_string_has_offset, infer_timeunit_from_format_string}, @@ -31,7 +32,7 @@ pub use crate::array::{DataArray, FixedSizeListArray, file_array::FileArray}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{ListArray, StructArray}, + array::{ListArray, StructArray, UnionArray}, file::{DaftMediaType, FileType}, }; @@ -241,12 +242,19 @@ impl_daft_arrow_datatype!(Decimal128Type, Unknown); impl_nested_datatype!(FixedSizeListType, FixedSizeListArray); impl_nested_datatype!(StructType, StructArray); impl_nested_datatype!(ListType, ListArray); +impl_nested_datatype!(UnionType, UnionArray); impl_daft_logical_data_array_datatype!(TimestampType, Unknown, Int64Type); impl_daft_logical_data_array_datatype!(DateType, Date, Int32Type); impl_daft_logical_data_array_datatype!(TimeType, Unknown, Int64Type); impl_daft_logical_data_array_datatype!(DurationType, Unknown, Int64Type); +impl_daft_logical_data_array_datatype!(WKTType, Unknown, Utf8Type); +impl_daft_logical_data_array_datatype!(WKBType, Unknown, BinaryType); +impl_daft_logical_data_array_datatype!(GeometryType, Unknown, UnionType); +impl_daft_logical_data_array_datatype!(RectType, Unknown, StructType); impl_daft_logical_data_array_datatype!(ImageType, Unknown, StructType); +impl_daft_logical_data_array_datatype!(PointType, Unknown, StructType); +impl_daft_logical_data_array_datatype!(GeographyType, Unknown, StructType); impl_daft_logical_data_array_datatype!(TensorType, Unknown, StructType); impl_daft_logical_data_array_datatype!(SparseTensorType, Unknown, StructType); impl_daft_logical_data_array_datatype!(FixedShapeSparseTensorType, Unknown, StructType); @@ -254,6 +262,12 @@ impl_daft_logical_fixed_size_list_datatype!(EmbeddingType, Unknown); impl_daft_logical_fixed_size_list_datatype!(FixedShapeImageType, Unknown); impl_daft_logical_fixed_size_list_datatype!(FixedShapeTensorType, Unknown); impl_daft_logical_list_datatype!(MapType, Unknown); +impl_daft_logical_list_datatype!(LineStringType, Unknown); +impl_daft_logical_list_datatype!(PolygonType, Unknown); +impl_daft_logical_list_datatype!(MultiPointType, Unknown); +impl_daft_logical_list_datatype!(MultiLineStringType, Unknown); +impl_daft_logical_list_datatype!(MultiPolygonType, Unknown); +impl_daft_logical_list_datatype!(GeometryCollectionType, Unknown); impl DaftDataType for FileType where diff --git a/src/daft-core/src/datatypes/prelude.rs b/src/daft-core/src/datatypes/prelude.rs index 4873b8ac53..ea4b03ac7c 100644 --- a/src/daft-core/src/datatypes/prelude.rs +++ b/src/daft-core/src/datatypes/prelude.rs @@ -21,6 +21,8 @@ pub use super::{ }; pub use crate::datatypes::{ DateType, Decimal128Type, DurationType, EmbeddingType, FixedShapeImageType, - FixedShapeSparseTensorType, FixedShapeTensorType, ImageType, IntervalType, MapType, - SparseTensorType, TensorType, TimeType, TimestampType, logical::DaftImageryType, + FixedShapeSparseTensorType, FixedShapeTensorType, GeographyType, GeometryCollectionType, + GeometryType, ImageType, IntervalType, LineStringType, MapType, MultiLineStringType, + MultiPointType, MultiPolygonType, PointType, PolygonType, RectType, SparseTensorType, + TensorType, TimeType, TimestampType, WKBType, WKTType, logical::DaftImageryType, }; diff --git a/src/daft-core/src/series/array_impl/logical_array.rs b/src/daft-core/src/series/array_impl/logical_array.rs index 672119d776..25ff21ae27 100644 --- a/src/daft-core/src/series/array_impl/logical_array.rs +++ b/src/daft-core/src/series/array_impl/logical_array.rs @@ -198,6 +198,18 @@ impl_series_like_for_logical_array!(FixedShapeTensorArray); impl_series_like_for_logical_array!(SparseTensorArray); impl_series_like_for_logical_array!(FixedShapeSparseTensorArray); impl_series_like_for_logical_array!(MapArray); +impl_series_like_for_logical_array!(WktArray); +impl_series_like_for_logical_array!(WkbArray); +impl_series_like_for_logical_array!(PointArray); +impl_series_like_for_logical_array!(LineStringArray); +impl_series_like_for_logical_array!(PolygonArray); +impl_series_like_for_logical_array!(MultiPointArray); +impl_series_like_for_logical_array!(MultiLineStringArray); +impl_series_like_for_logical_array!(MultiPolygonArray); +impl_series_like_for_logical_array!(GeometryCollectionArray); +impl_series_like_for_logical_array!(GeometryArray); +impl_series_like_for_logical_array!(RectArray); +impl_series_like_for_logical_array!(GeographyArray); impl SeriesLike for ArrayWrapper> where diff --git a/src/daft-core/src/series/array_impl/nested_array.rs b/src/daft-core/src/series/array_impl/nested_array.rs index 2b024244a3..74a8731dec 100644 --- a/src/daft-core/src/series/array_impl/nested_array.rs +++ b/src/daft-core/src/series/array_impl/nested_array.rs @@ -6,7 +6,7 @@ use common_error::{DaftError, DaftResult}; use super::ArrayWrapper; use crate::{ array::{ - FixedSizeListArray, ListArray, StructArray, + FixedSizeListArray, ListArray, StructArray, UnionArray, ops::{DaftIsNull, DaftNotNull, DaftSetAggable, GroupIndices, broadcast::Broadcastable}, }, datatypes::{BooleanArray, DataType, Field}, @@ -165,3 +165,4 @@ macro_rules! impl_series_like_for_nested_arrays { impl_series_like_for_nested_arrays!(FixedSizeListArray); impl_series_like_for_nested_arrays!(StructArray); impl_series_like_for_nested_arrays!(ListArray); +impl_series_like_for_nested_arrays!(UnionArray); diff --git a/src/daft-core/src/series/from_lit.rs b/src/daft-core/src/series/from_lit.rs index 55fe6372fd..fad803c9d1 100644 --- a/src/daft-core/src/series/from_lit.rs +++ b/src/daft-core/src/series/from_lit.rs @@ -430,6 +430,72 @@ pub fn series_from_literals_iter image_array_from_img_buffers("literal", iter, image_mode)?.into_series() } + DataType::Union(..) => { + return Err(common_error::DaftError::NotImplemented( + "Union literal to Series is not yet implemented".to_string(), + )); + + } + DataType::WKT(_) => { + return Err(common_error::DaftError::NotImplemented( + "WKT literal to Series is not yet implemented".to_string(), + )); + } + DataType::WKB(_) => { + return Err(common_error::DaftError::NotImplemented( + "WKB literal to Series is not yet implemented".to_string(), + )); + } + DataType::Point(_) => { + return Err(common_error::DaftError::NotImplemented( + "Point literal to Series is not yet implemented".to_string(), + )); + } + DataType::LineString(_) => { + return Err(common_error::DaftError::NotImplemented( + "LineString literal to Series is not yet implemented".to_string(), + )); + } + DataType::Polygon(_) => { + return Err(common_error::DaftError::NotImplemented( + "Polygon literal to Series is not yet implemented".to_string(), + )); + } + DataType::MultiPoint(_) => { + return Err(common_error::DaftError::NotImplemented( + "MultiPoint literal to Series is not yet implemented".to_string(), + )); + } + DataType::MultiLineString(_) => { + return Err(common_error::DaftError::NotImplemented( + "MultiLineString literal to Series is not yet implemented".to_string(), + )); + } + DataType::MultiPolygon(_) => { + return Err(common_error::DaftError::NotImplemented( + "MultiPolygon literal to Series is not yet implemented".to_string(), + )); + } + DataType::GeometryCollection(_) => { + return Err(common_error::DaftError::NotImplemented( + "GeometryCollection literal to Series is not yet implemented".to_string(), + )); + } + DataType::Geometry(_) => { + return Err(common_error::DaftError::NotImplemented( + "Geometry literal to Series is not yet implemented".to_string(), + )); + } + DataType::Rect(_) => { + return Err(common_error::DaftError::NotImplemented( + "Rect literal to Series is not yet implemented".to_string(), + )); + } + DataType::Geography => { + return Err(common_error::DaftError::NotImplemented( + "Geography literal to Series is not yet implemented".to_string(), + )); + } DataType::FixedSizeBinary(..) | DataType::FixedSizeList(..) | DataType::Extension(..) diff --git a/src/daft-core/src/series/ops/downcast.rs b/src/daft-core/src/series/ops/downcast.rs index f9c4b51302..81c17768bc 100644 --- a/src/daft-core/src/series/ops/downcast.rs +++ b/src/daft-core/src/series/ops/downcast.rs @@ -1,14 +1,16 @@ use common_error::{DaftError, DaftResult}; use logical::{ - EmbeddingArray, FixedShapeSparseTensorArray, FixedShapeTensorArray, SparseTensorArray, - TensorArray, + EmbeddingArray, FixedShapeSparseTensorArray, FixedShapeTensorArray, GeographyArray, + GeometryArray, GeometryCollectionArray, LineStringArray, MultiLineStringArray, MultiPointArray, + MultiPolygonArray, PointArray, PolygonArray, RectArray, SparseTensorArray, TensorArray, + WkbArray, WktArray, }; use self::logical::{DurationArray, ImageArray, MapArray}; #[cfg(feature = "python")] use crate::prelude::PythonArray; use crate::{ - array::{ListArray, StructArray}, + array::{ListArray, StructArray, UnionArray}, datatypes::{ logical::{DateArray, FixedShapeImageArray, TimeArray, TimestampArray}, *, @@ -172,4 +174,56 @@ impl Series { pub fn file(&self) -> DaftResult<&FileArray> { self.downcast() } + + pub fn union(&self) -> DaftResult<&UnionArray> { + self.downcast() + } + + pub fn wkt(&self) -> DaftResult<&WktArray> { + self.downcast() + } + + pub fn wkb(&self) -> DaftResult<&WkbArray> { + self.downcast() + } + + pub fn point(&self) -> DaftResult<&PointArray> { + self.downcast() + } + + pub fn multi_point(&self) -> DaftResult<&MultiPointArray> { + self.downcast() + } + + pub fn line_string(&self) -> DaftResult<&LineStringArray> { + self.downcast() + } + + pub fn multi_line_string(&self) -> DaftResult<&MultiLineStringArray> { + self.downcast() + } + + pub fn polygon(&self) -> DaftResult<&PolygonArray> { + self.downcast() + } + + pub fn multi_polygon(&self) -> DaftResult<&MultiPolygonArray> { + self.downcast() + } + + pub fn geometry_collection(&self) -> DaftResult<&GeometryCollectionArray> { + self.downcast() + } + + pub fn geometry(&self) -> DaftResult<&GeometryArray> { + self.downcast() + } + + pub fn geography(&self) -> DaftResult<&GeographyArray> { + self.downcast() + } + + pub fn rect(&self) -> DaftResult<&RectArray> { + self.downcast() + } } diff --git a/src/daft-core/src/series/serdes.rs b/src/daft-core/src/series/serdes.rs index be4c0e5bd6..fc96c2590f 100644 --- a/src/daft-core/src/series/serdes.rs +++ b/src/daft-core/src/series/serdes.rs @@ -7,12 +7,14 @@ use arrow::{ use serde::{Deserializer, de::Visitor}; use crate::{ - array::{ListArray, StructArray, ops::full::FullNull}, + array::{ListArray, StructArray, UnionArray, ops::full::FullNull}, datatypes::{ logical::{ DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeSparseTensorArray, FixedShapeTensorArray, ImageArray, MapArray, - SparseTensorArray, TensorArray, TimeArray, TimestampArray, + FixedShapeSparseTensorArray, FixedShapeTensorArray, GeographyArray, GeometryArray, + GeometryCollectionArray, ImageArray, LineStringArray, MapArray, MultiLineStringArray, + MultiPointArray, MultiPolygonArray, PointArray, PolygonArray, RectArray, + SparseTensorArray, TensorArray, TimeArray, TimestampArray, WkbArray, WktArray, }, *, }, @@ -180,6 +182,34 @@ impl<'d> serde::Deserialize<'d> for Series { let nulls = nulls.map(|v| v.bool().unwrap().to_bitmap().into()); Ok(StructArray::new(Arc::new(field), children, nulls).into_series()) } + DataType::Union(..) => { + let mut all_series = map.next_value::>>()?; + let offsets_series = all_series + .pop() + .ok_or_else(|| serde::de::Error::missing_field("offsets"))?; + + let offsets = if let Some(offsets) = offsets_series { + let offsets_array = offsets.i32().unwrap(); + Some(offsets_array.values()) + } else { + None + }; + + let ids = all_series + .pop() + .ok_or_else(|| serde::de::Error::missing_field("ids"))? + .unwrap(); + + let ids_array = ids.i8().unwrap(); + + let ids = ids_array.values(); + + let children = all_series + .into_iter() + .map(|s| s.unwrap()) + .collect::>(); + Ok(UnionArray::new(Arc::new(field), ids, children, offsets).into_series()) + } DataType::List(..) => { let mut all_series = map.next_value::>>()?; let nulls = all_series @@ -337,6 +367,113 @@ impl<'d> serde::Deserialize<'d> for Series { DataType::File(_) => { panic!("Unable to deserialize File DataType"); } + DataType::WKT(..) => { + type PType = + <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok( + WktArray::new(field, physical.downcast::().unwrap().clone()) + .into_series(), + ) + } + DataType::WKB(..) => { + type PType = + <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok( + WkbArray::new(field, physical.downcast::().unwrap().clone()) + .into_series(), + ) + } + DataType::Point(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok( + PointArray::new(field, physical.downcast::().unwrap().clone()) + .into_series(), + ) + } + DataType::LineString(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok(LineStringArray::new( + field, + physical.downcast::().unwrap().clone(), + ) + .into_series()) + } + DataType::Polygon(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok( + PolygonArray::new(field, physical.downcast::().unwrap().clone()) + .into_series(), + ) + } + DataType::MultiPoint(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok(MultiPointArray::new( + field, + physical.downcast::().unwrap().clone(), + ) + .into_series()) + } + DataType::MultiLineString(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok(MultiLineStringArray::new( + field, + physical.downcast::().unwrap().clone(), + ) + .into_series()) + } + DataType::MultiPolygon(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok(MultiPolygonArray::new( + field, + physical.downcast::().unwrap().clone(), + ) + .into_series()) + } + DataType::GeometryCollection(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok(GeometryCollectionArray::new( + field, + physical.downcast::().unwrap().clone(), + ) + .into_series()) + } + DataType::Geometry(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok( + GeometryArray::new( + field, + physical.downcast::().unwrap().clone(), + ) + .into_series(), + ) + } + DataType::Geography => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok(GeographyArray::new( + field, + physical.downcast::().unwrap().clone(), + ) + .into_series()) + } + DataType::Rect(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok( + RectArray::new(field, physical.downcast::().unwrap().clone()) + .into_series(), + ) + } } } } diff --git a/src/daft-recordbatch/src/repr_html.rs b/src/daft-recordbatch/src/repr_html.rs index 165ab2f967..32ba619f11 100644 --- a/src/daft-recordbatch/src/repr_html.rs +++ b/src/daft-recordbatch/src/repr_html.rs @@ -105,6 +105,10 @@ pub fn html_value(s: &Series, idx: usize, truncate: bool) -> String { let arr = s.struct_().unwrap(); arr.html_value(idx, truncate) } + DataType::Union(_, _, _) => { + let arr = s.union().unwrap(); + arr.html_value(idx, truncate) + } DataType::Map { .. } => { let arr = s.map().unwrap(); arr.html_value(idx, truncate) @@ -151,5 +155,53 @@ pub fn html_value(s: &Series, idx: usize, truncate: bool) -> String { DataType::Unknown => { panic!("Unknown data type") } + DataType::WKT(_) => { + let arr = s.wkt().unwrap(); + arr.html_value(idx, truncate) + } + DataType::WKB(_) => { + let arr = s.wkb().unwrap(); + arr.html_value(idx, truncate) + } + DataType::Point(_) => { + let arr = s.point().unwrap(); + arr.html_value(idx, truncate) + } + DataType::LineString(_) => { + let arr = s.line_string().unwrap(); + arr.html_value(idx, truncate) + } + DataType::Polygon(_) => { + let arr = s.polygon().unwrap(); + arr.html_value(idx, truncate) + } + DataType::MultiPoint(_) => { + let arr = s.multi_point().unwrap(); + arr.html_value(idx, truncate) + } + DataType::MultiLineString(_) => { + let arr = s.multi_line_string().unwrap(); + arr.html_value(idx, truncate) + } + DataType::MultiPolygon(_) => { + let arr = s.multi_polygon().unwrap(); + arr.html_value(idx, truncate) + } + DataType::GeometryCollection(_) => { + let arr = s.geometry_collection().unwrap(); + arr.html_value(idx, truncate) + } + DataType::Geometry(_) => { + let arr = s.geometry().unwrap(); + arr.html_value(idx, truncate) + } + DataType::Geography => { + let arr = s.geography().unwrap(); + arr.html_value(idx, truncate) + } + DataType::Rect(_) => { + let arr = s.rect().unwrap(); + arr.html_value(idx, truncate) + } } } diff --git a/src/daft-schema/src/dtype.rs b/src/daft-schema/src/dtype.rs index 83e54331ca..be97900397 100644 --- a/src/daft-schema/src/dtype.rs +++ b/src/daft-schema/src/dtype.rs @@ -7,7 +7,14 @@ use arrow_schema::IntervalUnit; use common_error::{DaftError, DaftResult}; use serde::{Deserialize, Serialize}; -use crate::{field::Field, image_mode::ImageMode, media_type::MediaType, time_unit::TimeUnit}; +use crate::{ + field::Field, + geospatial_mode::{Dimension, GeospatialMode}, + image_mode::ImageMode, + media_type::MediaType, + time_unit::TimeUnit, + union_mode::UnionMode, +}; pub type DaftDataType = DataType; #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Hash)] @@ -138,6 +145,42 @@ pub enum DataType { Unknown, File(MediaType), + + Union(Vec, Vec, UnionMode), + + // A logical type for geoemetries in WKT format + WKT(GeospatialMode), + + // A logical type for geoemetries in WKB format + WKB(GeospatialMode), + + // A logical type for point geometries + Point(GeospatialMode), + + // A logical type for a list of point geometries + MultiPoint(GeospatialMode), + + // A logical type for a linestring + LineString(GeospatialMode), + + // A logical type for a collection of linestrings + MultiLineString(GeospatialMode), + + // A logical type for a polygon + Polygon(GeospatialMode), + + // A logical type for a collection of polygons + MultiPolygon(GeospatialMode), + // A logical type for a geometry collection + GeometryCollection(GeospatialMode), + + // A logical type for a collection of geometries + Geometry(GeospatialMode), + + Rect(GeospatialMode), + + // A logical type for a collection of geometries + Geography, } impl Display for DataType { @@ -213,6 +256,31 @@ impl Display for DataType { Self::Python => write!(f, "Python"), Self::Unknown => write!(f, "Unknown"), Self::File(format) => write!(f, "File[{format}]"), + Self::Union(fields, ids, mode) => { + let mut contents = String::default(); + for (index, field) in fields.iter().enumerate() { + if index != 0 { + write!(&mut contents, ", ")?; + } + if !(field.name.is_empty() && field.dtype.is_null()) { + write!(&mut contents, "{}: {}", field.name, field.dtype)?; + } + } + + write!(f, "Union[{}; ids: {:?}; mode: {}]", contents, ids, mode) + } + Self::WKT(mode) => write!(f, "WKT[{mode}]"), + Self::WKB(mode) => write!(f, "WKB[{mode}]"), + Self::Point(mode) => write!(f, "Point[{mode}]"), + Self::LineString(mode) => write!(f, "LineString[{mode}]"), + Self::Polygon(mode) => write!(f, "Polygon[{mode}]"), + Self::MultiPoint(mode) => write!(f, "MultiPoint[{mode}]"), + Self::MultiLineString(mode) => write!(f, "MultiLineString[{mode}]"), + Self::MultiPolygon(mode) => write!(f, "MultiPolygon[{mode}]"), + Self::GeometryCollection(mode) => write!(f, "GeometryCollection[{mode}]"), + Self::Geometry(_) => write!(f, "Geometry"), + Self::Rect(mode) => write!(f, "Rect[{mode}]"), + Self::Geography => write!(f, "Geography"), } } } @@ -235,6 +303,58 @@ impl DataTypePayload { } pub const DAFT_SUPER_EXTENSION_NAME: &str = "daft.super_extension"; +fn generate_point_coordinate(dim: &Dimension) -> DataType { + use DataType::*; + match dim { + Dimension::XY => Struct(vec![Field::new("x", Float64), Field::new("y", Float64)]), + Dimension::XYZ => Struct(vec![ + Field::new("x", Float64), + Field::new("y", Float64), + Field::new("z", Float64), + ]), + Dimension::XYM => Struct(vec![ + Field::new("x", Float64), + Field::new("y", Float64), + Field::new("m", Float64), + ]), + Dimension::XYZM => Struct(vec![ + Field::new("x", Float64), + Field::new("y", Float64), + Field::new("z", Float64), + Field::new("m", Float64), + ]), + } +} + +fn generate_geometry_collection(dim: &Dimension) -> DataType { + use DataType::*; + let (type_ids, suffix): (Vec, &str) = match dim { + Dimension::XY => (vec![1, 2, 3, 4, 5, 6], ""), + Dimension::XYZ => (vec![11, 12, 13, 14, 15, 16], " Z"), + Dimension::XYM => (vec![21, 22, 23, 24, 25, 26], " M"), + Dimension::XYZM => (vec![31, 32, 33, 34, 35, 36], " ZM"), + }; + let pt = generate_point_coordinate(dim); + let fields = vec![ + Field::new(format!("Point{suffix}"), pt.clone()), + Field::new(format!("LineString{suffix}"), List(Box::new(pt.clone()))), + Field::new( + format!("Polygon{suffix}"), + List(Box::new(List(Box::new(pt.clone())))), + ), + Field::new(format!("MultiPoint{suffix}"), List(Box::new(pt.clone()))), + Field::new( + format!("MultiLineString{suffix}"), + List(Box::new(List(Box::new(pt.clone())))), + ), + Field::new( + format!("MultiPolygon{suffix}"), + List(Box::new(List(Box::new(List(Box::new(pt)))))), + ), + ]; + Union(fields, type_ids, UnionMode::Dense) +} + impl DataType { pub fn new_null() -> Self { Self::Null @@ -308,7 +428,16 @@ impl DataType { } Self::Date => arrow_schema::DataType::Date32, Self::Time(time_unit) => arrow_schema::DataType::Time64(time_unit.to_arrow()), - + Self::Union(fields, ids, mode) => arrow_schema::DataType::Union( + arrow_schema::UnionFields::try_new( + ids.clone(), + fields + .iter() + .map(|f| f.to_arrow()) + .collect::>>()?, + )?, + mode.to_arrow(), + ), _ => { return Err(DaftError::TypeError(format!( "Can not convert {self:?} into arrow type" @@ -383,6 +512,112 @@ impl DataType { Field::new("url", Utf8), Field::new("io_config", Binary), ]), + WKT(_) => Utf8, + WKB(_) => Binary, + Point(mode) => generate_point_coordinate(&mode.dimension), + MultiPoint(mode) => { + let point = generate_point_coordinate(&mode.dimension); + List(Box::new(point)) + } + LineString(mode) => { + let point = generate_point_coordinate(&mode.dimension); + List(Box::new(point)) + } + MultiLineString(mode) => { + let point = generate_point_coordinate(&mode.dimension); + List(Box::new(List(Box::new(point)))) + } + Polygon(mode) => { + let point = generate_point_coordinate(&mode.dimension); + List(Box::new(List(Box::new(point)))) + } + MultiPolygon(mode) => { + let point = generate_point_coordinate(&mode.dimension); + List(Box::new(List(Box::new(List(Box::new(point)))))) + } + GeometryCollection(mode) => { + let collection = generate_geometry_collection(&mode.dimension); + List(Box::new(collection)) + } + Geometry(_) => { + let type_ids = vec![ + 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, + 31, 32, 33, 34, 35, 36, 37, + ]; + let mut fields = vec![]; + for (dim, suffix) in [ + (Dimension::XY, ""), + (Dimension::XYZ, " Z"), + (Dimension::XYM, " M"), + (Dimension::XYZM, " ZM"), + ] { + let pt = generate_point_coordinate(&dim); + fields.push(Field::new(format!("Point{suffix}"), pt.clone())); + fields.push(Field::new( + format!("LineString{suffix}"), + List(Box::new(pt.clone())), + )); + fields.push(Field::new( + format!("Polygon{suffix}"), + List(Box::new(List(Box::new(pt.clone())))), + )); + fields.push(Field::new( + format!("MultiPoint{suffix}"), + List(Box::new(pt.clone())), + )); + fields.push(Field::new( + format!("MultiLineString{suffix}"), + List(Box::new(List(Box::new(pt.clone())))), + )); + fields.push(Field::new( + format!("MultiPolygon{suffix}"), + List(Box::new(List(Box::new(List(Box::new(pt)))))), + )); + fields.push(Field::new( + format!("GeometryCollection{suffix}"), + generate_geometry_collection(&dim), + )); + } + Union(fields, type_ids, UnionMode::Dense) + } + Rect(mode) => match mode.dimension { + Dimension::XY => Struct(vec![ + Field::new("xmin", List(Box::new(Self::Float64))), + Field::new("ymin", List(Box::new(Self::Float64))), + Field::new("xmax", List(Box::new(Self::Float64))), + Field::new("ymax", List(Box::new(Self::Float64))), + ]), + Dimension::XYZ => Struct(vec![ + Field::new("xmin", List(Box::new(Self::Float64))), + Field::new("ymin", List(Box::new(Self::Float64))), + Field::new("zmin", List(Box::new(Self::Float64))), + Field::new("xmax", List(Box::new(Self::Float64))), + Field::new("ymax", List(Box::new(Self::Float64))), + Field::new("zmax", List(Box::new(Self::Float64))), + ]), + Dimension::XYM => Struct(vec![ + Field::new("xmin", List(Box::new(Self::Float64))), + Field::new("ymin", List(Box::new(Self::Float64))), + Field::new("mmin", List(Box::new(Self::Float64))), + Field::new("xmax", List(Box::new(Self::Float64))), + Field::new("ymax", List(Box::new(Self::Float64))), + Field::new("mmax", List(Box::new(Self::Float64))), + ]), + Dimension::XYZM => Struct(vec![ + Field::new("xmin", List(Box::new(Self::Float64))), + Field::new("ymin", List(Box::new(Self::Float64))), + Field::new("zmin", List(Box::new(Self::Float64))), + Field::new("mmin", List(Box::new(Self::Float64))), + Field::new("xmax", List(Box::new(Self::Float64))), + Field::new("ymax", List(Box::new(Self::Float64))), + Field::new("zmax", List(Box::new(Self::Float64))), + Field::new("mmax", List(Box::new(Self::Float64))), + ]), + }, + Geography => Struct(vec![ + Field::new("x", List(Box::new(Self::Float64))), + Field::new("y", List(Box::new(Self::Float64))), + ]), _ => { assert!(self.is_physical()); self.clone() @@ -697,6 +932,58 @@ impl DataType { } } + #[inline] + pub fn is_geospatial(&self) -> bool { + match self { + Self::Point(_) + | Self::LineString(_) + | Self::Polygon(_) + | Self::MultiPoint(_) + | Self::MultiLineString(_) + | Self::MultiPolygon(_) + | Self::GeometryCollection(_) + | Self::Geometry(_) => true, + Self::Extension(_, inner, _) => inner.is_geospatial(), + _ => false, + } + } + + #[inline] + pub fn is_geography(&self) -> bool { + match self { + Self::Geography => true, + Self::Extension(_, inner, _) => inner.is_geography(), + _ => false, + } + } + + #[inline] + pub fn is_wkt(&self) -> bool { + match self { + Self::WKT(_) => true, + Self::Extension(_, inner, _) => inner.is_wkt(), + _ => false, + } + } + + #[inline] + pub fn is_wkb(&self) -> bool { + match self { + Self::WKB(_) => true, + Self::Extension(_, inner, _) => inner.is_wkb(), + _ => false, + } + } + + #[inline] + pub fn is_rect(&self) -> bool { + match self { + Self::Rect(_) => true, + Self::Extension(_, inner, _) => inner.is_rect(), + _ => false, + } + } + #[inline] pub fn to_floating_representation(&self) -> DaftResult { let data_type = match self { @@ -799,6 +1086,18 @@ impl DataType { | Self::FixedShapeSparseTensor(..) | Self::Map { .. } | Self::File(..) + | Self::WKT(..) + | Self::WKB(..) + | Self::Point(..) + | Self::LineString(..) + | Self::MultiPoint(..) + | Self::MultiLineString(..) + | Self::Polygon(..) + | Self::MultiPolygon(..) + | Self::GeometryCollection(..) + | Self::Geometry(..) + | Self::Geography + | Self::Rect(..) ) } diff --git a/src/daft-schema/src/field.rs b/src/daft-schema/src/field.rs index 0c57c5caab..4b8fa95ba4 100644 --- a/src/daft-schema/src/field.rs +++ b/src/daft-schema/src/field.rs @@ -118,7 +118,19 @@ impl Field { | dtype @ DataType::FixedShapeTensor(..) | dtype @ DataType::SparseTensor(..) | dtype @ DataType::FixedShapeSparseTensor(..) - | dtype @ DataType::File(..) => { + | dtype @ DataType::File(..) + | dtype @ DataType::WKT(..) + | dtype @ DataType::WKB(..) + | dtype @ DataType::Point(..) + | dtype @ DataType::LineString(..) + | dtype @ DataType::Polygon(..) + | dtype @ DataType::MultiPoint(..) + | dtype @ DataType::MultiLineString(..) + | dtype @ DataType::MultiPolygon(..) + | dtype @ DataType::GeometryCollection(..) + | dtype @ DataType::Geometry(..) + | dtype @ DataType::Geography + | dtype @ DataType::Rect(..) => { let physical = Box::new(self.to_physical()); let mut metadata_map = HashMap::new(); diff --git a/src/daft-schema/src/geospatial_mode.rs b/src/daft-schema/src/geospatial_mode.rs new file mode 100644 index 0000000000..4bd28cc604 --- /dev/null +++ b/src/daft-schema/src/geospatial_mode.rs @@ -0,0 +1,205 @@ +use common_error::{DaftError, DaftResult}; +use common_py_serde::impl_bincode_py_state_serialization; +use derive_more::Display; +#[cfg(feature = "python")] +use pyo3::{exceptions::PyValueError, prelude::*}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +#[derive(Debug, Default, Clone, Display, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum Dimension { + #[default] + XY, + XYZ, + XYM, + XYZM, +} + +impl Dimension { + pub fn num_coords(&self) -> usize { + match self { + Self::XY => 2, + Self::XYZ => 3, + Self::XYM => 3, + Self::XYZM => 4, + } + } + + pub fn from_user_dimension(dimension: &str) -> DaftResult { + match dimension { + "xy" => Ok(Self::XY), + "xyz" => Ok(Self::XYZ), + "xym" => Ok(Self::XYM), + "xyzm" => Ok(Self::XYZM), + _ => Err(DaftError::TypeError(format!( + "unsupported dimension: {dimension}, daft only supports xy, xyz, xym, xyzm" + ))), + } + } +} + +#[derive(Debug, Default, Display, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum CoordType { + #[default] + Interleaved, + Separated, +} + +impl CoordType { + pub fn from_user_coord_type(coord_type: &str) -> DaftResult { + match coord_type { + "interleaved" => Ok(Self::Interleaved), + "separated" => Ok(Self::Separated), + _ => Err(DaftError::TypeError(format!( + "unsupported coord type: {coord_type}, daft only supports interleaved and separated" + ))), + } + } +} + +#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct Crs { + pub crs: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub crs_type: Option, +} + +impl Crs { + pub fn new(crs: Option, crs_type: Option) -> Self { + Self { crs, crs_type } + } +} + +impl std::fmt::Display for Crs { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}, {:?}", self.crs, self.crs_type) + } +} + +#[derive(Debug, Clone, Display, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum CrsType { + #[serde(rename = "projjson")] + Projjson, + #[serde(rename = "wkt2:2019")] + Wkt2_2019, + #[serde(rename = "authority_code")] + AuthorityCode, + #[serde(rename = "srid")] + Srid, +} + +impl CrsType { + pub fn from_user_crs_type(crs_type: &str) -> DaftResult { + match crs_type { + "projjson" => Ok(Self::Projjson), + "wkt2:2019" => Ok(Self::Wkt2_2019), + "authority_code" => Ok(Self::AuthorityCode), + "srid" => Ok(Self::Srid), + _ => Err(DaftError::TypeError(format!( + "unsupported crs type: {crs_type}, daft only supports projjson, wkt2:2019, authority_code and srid" + ))), + } + } +} + +#[derive(Debug, Clone, Display, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum Edges { + #[serde(rename = "andoyer")] + Andoyer, + #[serde(rename = "karney")] + Karney, + #[serde(rename = "spherical")] + Spherical, + #[serde(rename = "thomas")] + Thomas, + #[serde(rename = "vincenty")] + Vincenty, +} + +impl Edges { + pub fn from_user_edges(edges: &str) -> DaftResult { + match edges { + "andoyer" => Ok(Self::Andoyer), + "karney" => Ok(Self::Karney), + "spherical" => Ok(Self::Spherical), + "thomas" => Ok(Self::Thomas), + "vincenty" => Ok(Self::Vincenty), + _ => Err(DaftError::TypeError(format!( + "unsupported edges: {edges}, daft only supports andoyer, karney, spherical, thomas and vincenty" + ))), + } + } +} + +#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct Metadata { + #[serde(flatten)] + pub crs: Crs, + #[serde(skip_serializing_if = "Option::is_none")] + pub edges: Option, +} + +impl Metadata { + pub fn new(crs: Crs, edges: Option) -> Self { + Self { crs, edges } + } +} + +impl std::fmt::Display for Metadata { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}, {:?}", self.crs, self.edges) + } +} + +#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize, Deserialize, Hash)] +#[cfg_attr( + feature = "python", + pyclass(name = "GeospatialMode", module = "daft.daft", eq, from_py_object) +)] +pub struct GeospatialMode { + pub metadata: Metadata, + pub dimension: Dimension, + pub coord_type: CoordType, +} + +impl std::fmt::Display for GeospatialMode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}, {}, {}", + self.dimension, self.coord_type, self.metadata + ) + } +} + +#[cfg(feature = "python")] +#[pymethods] +impl GeospatialMode { + #[staticmethod] + pub fn from_user_defined_mode(dimension: &str, coord_type: &str) -> PyResult { + Self::from_user_spec(dimension, coord_type) + .map_err(|e| PyValueError::new_err(e.to_string())) + } +} + +impl GeospatialMode { + pub fn new(metadata: Metadata, dimension: Dimension, coord_type: CoordType) -> Self { + Self { + metadata, + dimension, + coord_type, + } + } + + pub fn get_dims(&self) -> usize { + self.dimension.num_coords() + } + + pub fn from_user_spec(dimension: &str, coord_type: &str) -> DaftResult { + let dimension = Dimension::from_user_dimension(dimension)?; + let coord_type = CoordType::from_user_coord_type(coord_type)?; + Ok(Self::new(Metadata::default(), dimension, coord_type)) + } +} + +impl_bincode_py_state_serialization!(GeospatialMode); diff --git a/src/daft-schema/src/lib.rs b/src/daft-schema/src/lib.rs index 21f8f61c6e..359562d55c 100644 --- a/src/daft-schema/src/lib.rs +++ b/src/daft-schema/src/lib.rs @@ -1,9 +1,11 @@ pub mod dtype; pub mod field; +pub mod geospatial_mode; pub mod image_format; pub mod image_mode; pub mod image_property; pub mod prelude; +pub mod union_mode; pub mod media_type; #[cfg(feature = "python")] diff --git a/src/daft-schema/src/prelude.rs b/src/daft-schema/src/prelude.rs index 07a9063053..22977571e3 100644 --- a/src/daft-schema/src/prelude.rs +++ b/src/daft-schema/src/prelude.rs @@ -1,9 +1,11 @@ pub use crate::{ dtype::DataType, field::{Field, FieldID, FieldRef}, + geospatial_mode::{CoordType, Crs, CrsType, Dimension, Edges, GeospatialMode, Metadata}, image_format::ImageFormat, image_mode::ImageMode, image_property::ImageProperty, schema::{Schema, SchemaRef}, time_unit::{TimeUnit, infer_timeunit_from_format_string}, + union_mode::UnionMode, }; diff --git a/src/daft-schema/src/python/datatype.rs b/src/daft-schema/src/python/datatype.rs index 64508440ca..ab03182fa2 100644 --- a/src/daft-schema/src/python/datatype.rs +++ b/src/daft-schema/src/python/datatype.rs @@ -14,8 +14,8 @@ use serde::{Deserialize, Serialize}; use super::field::PyField; use crate::{ - dtype::DataType, field::Field, image_mode::ImageMode, media_type::MediaType, - time_unit::TimeUnit, + dtype::DataType, field::Field, geospatial_mode::GeospatialMode, image_mode::ImageMode, + media_type::MediaType, time_unit::TimeUnit, }; #[pyclass(from_py_object)] @@ -345,6 +345,132 @@ impl PyDataType { Ok(DataType::File(ff).into()) } + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn wkt(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::WKT(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::WKT(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn wkb(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::WKB(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::WKB(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn point(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::Point(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::Point(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn linestring(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::LineString(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::LineString(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn polygon(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::Polygon(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::Polygon(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn multipoint(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::MultiPoint(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::MultiPoint(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn multilinestring(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::MultiLineString(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::MultiLineString(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn multipolygon(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::MultiPolygon(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::MultiPolygon(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn geometry_collection(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::GeometryCollection(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::GeometryCollection(mode).into()) + } + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn geometry(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::Geometry(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::Geometry(mode).into()) + } + } + + #[staticmethod] + pub fn geography() -> PyResult { + Ok(DataType::Geography.into()) + } + + #[staticmethod] + #[pyo3(signature = (mode=None))] + pub fn rect(mode: Option) -> PyResult { + if let Some(mode) = mode { + Ok(DataType::Rect(mode).into()) + } else { + let mode = GeospatialMode::default(); + Ok(DataType::Rect(mode).into()) + } + } + pub fn to_arrow<'py>(&self, py: Python<'py>) -> PyResult> { let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; match &self.dtype { diff --git a/src/daft-schema/src/python/mod.rs b/src/daft-schema/src/python/mod.rs index f2e0180634..7b66f2399b 100644 --- a/src/daft-schema/src/python/mod.rs +++ b/src/daft-schema/src/python/mod.rs @@ -6,8 +6,8 @@ pub mod schema; pub use datatype::{PyDataType, PyTimeUnit}; use crate::{ - image_format::ImageFormat, image_mode::ImageMode, image_property::ImageProperty, - media_type::MediaType, + geospatial_mode::GeospatialMode, image_format::ImageFormat, image_mode::ImageMode, + image_property::ImageProperty, media_type::MediaType, }; pub fn register_modules(parent: &Bound) -> PyResult<()> { @@ -19,6 +19,7 @@ pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; parent.add_class::()?; parent.add_class::()?; + parent.add_class::()?; Ok(()) } diff --git a/src/daft-schema/src/union_mode.rs b/src/daft-schema/src/union_mode.rs new file mode 100644 index 0000000000..cad6a8ccf5 --- /dev/null +++ b/src/daft-schema/src/union_mode.rs @@ -0,0 +1,21 @@ +use derive_more::Display; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] +pub enum UnionMode { + Sparse, + Dense, +} + +impl UnionMode { + pub fn to_arrow(&self) -> arrow_schema::UnionMode { + match self { + Self::Sparse => arrow_schema::UnionMode::Sparse, + Self::Dense => arrow_schema::UnionMode::Dense, + } + } + + pub fn is_dense(&self) -> bool { + matches!(self, Self::Dense) + } +} diff --git a/src/daft-stats/src/column_stats/mod.rs b/src/daft-stats/src/column_stats/mod.rs index 3f5581016e..f4aebb6922 100644 --- a/src/daft-stats/src/column_stats/mod.rs +++ b/src/daft-stats/src/column_stats/mod.rs @@ -98,7 +98,7 @@ impl ColumnRangeStatistics { // UNSUPPORTED TYPES: // Types that don't support comparisons and can't be used as ColumnRangeStatistics - DataType::List(..) | DataType::FixedSizeList(..) | DataType::Image(..) | DataType::FixedShapeImage(..) | DataType::Tensor(..) | DataType::SparseTensor(..) | DataType::FixedShapeSparseTensor(..) | DataType::FixedShapeTensor(..) | DataType::Struct(..) | DataType::Map { .. } | DataType::Extension(..) | DataType::Embedding(..) | DataType::Unknown | DataType::File(_) => false, + DataType::List(..) | DataType::FixedSizeList(..) | DataType::Image(..) | DataType::FixedShapeImage(..) | DataType::Tensor(..) | DataType::SparseTensor(..) | DataType::FixedShapeSparseTensor(..) | DataType::FixedShapeTensor(..) | DataType::Struct(..) | DataType::Map { .. } | DataType::Extension(..) | DataType::Embedding(..) | DataType::Unknown | DataType::File(_) | DataType::Union { .. } | DataType::WKT(..) | DataType::WKB(..) | DataType::Point(..) | DataType::LineString(..) | DataType::Polygon(..) | DataType::MultiPoint(..) | DataType::MultiLineString(..) | DataType::MultiPolygon(..) | DataType::GeometryCollection(..) | DataType::Geometry(..) | DataType::Geography | DataType::Rect(..) => false, #[cfg(feature = "python")] DataType::Python => false, } diff --git a/src/geoarrow/geoarrow-array/Cargo.toml b/src/geoarrow/geoarrow-array/Cargo.toml new file mode 100644 index 0000000000..6cb43a6597 --- /dev/null +++ b/src/geoarrow/geoarrow-array/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "geoarrow-array" +version = "0.7.0" +authors = ["Kyle Barron "] +edition = "2024" +license = "MIT OR Apache-2.0" +repository = "https://github.com/geoarrow/geoarrow-rs" +description = "GeoArrow array definitions." + +[features] +geozero = ["dep:geozero", "dep:arrow-json"] + +[dependencies] +arrow-array = {workspace = true} +arrow-buffer = {workspace = true} +arrow-json = {workspace = true, optional = true} +arrow-schema = {workspace = true} +geo-traits = {workspace = true} +geo-types = {workspace = true} +geoarrow-schema = {workspace = true} +geozero = {workspace = true, optional = true} +num-traits = {workspace = true} +wkb = {workspace = true} +wkt = {workspace = true} + +[dev-dependencies] +geo = {workspace = true} +geo-types = {workspace = true} +wkt = {workspace = true} diff --git a/src/geoarrow/geoarrow-array/src/array/coord/combined.rs b/src/geoarrow/geoarrow-array/src/array/coord/combined.rs new file mode 100644 index 0000000000..a86de09cf6 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/coord/combined.rs @@ -0,0 +1,313 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, FixedSizeListArray, StructArray}; +use arrow_schema::DataType; +use geoarrow_schema::{ + CoordType, Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{ + array::{InterleavedCoordBuffer, SeparatedCoordBuffer}, + builder::{InterleavedCoordBufferBuilder, SeparatedCoordBufferBuilder}, + scalar::Coord, +}; + +/// An Arrow representation of an array of coordinates. +/// +/// As defined in the GeoArrow spec, coordinates can either be interleaved (i.e. a single array of +/// XYXYXY) or separated (i.e. two arrays, one XXX and another YYY). +/// +/// This CoordBuffer abstracts over an `InterleavedCoordBuffer` and a `SeparatedCoordBuffer`. +/// +/// For now all coordinate buffers support only two dimensions. +/// +/// This is named `CoordBuffer` instead of `CoordArray` because the buffer does not store its own +/// validity bitmask. Rather the geometry arrays that build on top of this maintain their own +/// validity masks. +#[derive(Debug, Clone)] +pub enum CoordBuffer { + /// Interleaved coordinates + Interleaved(InterleavedCoordBuffer), + /// Separated coordinates + Separated(SeparatedCoordBuffer), +} + +impl CoordBuffer { + /// Slice this buffer + pub(crate) fn slice(&self, offset: usize, length: usize) -> Self { + match self { + CoordBuffer::Interleaved(c) => CoordBuffer::Interleaved(c.slice(offset, length)), + CoordBuffer::Separated(c) => CoordBuffer::Separated(c.slice(offset, length)), + } + } + + /// The underlying coordinate type + pub fn coord_type(&self) -> CoordType { + match self { + CoordBuffer::Interleaved(_) => CoordType::Interleaved, + CoordBuffer::Separated(_) => CoordType::Separated, + } + } + + /// The arrow [DataType] for this coordinate buffer. + pub(crate) fn storage_type(&self) -> DataType { + match self { + CoordBuffer::Interleaved(c) => c.storage_type(), + CoordBuffer::Separated(c) => c.storage_type(), + } + } + + /// The length of this coordinate buffer + pub fn len(&self) -> usize { + match self { + CoordBuffer::Interleaved(c) => c.len(), + CoordBuffer::Separated(c) => c.len(), + } + } + + /// Whether this coordinate buffer is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::CoordTrait; + /// use geoarrow_array::array::{CoordBuffer, SeparatedCoordBuffer}; + /// use geoarrow_schema::Dimension; + /// + /// let coords = [ + /// geo_types::coord! { x: 1.0, y: 2.0 }, + /// geo_types::coord! { x: 3.0, y: 4.0 }, + /// ]; + /// let coord_buffer = CoordBuffer::from( + /// SeparatedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap() + /// ); + /// let coord = coord_buffer.value(0); + /// assert_eq!(coord.x(), 1.0); + /// assert_eq!(coord.y(), 2.0); + /// ``` + /// + /// # Panics + /// + /// Panics if the value is outside the bounds of the buffer. + pub fn value(&self, index: usize) -> Coord<'_> { + match self { + CoordBuffer::Interleaved(c) => Coord::Interleaved(c.value(index)), + CoordBuffer::Separated(c) => Coord::Separated(c.value(index)), + } + } + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::CoordTrait; + /// use geoarrow_array::array::{CoordBuffer, SeparatedCoordBuffer}; + /// use geoarrow_schema::Dimension; + /// + /// let coords = [ + /// geo_types::coord! { x: 1.0, y: 2.0 }, + /// geo_types::coord! { x: 3.0, y: 4.0 }, + /// ]; + /// let coord_buffer = CoordBuffer::from( + /// SeparatedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap() + /// ); + /// let coord = unsafe { coord_buffer.value_unchecked(0) }; + /// assert_eq!(coord.x(), 1.0); + /// assert_eq!(coord.y(), 2.0); + /// ``` + /// + /// # Safety + /// + /// Caller is responsible for ensuring that the index is within the bounds of the buffer. + pub unsafe fn value_unchecked(&self, index: usize) -> Coord<'_> { + match self { + CoordBuffer::Interleaved(c) => Coord::Interleaved(unsafe { c.value_unchecked(index) }), + CoordBuffer::Separated(c) => Coord::Separated(unsafe { c.value_unchecked(index) }), + } + } + + pub(crate) fn into_array_ref(self) -> ArrayRef { + self.into() + } + + /// The dimension of this coordinate buffer + pub fn dim(&self) -> Dimension { + match self { + CoordBuffer::Interleaved(c) => c.dim(), + CoordBuffer::Separated(c) => c.dim(), + } + } + + /// Convert this coordinate array into the given [CoordType] + /// + /// This is a no-op if the coord_type matches the existing coord type. Otherwise a full clone + /// of the underlying coordinate buffers will be performed. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + let dim = self.dim(); + match (self, coord_type) { + (CoordBuffer::Interleaved(cb), CoordType::Interleaved) => CoordBuffer::Interleaved(cb), + (CoordBuffer::Interleaved(cb), CoordType::Separated) => { + let mut new_buffer = SeparatedCoordBufferBuilder::with_capacity(cb.len(), dim); + for i in 0..cb.len() { + let coord = cb.value(i); + new_buffer.push_coord(&coord); + } + CoordBuffer::Separated(new_buffer.finish()) + } + (CoordBuffer::Separated(cb), CoordType::Separated) => CoordBuffer::Separated(cb), + (CoordBuffer::Separated(cb), CoordType::Interleaved) => { + let mut new_buffer = InterleavedCoordBufferBuilder::with_capacity(cb.len(), dim); + for i in 0..cb.len() { + let coord = cb.value(i); + new_buffer.push_coord(&coord); + } + CoordBuffer::Interleaved(new_buffer.finish()) + } + } + } + + pub(crate) fn from_arrow(value: &dyn Array, dim: Dimension) -> GeoArrowResult { + match value.data_type() { + DataType::Struct(_) => { + let downcasted = value.as_any().downcast_ref::().unwrap(); + Ok(CoordBuffer::Separated(SeparatedCoordBuffer::from_arrow( + downcasted, dim, + )?)) + } + DataType::FixedSizeList(_, _) => { + let downcasted = value.as_any().downcast_ref::().unwrap(); + Ok(CoordBuffer::Interleaved( + InterleavedCoordBuffer::from_arrow(downcasted, dim)?, + )) + } + _ => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected coord buffer type: {:?}", + value.data_type() + ))), + } + } +} + +impl From for ArrayRef { + fn from(value: CoordBuffer) -> Self { + match value { + CoordBuffer::Interleaved(c) => Arc::new(FixedSizeListArray::from(c)), + CoordBuffer::Separated(c) => Arc::new(StructArray::from(c)), + } + } +} + +impl PartialEq for CoordBuffer { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (CoordBuffer::Interleaved(a), CoordBuffer::Interleaved(b)) => PartialEq::eq(a, b), + (CoordBuffer::Interleaved(left), CoordBuffer::Separated(right)) => { + if left.len() != right.len() { + return false; + } + + for i in 0..left.len() { + let left_coord = left.value(i); + let right_coord = right.value(i); + + if left_coord != right_coord { + return false; + } + } + + true + } + (CoordBuffer::Separated(a), CoordBuffer::Separated(b)) => PartialEq::eq(a, b), + (CoordBuffer::Separated(left), CoordBuffer::Interleaved(right)) => { + if left.len() != right.len() { + return false; + } + + for i in 0..left.len() { + let left_coord = left.value(i); + let right_coord = right.value(i); + + if left_coord != right_coord { + return false; + } + } + + true + } + } + } +} + +impl From for CoordBuffer { + fn from(value: InterleavedCoordBuffer) -> Self { + Self::Interleaved(value) + } +} + +impl From for CoordBuffer { + fn from(value: SeparatedCoordBuffer) -> Self { + Self::Separated(value) + } +} + +// #[cfg(test)] +// mod test { +// use crate::error::Result; + +// use super::*; + +// #[test] +// fn test_eq_both_interleaved() -> Result<()> { +// let coords1 = vec![0., 3., 1., 4., 2., 5.]; +// let buf1 = +// CoordBuffer::Interleaved(InterleavedCoordBuffer::from_vec(coords1, Dimension::XY)?); + +// let coords2 = vec![0., 3., 1., 4., 2., 5.]; +// let buf2 = +// CoordBuffer::Interleaved(InterleavedCoordBuffer::from_vec(coords2, Dimension::XY)?); + +// assert_eq!(buf1, buf2); +// Ok(()) +// } + +// #[test] +// fn test_eq_across_types() -> Result<()> { +// let x1 = vec![0., 1., 2.]; +// let y1 = vec![3., 4., 5.]; + +// let buf1 = CoordBuffer::Separated(SeparatedCoordBuffer::new( +// [x1.into(), y1.into(), vec![].into(), vec![].into()], +// Dimension::XY, +// )); + +// let coords2 = vec![0., 3., 1., 4., 2., 5.]; +// let buf2 = +// CoordBuffer::Interleaved(InterleavedCoordBuffer::new(coords2.into(), Dimension::XY)); + +// assert_eq!(buf1, buf2); +// Ok(()) +// } + +// #[test] +// fn test_eq_across_types_slicing() -> Result<()> { +// let x1 = vec![0., 1., 2.]; +// let y1 = vec![3., 4., 5.]; + +// let buf1 = CoordBuffer::Separated((x1, y1).try_into()?).slice(1, 1); + +// let coords2 = vec![0., 3., 1., 4., 2., 5.]; +// let buf2 = +// CoordBuffer::Interleaved(InterleavedCoordBuffer::new(coords2.into(), Dimension::XY)) +// .slice(1, 1); + +// assert_eq!(buf1, buf2); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/coord/interleaved.rs b/src/geoarrow/geoarrow-array/src/array/coord/interleaved.rs new file mode 100644 index 0000000000..6a245e278e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/coord/interleaved.rs @@ -0,0 +1,222 @@ +use std::sync::Arc; + +use arrow_array::{Array, FixedSizeListArray, Float64Array}; +use arrow_buffer::ScalarBuffer; +use arrow_schema::{DataType, Field}; +use geo_traits::CoordTrait; +use geoarrow_schema::{ + CoordType, Dimension, PointType, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{builder::InterleavedCoordBufferBuilder, scalar::InterleavedCoord}; + +/// An array of coordinates stored interleaved in a single buffer. +/// +/// This stores all coordinates in interleaved fashion in a single underlying buffer: e.g. `xyxyxy` +/// for 2D coordinates. +#[derive(Debug, Clone, PartialEq)] +pub struct InterleavedCoordBuffer { + pub(crate) coords: ScalarBuffer, + pub(crate) dim: Dimension, +} + +fn check(coords: &ScalarBuffer, dim: Dimension) -> GeoArrowResult<()> { + if !coords.len().is_multiple_of(dim.size()) { + return Err(GeoArrowError::InvalidGeoArrow( + "Length of interleaved coordinate buffer must be a multiple of the dimension size" + .to_string(), + )); + } + + Ok(()) +} + +impl InterleavedCoordBuffer { + /// The underlying coordinate type + pub const COORD_TYPE: CoordType = CoordType::Interleaved; + + /// Construct a new InterleavedCoordBuffer + /// + /// # Panics + /// + /// - if coords.len() % dim.size() != 0 + pub fn new(coords: ScalarBuffer, dim: Dimension) -> Self { + Self::try_new(coords, dim).unwrap() + } + + /// Construct a new InterleavedCoordBuffer + /// + /// # Errors + /// + /// - if the coordinate buffer have different lengths + pub fn try_new(coords: ScalarBuffer, dim: Dimension) -> GeoArrowResult { + check(&coords, dim)?; + Ok(Self { coords, dim }) + } + + /// Construct from an iterator of coordinates. + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> GeoArrowResult { + Ok(InterleavedCoordBufferBuilder::from_coords(coords, dim)?.finish()) + } + + /// Access the underlying coordinate buffer. + pub fn coords(&self) -> &ScalarBuffer { + &self.coords + } + + pub(crate) fn values_array(&self) -> Float64Array { + Float64Array::new(self.coords.clone(), None) + } + + /// The dimension of this coordinate buffer + pub fn dim(&self) -> Dimension { + self.dim + } + + pub(crate) fn values_field(&self) -> Field { + match self.dim { + Dimension::XY => Field::new("xy", DataType::Float64, false), + Dimension::XYZ => Field::new("xyz", DataType::Float64, false), + Dimension::XYM => Field::new("xym", DataType::Float64, false), + Dimension::XYZM => Field::new("xyzm", DataType::Float64, false), + } + } + + pub(crate) fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + coords: self + .coords + .slice(offset * self.dim.size(), length * self.dim.size()), + dim: self.dim, + } + } + + pub(crate) fn storage_type(&self) -> DataType { + PointType::new(self.dim, Default::default()) + .with_coord_type(Self::COORD_TYPE) + .data_type() + } + + /// The number of coordinates + pub fn len(&self) -> usize { + self.coords.len() / self.dim.size() + } + + /// Whether this buffer is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::CoordTrait; + /// use geoarrow_array::array::InterleavedCoordBuffer; + /// use geoarrow_schema::Dimension; + /// + /// let coords = [ + /// geo_types::coord! { x: 1.0, y: 2.0 }, + /// geo_types::coord! { x: 3.0, y: 4.0 }, + /// ]; + /// let coord_buffer = InterleavedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap(); + /// let coord = coord_buffer.value(0); + /// assert_eq!(coord.x(), 1.0); + /// assert_eq!(coord.y(), 2.0); + /// ``` + /// + /// # Panics + /// + /// Panics if the value is outside the bounds of the buffer. + pub fn value(&self, index: usize) -> InterleavedCoord<'_> { + assert!(index <= self.len()); + unsafe { self.value_unchecked(index) } + } + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::CoordTrait; + /// use geoarrow_array::array::InterleavedCoordBuffer; + /// use geoarrow_schema::Dimension; + /// + /// let coords = [ + /// geo_types::coord! { x: 1.0, y: 2.0 }, + /// geo_types::coord! { x: 3.0, y: 4.0 }, + /// ]; + /// let coord_buffer = InterleavedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap(); + /// let coord = unsafe { coord_buffer.value_unchecked(0) }; + /// assert_eq!(coord.x(), 1.0); + /// assert_eq!(coord.y(), 2.0); + /// ``` + /// + /// # Safety + /// + /// Caller is responsible for ensuring that the index is within the bounds of the buffer. + pub unsafe fn value_unchecked(&self, index: usize) -> InterleavedCoord<'_> { + InterleavedCoord { + coords: &self.coords, + i: index, + dim: self.dim, + } + } + + pub(crate) fn from_arrow(array: &FixedSizeListArray, dim: Dimension) -> GeoArrowResult { + if array.value_length() != dim.size() as i32 { + return Err(GeoArrowError::InvalidGeoArrow(format!( + "Expected the FixedSizeListArray to match the dimension. Array length is {}, dimension is: {:?} have size 2", + array.value_length(), + dim + ))); + } + + let coord_array_values = array + .values() + .as_any() + .downcast_ref::() + .unwrap(); + + Ok(InterleavedCoordBuffer::new( + coord_array_values.values().clone(), + dim, + )) + } +} + +impl From for FixedSizeListArray { + fn from(value: InterleavedCoordBuffer) -> Self { + FixedSizeListArray::new( + Arc::new(value.values_field()), + value.dim.size() as i32, + Arc::new(value.values_array()), + None, + ) + } +} + +// #[cfg(test)] +// mod test { +// use super::*; + +// #[test] +// fn test_eq_slicing() { +// let coords1 = vec![0., 3., 1., 4., 2., 5.]; +// let buf1 = InterleavedCoordBuffer::new(coords1.into(), Dimension::XY).slice(1, 1); + +// let coords2 = vec![1., 4.]; +// let buf2 = InterleavedCoordBuffer::new(coords2.into(), Dimension::XY); + +// assert_eq!(buf1, buf2); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/coord/mod.rs b/src/geoarrow/geoarrow-array/src/array/coord/mod.rs new file mode 100644 index 0000000000..1493e5a5f1 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/coord/mod.rs @@ -0,0 +1,13 @@ +//! Contains implementations for how to encode arrays of coordinates for all other geometry array +//! types. +//! +//! Coordinates can be either _interleaved_, where they're represented as a `FixedSizeList`, or +//! _separated_, where they're represented with a `StructArray`. + +mod combined; +mod interleaved; +mod separated; + +pub use combined::CoordBuffer; +pub use interleaved::InterleavedCoordBuffer; +pub use separated::SeparatedCoordBuffer; diff --git a/src/geoarrow/geoarrow-array/src/array/coord/separated.rs b/src/geoarrow/geoarrow-array/src/array/coord/separated.rs new file mode 100644 index 0000000000..8720e31fe1 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/coord/separated.rs @@ -0,0 +1,323 @@ +use std::sync::Arc; + +use arrow_array::{ArrayRef, Float64Array, StructArray, cast::AsArray, types::Float64Type}; +use arrow_buffer::ScalarBuffer; +use arrow_schema::{DataType, Field}; +use geo_traits::CoordTrait; +use geoarrow_schema::{ + CoordType, Dimension, PointType, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{builder::SeparatedCoordBufferBuilder, scalar::SeparatedCoord}; + +/// An array of coordinates stored in separate buffers of the same length. +/// +/// This stores all coordinates in separated fashion as multiple underlying buffers: e.g. `xxx` and +/// `yyy` for 2D coordinates. +#[derive(Debug, Clone, PartialEq)] +pub struct SeparatedCoordBuffer { + /// We always store a buffer for all 4 dimensions. The buffers for dimension 3 and 4 may be + /// empty. + pub(crate) buffers: [ScalarBuffer; 4], + pub(crate) dim: Dimension, +} + +fn check(buffers: &[ScalarBuffer; 4], dim: Dimension) -> GeoArrowResult<()> { + let all_same_length = match dim { + Dimension::XY => buffers[0].len() == buffers[1].len(), + Dimension::XYZ | Dimension::XYM => { + buffers[0].len() == buffers[1].len() && buffers[1].len() == buffers[2].len() + } + Dimension::XYZM => { + buffers[0].len() == buffers[1].len() + && buffers[1].len() == buffers[2].len() + && buffers[2].len() == buffers[3].len() + } + }; + + if !all_same_length { + return Err(GeoArrowError::InvalidGeoArrow( + "all buffers must have the same length".to_string(), + )); + } + + Ok(()) +} + +impl SeparatedCoordBuffer { + /// The underlying coordinate type + pub const COORD_TYPE: CoordType = CoordType::Separated; + + /// Construct a new SeparatedCoordBuffer from an array of existing buffers. + /// + /// The number of _valid_ buffers in the array must match the dimension size. E.g. if the `dim` + /// is `Dimension::XY`, then only the first two buffers must have non-zero length, and the last + /// two buffers in the array can have length zero. + pub fn from_array(buffers: [ScalarBuffer; 4], dim: Dimension) -> GeoArrowResult { + check(&buffers, dim)?; + Ok(Self { buffers, dim }) + } + + /// Construct a new SeparatedCoordBuffer from a `Vec` of existing buffers. + /// + /// All buffers within `buffers` must have the same length, and the length of `buffers` must + /// equal the dimension size. + pub fn from_vec(buffers: Vec>, dim: Dimension) -> GeoArrowResult { + if buffers.len() != dim.size() { + return Err(GeoArrowError::InvalidGeoArrow( + "Buffers must match dimension length ".into(), + )); + } + + let mut buffers = buffers.into_iter().map(Some).collect::>(); + + // Fill buffers with empty buffers past needed dimensions + let buffers = core::array::from_fn(|i| { + if i < buffers.len() { + buffers[i].take().unwrap() + } else { + Vec::new().into() + } + }); + + Self::from_array(buffers, dim) + } + + /// Access the underlying coordinate buffers. + /// + /// Note that not all four buffers may be valid. Only so many buffers have defined meaning as + /// there are dimensions, so for an XY buffer, only the first two buffers have defined meaning, + /// and the last two may be any buffer, or empty. + pub fn raw_buffers(&self) -> &[ScalarBuffer; 4] { + &self.buffers + } + + /// Access the underlying coordinate buffers. + /// + /// In comparison to raw_buffers, all of the returned buffers are valid. + pub fn buffers(&self) -> Vec> { + match self.dim { + Dimension::XY => { + vec![self.buffers[0].clone(), self.buffers[1].clone()] + } + Dimension::XYZ | Dimension::XYM => { + vec![ + self.buffers[0].clone(), + self.buffers[1].clone(), + self.buffers[2].clone(), + ] + } + Dimension::XYZM => { + vec![ + self.buffers[0].clone(), + self.buffers[1].clone(), + self.buffers[2].clone(), + self.buffers[3].clone(), + ] + } + } + } + + /// The dimension of this coordinate buffer + pub fn dim(&self) -> Dimension { + self.dim + } + + pub(crate) fn values_array(&self) -> Vec { + match self.dim { + Dimension::XY => { + vec![ + Arc::new(Float64Array::new(self.buffers[0].clone(), None)), + Arc::new(Float64Array::new(self.buffers[1].clone(), None)), + ] + } + Dimension::XYZ | Dimension::XYM => { + vec![ + Arc::new(Float64Array::new(self.buffers[0].clone(), None)), + Arc::new(Float64Array::new(self.buffers[1].clone(), None)), + Arc::new(Float64Array::new(self.buffers[2].clone(), None)), + ] + } + Dimension::XYZM => { + vec![ + Arc::new(Float64Array::new(self.buffers[0].clone(), None)), + Arc::new(Float64Array::new(self.buffers[1].clone(), None)), + Arc::new(Float64Array::new(self.buffers[2].clone(), None)), + Arc::new(Float64Array::new(self.buffers[3].clone(), None)), + ] + } + } + } + + pub(crate) fn values_field(&self) -> Vec { + match self.dim { + Dimension::XY => { + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ] + } + Dimension::XYZ => { + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + Field::new("z", DataType::Float64, false), + ] + } + Dimension::XYM => { + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + Field::new("m", DataType::Float64, false), + ] + } + Dimension::XYZM => { + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + Field::new("z", DataType::Float64, false), + Field::new("m", DataType::Float64, false), + ] + } + } + } + + pub(crate) fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + + // Initialize array with existing buffers, then overwrite them + let mut sliced_buffers = self.buffers.clone(); + for (i, buffer) in self.buffers.iter().enumerate().take(self.dim.size()) { + sliced_buffers[i] = buffer.slice(offset, length); + } + + Self { + buffers: sliced_buffers, + dim: self.dim, + } + } + + pub(crate) fn storage_type(&self) -> DataType { + PointType::new(self.dim, Default::default()) + .with_coord_type(Self::COORD_TYPE) + .data_type() + } + + /// The number of coordinates + pub fn len(&self) -> usize { + self.buffers[0].len() + } + + /// Whether the coordinate buffer is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::CoordTrait; + /// use geoarrow_array::array::SeparatedCoordBuffer; + /// use geoarrow_schema::Dimension; + /// + /// let coords = [ + /// geo_types::coord! { x: 1.0, y: 2.0 }, + /// geo_types::coord! { x: 3.0, y: 4.0 }, + /// ]; + /// let coord_buffer = SeparatedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap(); + /// let coord = coord_buffer.value(0); + /// assert_eq!(coord.x(), 1.0); + /// assert_eq!(coord.y(), 2.0); + /// ``` + /// + /// # Panics + /// + /// Panics if the value is outside the bounds of the buffer. + pub fn value(&self, index: usize) -> SeparatedCoord<'_> { + assert!(index <= self.len()); + unsafe { self.value_unchecked(index) } + } + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::CoordTrait; + /// use geoarrow_array::array::SeparatedCoordBuffer; + /// use geoarrow_schema::Dimension; + /// + /// let coords = [ + /// geo_types::coord! { x: 1.0, y: 2.0 }, + /// geo_types::coord! { x: 3.0, y: 4.0 }, + /// ]; + /// let coord_buffer = SeparatedCoordBuffer::from_coords(coords.iter(), Dimension::XY).unwrap(); + /// let coord = unsafe { coord_buffer.value_unchecked(0) }; + /// assert_eq!(coord.x(), 1.0); + /// assert_eq!(coord.y(), 2.0); + /// ``` + /// + /// # Safety + /// + /// Caller is responsible for ensuring that the index is within the bounds of the buffer. + pub unsafe fn value_unchecked(&self, index: usize) -> SeparatedCoord<'_> { + SeparatedCoord { + buffers: &self.buffers, + i: index, + dim: self.dim, + } + } + + pub(crate) fn from_arrow(array: &StructArray, dim: Dimension) -> GeoArrowResult { + let buffers = array + .columns() + .iter() + .map(|c| c.as_primitive::().values().clone()) + .collect(); + Self::from_vec(buffers, dim) + } + + /// Construct from an iterator of coordinates + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> GeoArrowResult { + Ok(SeparatedCoordBufferBuilder::from_coords(coords, dim)?.finish()) + } +} + +impl From for StructArray { + fn from(value: SeparatedCoordBuffer) -> Self { + StructArray::new(value.values_field().into(), value.values_array(), None) + } +} + +// #[cfg(test)] +// mod test { +// use super::*; + +// #[test] +// fn test_eq_slicing() { +// let x1 = vec![0., 1., 2.]; +// let y1 = vec![3., 4., 5.]; + +// let buf1 = SeparatedCoordBuffer::from_vec(vec![x1.into(), y1.into()], Dimension::XY) +// .unwrap() +// .slice(1, 1); +// dbg!(&buf1.buffers[0]); +// dbg!(&buf1.buffers[1]); + +// let x2 = vec![1.]; +// let y2 = vec![4.]; +// let buf2 = +// SeparatedCoordBuffer::from_vec(vec![x2.into(), y2.into()], Dimension::XY).unwrap(); + +// assert_eq!(buf1, buf2); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/geometry.rs b/src/geoarrow/geoarrow-array/src/array/geometry.rs new file mode 100644 index 0000000000..b0ed222f61 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/geometry.rs @@ -0,0 +1,1256 @@ +use std::{collections::HashSet, sync::Arc}; + +use arrow_array::{Array, ArrayRef, OffsetSizeTrait, UnionArray, cast::AsArray}; +use arrow_buffer::{NullBuffer, ScalarBuffer}; +use arrow_schema::{ArrowError, DataType, Field, UnionMode}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, GeometryCollectionType, GeometryType, LineStringType, + Metadata, MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::*, + builder::*, + capacity::GeometryCapacity, + scalar::Geometry, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, +}; + +/// Macro to implement child array accessor with proper slicing support +/// +/// This macro generates code to access a child array from a GeometryArray, +/// handling both sliced and non-sliced cases. +/// +/// # Arguments +/// * `$geom_arr` - The child geometry array +/// +/// # Returns +/// A cloned or sliced version of the child array +macro_rules! impl_child_accessor { + ($self:expr, $geom_arr:expr) => {{ + let geom_arr = $geom_arr; + if !$self.is_sliced() { + // Fast path: if not sliced, just clone the array + geom_arr.clone() + } else { + // Slow path: find the range of this geometry type in the sliced view + let target_type_id = geom_arr.geometry_type_id(); + let first_index = $self.type_ids.iter().position(|id| *id == target_type_id); + let last_index = $self.type_ids.iter().rposition(|id| *id == target_type_id); + + match (first_index, last_index) { + (Some(first), Some(last)) => { + // Found both first and last occurrence + let first_offset = $self.offsets[first] as usize; + let last_offset = $self.offsets[last] as usize; + geom_arr.slice(first_offset, last_offset - first_offset + 1) + } + (Some(first), None) => { + unreachable!("Shouldn't happen: found first offset but not last: {first}"); + } + (None, Some(last)) => { + unreachable!("Shouldn't happen: found last offset but not first: {last}"); + } + (None, None) => { + // This geometry type is not present in the sliced view + geom_arr.slice(0, 0) + } + } + } + }}; +} + +/// An immutable array of geometries of unknown geometry type and dimension. +/// +// # Invariants +// +// - All arrays must have the same dimension +// - All arrays must have the same coordinate layout (interleaved or separated) +// +// - 1: Point +// - 2: LineString +// - 3: Polygon +// - 4: MultiPoint +// - 5: MultiLineString +// - 6: MultiPolygon +// - 7: GeometryCollection +// - 11: Point Z +// - 12: LineString Z +// - 13: Polygon Z +// - 14: MultiPoint Z +// - 15: MultiLineString Z +// - 16: MultiPolygon Z +// - 17: GeometryCollection Z +// - 21: Point M +// - 22: LineString M +// - 23: Polygon M +// - 24: MultiPoint M +// - 25: MultiLineString M +// - 26: MultiPolygon M +// - 27: GeometryCollection M +// - 31: Point ZM +// - 32: LineString ZM +// - 33: Polygon ZM +// - 34: MultiPoint ZM +// - 35: MultiLineString ZM +// - 36: MultiPolygon ZM +// - 37: GeometryCollection ZM +#[derive(Debug, Clone)] +pub struct GeometryArray { + pub(crate) data_type: GeometryType, + + /// Invariant: every item in `type_ids` is `> 0 && < fields.len()` if `type_ids` are not + /// provided. If `type_ids` exist in the NativeType, then every item in `type_ids` is `> 0 && ` + pub(crate) type_ids: ScalarBuffer, + + /// Invariant: `offsets.len() == type_ids.len()` + pub(crate) offsets: ScalarBuffer, + + /// An array of PointArray, ordered XY, XYZ, XYM, XYZM + pub(crate) points: [PointArray; 4], + pub(crate) line_strings: [LineStringArray; 4], + pub(crate) polygons: [PolygonArray; 4], + pub(crate) mpoints: [MultiPointArray; 4], + pub(crate) mline_strings: [MultiLineStringArray; 4], + pub(crate) mpolygons: [MultiPolygonArray; 4], + pub(crate) gcs: [GeometryCollectionArray; 4], +} + +impl GeometryArray { + /// Create a new GeometryArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the validity is not `None` and its length is different from the number of geometries + /// - if the largest geometry offset does not match the number of coordinates + #[allow(clippy::too_many_arguments)] + pub fn new( + type_ids: ScalarBuffer, + offsets: ScalarBuffer, + points: [PointArray; 4], + line_strings: [LineStringArray; 4], + polygons: [PolygonArray; 4], + mpoints: [MultiPointArray; 4], + mline_strings: [MultiLineStringArray; 4], + mpolygons: [MultiPolygonArray; 4], + gcs: [GeometryCollectionArray; 4], + metadata: Arc, + ) -> Self { + // Validate that all arrays have the same coord type. + let mut coord_types = HashSet::new(); + points.iter().for_each(|arr| { + coord_types.insert(arr.data_type.coord_type()); + }); + line_strings.iter().for_each(|arr| { + coord_types.insert(arr.data_type.coord_type()); + }); + polygons.iter().for_each(|arr| { + coord_types.insert(arr.data_type.coord_type()); + }); + mpoints.iter().for_each(|arr| { + coord_types.insert(arr.data_type.coord_type()); + }); + mline_strings.iter().for_each(|arr| { + coord_types.insert(arr.data_type.coord_type()); + }); + mpolygons.iter().for_each(|arr| { + coord_types.insert(arr.data_type.coord_type()); + }); + + assert!(coord_types.len() == 1); + let coord_type = coord_types.into_iter().next().unwrap(); + + Self { + data_type: GeometryType::new(metadata).with_coord_type(coord_type), + type_ids, + offsets, + points, + line_strings, + polygons, + mpoints, + mline_strings, + mpolygons, + gcs, + } + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> GeometryCapacity { + GeometryCapacity::new( + 0, + core::array::from_fn(|i| self.points[i].buffer_lengths()), + core::array::from_fn(|i| self.line_strings[i].buffer_lengths()), + core::array::from_fn(|i| self.polygons[i].buffer_lengths()), + core::array::from_fn(|i| self.mpoints[i].buffer_lengths()), + core::array::from_fn(|i| self.mline_strings[i].buffer_lengths()), + core::array::from_fn(|i| self.mpolygons[i].buffer_lengths()), + core::array::from_fn(|i| self.gcs[i].buffer_lengths()), + ) + } + + /// Returns the `type_ids` buffer for this array + pub fn type_ids(&self) -> &ScalarBuffer { + &self.type_ids + } + + /// Returns the `offsets` buffer for this array + pub fn offsets(&self) -> &ScalarBuffer { + &self.offsets + } + + /// Determine whether this array has been sliced. + /// + /// This array has been sliced iff the total number of geometries in the child arrays does not + /// equal the number of values in the type_ids array. + /// + /// Since the length of each child array is pre-computed, this operation is O(1). + fn is_sliced(&self) -> bool { + let mut physical_geom_len = 0; + physical_geom_len += self.points.iter().fold(0, |acc, arr| acc + arr.len()); + physical_geom_len += self.line_strings.iter().fold(0, |acc, arr| acc + arr.len()); + physical_geom_len += self.polygons.iter().fold(0, |acc, arr| acc + arr.len()); + physical_geom_len += self.mpoints.iter().fold(0, |acc, arr| acc + arr.len()); + physical_geom_len += self + .mline_strings + .iter() + .fold(0, |acc, arr| acc + arr.len()); + physical_geom_len += self.mpolygons.iter().fold(0, |acc, arr| acc + arr.len()); + physical_geom_len += self.gcs.iter().fold(0, |acc, arr| acc + arr.len()); + + physical_geom_len != self.type_ids.len() + } + + /// Access the PointArray child for the given dimension. + /// + /// Note that ordering will be maintained within the child array, but there may have been other + /// geometries in between in the parent array. + pub fn point_child(&self, dim: Dimension) -> PointArray { + impl_child_accessor!(self, &self.points[dim.order()]) + } + + /// Access the LineStringArray child for the given dimension. + /// + /// Note that ordering will be maintained within the child array, but there may have been other + /// geometries in between in the parent array. + pub fn line_string_child(&self, dim: Dimension) -> LineStringArray { + impl_child_accessor!(self, &self.line_strings[dim.order()]) + } + + /// Access the PolygonArray child for the given dimension. + /// + /// Note that ordering will be maintained within the child array, but there may have been other + /// geometries in between in the parent array. + pub fn polygon_child(&self, dim: Dimension) -> PolygonArray { + impl_child_accessor!(self, &self.polygons[dim.order()]) + } + + /// Access the MultiPointArray child for the given dimension. + /// + /// Note that ordering will be maintained within the child array, but there may have been other + /// geometries in between in the parent array. + pub fn multi_point_child(&self, dim: Dimension) -> MultiPointArray { + impl_child_accessor!(self, &self.mpoints[dim.order()]) + } + + /// Access the MultiLineStringArray child for the given dimension. + /// + /// Note that ordering will be maintained within the child array, but there may have been other + /// geometries in between in the parent array. + pub fn multi_line_string_child(&self, dim: Dimension) -> MultiLineStringArray { + impl_child_accessor!(self, &self.mline_strings[dim.order()]) + } + + /// Access the MultiPolygonArray child for the given dimension. + /// + /// Note that ordering will be maintained within the child array, but there may have been other + /// geometries in between in the parent array. + pub fn multi_polygon_child(&self, dim: Dimension) -> MultiPolygonArray { + impl_child_accessor!(self, &self.mpolygons[dim.order()]) + } + + /// Access the GeometryCollectionArray child for the given dimension. + /// + /// Note that ordering will be maintained within the child array, but there may have been other + /// geometries in between in the parent array. + pub fn geometry_collection_child(&self, dim: Dimension) -> GeometryCollectionArray { + impl_child_accessor!(self, &self.gcs[dim.order()]) + } + + // TODO: handle slicing + pub(crate) fn has_points(&self, dim: Dimension) -> bool { + !self.points[dim.order()].is_empty() + } + + pub(crate) fn has_line_strings(&self, dim: Dimension) -> bool { + !self.line_strings[dim.order()].is_empty() + } + + pub(crate) fn has_polygons(&self, dim: Dimension) -> bool { + !self.polygons[dim.order()].is_empty() + } + + pub(crate) fn has_multi_points(&self, dim: Dimension) -> bool { + !self.mpoints[dim.order()].is_empty() + } + + pub(crate) fn has_multi_line_strings(&self, dim: Dimension) -> bool { + !self.mline_strings[dim.order()].is_empty() + } + + pub(crate) fn has_multi_polygons(&self, dim: Dimension) -> bool { + !self.mpolygons[dim.order()].is_empty() + } + + #[allow(dead_code)] + pub(crate) fn has_geometry_collections(&self, dim: Dimension) -> bool { + !self.gcs[dim.order()].is_empty() + } + + /// Return `true` if this array holds at least one non-empty array of the given dimension + pub fn has_dimension(&self, dim: Dimension) -> bool { + self.has_points(dim) + || self.has_line_strings(dim) + || self.has_polygons(dim) + || self.has_multi_points(dim) + || self.has_multi_line_strings(dim) + || self.has_multi_polygons(dim) + } + + /// Return `true` if this array holds at least one geometry array of the given dimension and no + /// arrays of any other dimension. + pub fn has_only_dimension(&self, dim: Dimension) -> bool { + use Dimension::*; + let existent_dims = [ + self.has_dimension(XY), + self.has_dimension(XYZ), + self.has_dimension(XYM), + self.has_dimension(XYZM), + ]; + existent_dims.iter().map(|b| *b as u8).sum::() == 1 && existent_dims[dim.order()] + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + self.buffer_lengths().num_bytes() + } + + /// Slice this [`GeometryArray`]. + /// + /// # Implementation + /// + /// This operation is `O(F)` where `F` is the number of fields. + /// + /// # Panic + /// + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + data_type: self.data_type.clone(), + type_ids: self.type_ids.slice(offset, length), + offsets: self.offsets.slice(offset, length), + + points: self.points.clone(), + line_strings: self.line_strings.clone(), + polygons: self.polygons.clone(), + mpoints: self.mpoints.clone(), + mline_strings: self.mline_strings.clone(), + mpolygons: self.mpolygons.clone(), + gcs: self.gcs.clone(), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + points: self.points.map(|arr| arr.into_coord_type(coord_type)), + line_strings: self.line_strings.map(|arr| arr.into_coord_type(coord_type)), + polygons: self.polygons.map(|arr| arr.into_coord_type(coord_type)), + mpoints: self.mpoints.map(|arr| arr.into_coord_type(coord_type)), + mline_strings: self + .mline_strings + .map(|arr| arr.into_coord_type(coord_type)), + mpolygons: self.mpolygons.map(|arr| arr.into_coord_type(coord_type)), + gcs: self.gcs.map(|arr| arr.into_coord_type(coord_type)), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } + + // TODO: recursively expand the types from the geometry collection array + #[allow(dead_code)] + pub(crate) fn contained_types(&self) -> HashSet { + let mut types = HashSet::new(); + self.points.iter().for_each(|arr| { + if !arr.is_empty() { + types.insert(arr.data_type()); + } + }); + self.line_strings.iter().for_each(|arr| { + if !arr.is_empty() { + types.insert(arr.data_type()); + } + }); + self.polygons.iter().for_each(|arr| { + if !arr.is_empty() { + types.insert(arr.data_type()); + } + }); + self.mpoints.iter().for_each(|arr| { + if !arr.is_empty() { + types.insert(arr.data_type()); + } + }); + self.mline_strings.iter().for_each(|arr| { + if !arr.is_empty() { + types.insert(arr.data_type()); + } + }); + self.mpolygons.iter().for_each(|arr| { + if !arr.is_empty() { + types.insert(arr.data_type()); + } + }); + self.gcs.iter().for_each(|arr| { + if !arr.is_empty() { + types.insert(arr.data_type()); + } + }); + + types + } +} + +impl GeoArrowArray for GeometryArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + // Note that `type_ids` is sliced as usual, and thus always has the correct length. + self.type_ids.len() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.to_array_ref().logical_nulls() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.to_array_ref().logical_null_count() + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + let type_id = self.type_ids[i]; + let offset = self.offsets[i] as usize; + let dim = (type_id / 10) as usize; + match type_id % 10 { + PointType::GEOMETRY_TYPE_OFFSET => self.points[dim].is_null(offset), + LineStringType::GEOMETRY_TYPE_OFFSET => self.line_strings[dim].is_null(offset), + PolygonType::GEOMETRY_TYPE_OFFSET => self.polygons[dim].is_null(offset), + MultiPointType::GEOMETRY_TYPE_OFFSET => self.mpoints[dim].is_null(offset), + MultiLineStringType::GEOMETRY_TYPE_OFFSET => self.mline_strings[dim].is_null(offset), + MultiPolygonType::GEOMETRY_TYPE_OFFSET => self.mpolygons[dim].is_null(offset), + GeometryCollectionType::GEOMETRY_TYPE_OFFSET => self.gcs[dim].is_null(offset), + _ => unreachable!("unknown type_id {}", type_id), + } + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::Geometry(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for GeometryArray { + type Item = Geometry<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + let type_id = self.type_ids[index]; + let offset = self.offsets[index] as usize; + + let dim = (type_id / 10) as usize; + + let result = match type_id % 10 { + PointType::GEOMETRY_TYPE_OFFSET => Geometry::Point(self.points[dim].value(offset)?), + LineStringType::GEOMETRY_TYPE_OFFSET => { + Geometry::LineString(self.line_strings[dim].value(offset)?) + } + PolygonType::GEOMETRY_TYPE_OFFSET => { + Geometry::Polygon(self.polygons[dim].value(offset)?) + } + MultiPointType::GEOMETRY_TYPE_OFFSET => { + Geometry::MultiPoint(self.mpoints[dim].value(offset)?) + } + MultiLineStringType::GEOMETRY_TYPE_OFFSET => { + Geometry::MultiLineString(self.mline_strings[dim].value(offset)?) + } + MultiPolygonType::GEOMETRY_TYPE_OFFSET => { + Geometry::MultiPolygon(self.mpolygons[dim].value(offset)?) + } + GeometryCollectionType::GEOMETRY_TYPE_OFFSET => { + Geometry::GeometryCollection(self.gcs[dim].value(offset)?) + } + _ => unreachable!("unknown type_id {}", type_id), + }; + Ok(result) + } +} + +impl IntoArrow for GeometryArray { + type ArrowArray = UnionArray; + type ExtensionType = GeometryType; + + fn into_arrow(self) -> Self::ArrowArray { + let union_fields = match self.data_type.data_type() { + DataType::Union(union_fields, _) => union_fields, + _ => unreachable!(), + }; + + // https://stackoverflow.com/a/34406459/7319250 + let mut child_arrays: Vec> = vec![None; 28]; + for (i, arr) in self.points.into_iter().enumerate() { + child_arrays[i * 7] = Some(arr.into_array_ref()); + } + for (i, arr) in self.line_strings.into_iter().enumerate() { + child_arrays[i * 7 + 1] = Some(arr.into_array_ref()); + } + for (i, arr) in self.polygons.into_iter().enumerate() { + child_arrays[i * 7 + 2] = Some(arr.into_array_ref()); + } + for (i, arr) in self.mpoints.into_iter().enumerate() { + child_arrays[i * 7 + 3] = Some(arr.into_array_ref()); + } + for (i, arr) in self.mline_strings.into_iter().enumerate() { + child_arrays[i * 7 + 4] = Some(arr.into_array_ref()); + } + for (i, arr) in self.mpolygons.into_iter().enumerate() { + child_arrays[i * 7 + 5] = Some(arr.into_array_ref()); + } + for (i, arr) in self.gcs.into_iter().enumerate() { + child_arrays[i * 7 + 6] = Some(arr.into_array_ref()); + } + + UnionArray::try_new( + union_fields, + self.type_ids, + Some(self.offsets), + child_arrays.into_iter().map(|x| x.unwrap()).collect(), + ) + .unwrap() + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&UnionArray, GeometryType)> for GeometryArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&UnionArray, GeometryType)) -> GeoArrowResult { + let mut points: [Option; 4] = Default::default(); + let mut line_strings: [Option; 4] = Default::default(); + let mut polygons: [Option; 4] = Default::default(); + let mut mpoints: [Option; 4] = Default::default(); + let mut mline_strings: [Option; 4] = Default::default(); + let mut mpolygons: [Option; 4] = Default::default(); + let mut gcs: [Option; 4] = Default::default(); + + let coord_type = typ.coord_type(); + let metadata = typ.metadata().clone(); + + // Note: From the spec: + // + // The child arrays should not themselves contain GeoArrow metadata. Only the top-level + // geometry array should contain GeoArrow metadata. + match value.data_type() { + DataType::Union(fields, mode) => { + if !matches!(mode, UnionMode::Dense) { + return Err(ArrowError::SchemaError("Expected dense union".to_string()).into()); + } + + for (type_id, _field) in fields.iter() { + let dim = Dimension::from_order((type_id / 10) as _)?; + let index = dim.order(); + + match type_id % 10 { + 1 => { + points[index] = Some( + ( + value.child(type_id).as_ref(), + PointType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into()?, + ); + } + 2 => { + line_strings[index] = Some( + ( + value.child(type_id).as_ref(), + LineStringType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into()?, + ); + } + 3 => { + polygons[index] = Some( + ( + value.child(type_id).as_ref(), + PolygonType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into()?, + ); + } + 4 => { + mpoints[index] = Some( + ( + value.child(type_id).as_ref(), + MultiPointType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into()?, + ); + } + 5 => { + mline_strings[index] = Some( + ( + value.child(type_id).as_ref(), + MultiLineStringType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into()?, + ); + } + 6 => { + mpolygons[index] = Some( + ( + value.child(type_id).as_ref(), + MultiPolygonType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into()?, + ); + } + 7 => { + gcs[index] = Some( + ( + value.child(type_id).as_ref(), + GeometryCollectionType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into()?, + ); + } + _ => { + return Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected type_id when converting to GeometryArray {type_id}", + ))); + } + } + } + } + _ => { + return Err(GeoArrowError::InvalidGeoArrow( + "expected union type when converting to GeometryArray".to_string(), + )); + } + }; + + let type_ids = value.type_ids().clone(); + // This is after checking for dense union + let offsets = value.offsets().unwrap().clone(); + + // We need to convert the array [Option; 4] into `[PointArray; 4]`. + // But we also need to ensure the underlying PointArray has the correct `Dimension` for the + // given array index. + // In order to do this, we need the index of the array, which `map` doesn't give us. And + // using `core::array::from_fn` doesn't let us move out of the existing array. + // So we mutate the existing array of `[Option; 4]` to ensure all values are + // `Some`, and then later we call `unwrap` on all array values in a `map`. + points.iter_mut().enumerate().for_each(|(i, arr)| { + let new_val = if let Some(arr) = arr.take() { + arr + } else { + PointBuilder::new( + PointType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }; + arr.replace(new_val); + }); + line_strings.iter_mut().enumerate().for_each(|(i, arr)| { + let new_val = if let Some(arr) = arr.take() { + arr + } else { + LineStringBuilder::new( + LineStringType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }; + arr.replace(new_val); + }); + polygons.iter_mut().enumerate().for_each(|(i, arr)| { + let new_val = if let Some(arr) = arr.take() { + arr + } else { + PolygonBuilder::new( + PolygonType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }; + arr.replace(new_val); + }); + mpoints.iter_mut().enumerate().for_each(|(i, arr)| { + let new_val = if let Some(arr) = arr.take() { + arr + } else { + MultiPointBuilder::new( + MultiPointType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }; + arr.replace(new_val); + }); + mline_strings.iter_mut().enumerate().for_each(|(i, arr)| { + let new_val = if let Some(arr) = arr.take() { + arr + } else { + MultiLineStringBuilder::new( + MultiLineStringType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }; + arr.replace(new_val); + }); + mpolygons.iter_mut().enumerate().for_each(|(i, arr)| { + let new_val = if let Some(arr) = arr.take() { + arr + } else { + MultiPolygonBuilder::new( + MultiPolygonType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }; + arr.replace(new_val); + }); + gcs.iter_mut().enumerate().for_each(|(i, arr)| { + let new_val = if let Some(arr) = arr.take() { + arr + } else { + GeometryCollectionBuilder::new( + GeometryCollectionType::new( + Dimension::from_order(i).unwrap(), + Default::default(), + ) + .with_coord_type(coord_type), + ) + .finish() + }; + arr.replace(new_val); + }); + + Ok(Self::new( + type_ids, + offsets, + points.map(|x| x.unwrap()), + line_strings.map(|x| x.unwrap()), + polygons.map(|x| x.unwrap()), + mpoints.map(|x| x.unwrap()), + mline_strings.map(|x| x.unwrap()), + mpolygons.map(|x| x.unwrap()), + gcs.map(|x| x.unwrap()), + metadata, + )) + } +} + +impl TryFrom<(&dyn Array, GeometryType)> for GeometryArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, GeometryType)) -> GeoArrowResult { + match value.data_type() { + DataType::Union(_, _) => (value.as_union(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected GeometryArray DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for GeometryArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl TryFrom<(GenericWkbArray, GeometryType)> for GeometryArray { + type Error = GeoArrowError; + + fn try_from(value: (GenericWkbArray, GeometryType)) -> GeoArrowResult { + let mut_arr: GeometryBuilder = value.try_into()?; + Ok(mut_arr.finish()) + } +} + +pub(crate) trait DimensionIndex: Sized { + /// Get the positional index of the internal array for the given dimension. + fn order(&self) -> usize; + + fn from_order(index: usize) -> GeoArrowResult; +} + +impl DimensionIndex for Dimension { + fn order(&self) -> usize { + match self { + Self::XY => 0, + Self::XYZ => 1, + Self::XYM => 2, + Self::XYZM => 3, + } + } + + fn from_order(index: usize) -> GeoArrowResult { + match index { + 0 => Ok(Self::XY), + 1 => Ok(Self::XYZ), + 2 => Ok(Self::XYM), + 3 => Ok(Self::XYZM), + i => { + Err(ArrowError::SchemaError(format!("unsupported index in from_order: {i}")).into()) + } + } + } +} + +impl PartialEq for GeometryArray { + fn eq(&self, other: &Self) -> bool { + self.type_ids == other.type_ids + && self.offsets == other.offsets + && self.points == other.points + && self.line_strings == other.line_strings + && self.polygons == other.polygons + && self.mpoints == other.mpoints + && self.mline_strings == other.mline_strings + && self.mpolygons == other.mpolygons + && self.gcs == other.gcs + } +} + +type ChildrenArrays = ( + [PointArray; 4], + [LineStringArray; 4], + [PolygonArray; 4], + [MultiPointArray; 4], + [MultiLineStringArray; 4], + [MultiPolygonArray; 4], + [GeometryCollectionArray; 4], +); + +/// Initialize empty children with the given coord type. +/// +/// This is used in the impls like `From for GeometryArray`. This lets us initialize +/// all empty children and then just swap in the one array that's valid. +fn empty_children(coord_type: CoordType) -> ChildrenArrays { + ( + core::array::from_fn(|i| { + PointBuilder::new( + PointType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }), + core::array::from_fn(|i| { + LineStringBuilder::new( + LineStringType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }), + core::array::from_fn(|i| { + PolygonBuilder::new( + PolygonType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }), + core::array::from_fn(|i| { + MultiPointBuilder::new( + MultiPointType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }), + core::array::from_fn(|i| { + MultiLineStringBuilder::new( + MultiLineStringType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }), + core::array::from_fn(|i| { + MultiPolygonBuilder::new( + MultiPolygonType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }), + core::array::from_fn(|i| { + GeometryCollectionBuilder::new( + GeometryCollectionType::new(Dimension::from_order(i).unwrap(), Default::default()) + .with_coord_type(coord_type), + ) + .finish() + }), + ) +} + +macro_rules! impl_primitive_cast { + ($source_array:ty, $value_edit:tt) => { + impl From<$source_array> for GeometryArray { + fn from(value: $source_array) -> Self { + let coord_type = value.data_type.coord_type(); + let dim = value.data_type.dimension(); + let metadata = value.data_type.metadata().clone(); + + let type_ids = vec![value.geometry_type_id(); value.len()].into(); + let offsets = ScalarBuffer::from_iter(0..value.len() as i32); + let data_type = GeometryType::new(metadata).with_coord_type(coord_type); + let mut children = empty_children(coord_type); + + children.$value_edit[dim.order()] = value; + Self { + data_type, + type_ids, + offsets, + points: children.0, + line_strings: children.1, + polygons: children.2, + mpoints: children.3, + mline_strings: children.4, + mpolygons: children.5, + gcs: children.6, + } + } + } + }; +} + +impl_primitive_cast!(PointArray, 0); +impl_primitive_cast!(LineStringArray, 1); +impl_primitive_cast!(PolygonArray, 2); +impl_primitive_cast!(MultiPointArray, 3); +impl_primitive_cast!(MultiLineStringArray, 4); +impl_primitive_cast!(MultiPolygonArray, 5); +impl_primitive_cast!(GeometryCollectionArray, 6); + +// #[cfg(test)] +// mod test { +// use ::wkt::{Wkt, wkt}; +// use geo_traits::to_geo::ToGeoGeometry; +// use geoarrow_schema::Crs; +// use geoarrow_test::raw; + +// use super::*; +// use crate::test::{linestring, multilinestring, multipoint, multipolygon, point, polygon}; + +// fn geoms() -> Vec { +// vec![ +// point::p0().into(), +// point::p1().into(), +// point::p2().into(), +// linestring::ls0().into(), +// linestring::ls1().into(), +// polygon::p0().into(), +// polygon::p1().into(), +// multipoint::mp0().into(), +// multipoint::mp1().into(), +// multilinestring::ml0().into(), +// multilinestring::ml1().into(), +// multipolygon::mp0().into(), +// multipolygon::mp1().into(), +// ] +// } + +// fn geom_array(coord_type: CoordType) -> GeometryArray { +// let geoms = geoms().into_iter().map(Some).collect::>(); +// let typ = GeometryType::new(Default::default()).with_coord_type(coord_type); +// GeometryBuilder::from_nullable_geometries(&geoms, typ) +// .unwrap() +// .finish() +// } + +// #[test] +// fn test_2d() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = geoms(); +// let geometry_array = geom_array(coord_type); +// let geoms_again = geometry_array +// .iter_values() +// .map(|g| g.unwrap().to_geometry()) +// .collect::>(); +// assert_eq!(geoms, geoms_again); +// } +// } + +// #[test] +// fn test_2d_roundtrip_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = geoms(); +// let geometry_array = geom_array(coord_type); +// let field = geometry_array.data_type.to_field("geometry", true); +// let union_array = geometry_array.into_arrow(); + +// let geometry_array_again = +// GeometryArray::try_from((&union_array as _, &field)).unwrap(); +// let geoms_again = geometry_array_again +// .iter_values() +// .map(|g| g.unwrap().to_geometry()) +// .collect::>(); +// assert_eq!(geoms, geoms_again); +// } +// } + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for prefer_multi in [true, false] { +// let geo_arr = crate::test::geometry::array(coord_type, prefer_multi); + +// let point_type = geo_arr.extension_type().clone(); +// let field = point_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: GeometryArray = (arrow_arr.as_ref(), point_type).try_into().unwrap(); +// let geo_arr3: GeometryArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } + +// #[test] +// fn test_nullability() { +// let geoms = raw::geometry::geoms(); +// let null_idxs = geoms +// .iter() +// .enumerate() +// .filter_map(|(i, geom)| if geom.is_none() { Some(i) } else { None }) +// .collect::>(); + +// let typ = GeometryType::new(Default::default()); +// let geo_arr = GeometryBuilder::from_nullable_geometries(&geoms, typ) +// .unwrap() +// .finish(); + +// for null_idx in &null_idxs { +// assert!(geo_arr.is_null(*null_idx)); +// } +// } + +// #[test] +// fn test_logical_nulls() { +// let geoms = raw::geometry::geoms(); +// let expected_nulls = NullBuffer::from_iter(geoms.iter().map(|g| g.is_some())); + +// let typ = GeometryType::new(Default::default()); +// let geo_arr = GeometryBuilder::from_nullable_geometries(&geoms, typ) +// .unwrap() +// .finish(); + +// assert_eq!(geo_arr.logical_nulls().unwrap(), expected_nulls); +// } + +// #[test] +// fn into_coord_type() { +// for prefer_multi in [true, false] { +// let geo_arr = crate::test::geometry::array(CoordType::Interleaved, prefer_multi); +// let geo_arr2 = geo_arr +// .clone() +// .into_coord_type(CoordType::Separated) +// .into_coord_type(CoordType::Interleaved); + +// assert_eq!(geo_arr, geo_arr2); +// } +// } + +// #[test] +// fn partial_eq() { +// for prefer_multi in [true, false] { +// let arr1 = crate::test::geometry::array(CoordType::Interleaved, prefer_multi); +// let arr2 = crate::test::geometry::array(CoordType::Separated, prefer_multi); + +// assert_eq!(arr1, arr1); +// assert_eq!(arr2, arr2); +// assert_eq!(arr1, arr2); + +// assert_ne!(arr1, arr2.slice(0, 2)); +// } +// } + +// #[test] +// fn should_persist_crs() { +// let geo_arr = crate::test::geometry::array(CoordType::Interleaved, false); +// let crs = Crs::from_authority_code("EPSG:4326".to_string()); +// let geo_arr = geo_arr.with_metadata(Arc::new(Metadata::new(crs.clone(), None))); + +// let arrow_arr = geo_arr.to_array_ref(); +// let field = geo_arr.data_type().to_field("geometry", true); + +// let geo_arr2: GeometryArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr2.data_type.metadata().crs().clone(), crs); +// } + +// #[test] +// fn arrow_round_trip_should_preserve_slicing() { +// let geo_arr = crate::test::geometry::array(CoordType::Separated, false); +// let geometry_type = geo_arr.extension_type().clone(); + +// let sliced = geo_arr.slice(2, 4); +// let arrow_arr = sliced.to_array_ref(); +// let geo_arr2 = GeometryArray::try_from((arrow_arr.as_ref(), geometry_type)).unwrap(); + +// assert_eq!(sliced, geo_arr2); +// assert_eq!(sliced.value(0).unwrap(), geo_arr2.value(0).unwrap()); +// } + +// #[test] +// fn determine_if_sliced() { +// let geo_arr = crate::test::geometry::array(CoordType::Separated, false); +// assert!(!geo_arr.is_sliced()); + +// let sliced = geo_arr.slice(2, 4); +// assert!(sliced.is_sliced()); +// } + +// #[test] +// fn test_point_child_via_slicing() { +// let point_array = crate::test::point::array(Default::default(), Dimension::XY); +// let geometry_array = GeometryArray::from(point_array.clone()); + +// let returned = geometry_array.point_child(Dimension::XY); +// assert_eq!(returned, point_array); + +// // Sliced at beginning +// let sliced_geometry_array = geometry_array.slice(0, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, point_array.slice(0, 2)); + +// // Sliced in middle +// let sliced_geometry_array = geometry_array.slice(1, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, point_array.slice(1, 2)); + +// // Sliced at end +// let sliced_geometry_array = geometry_array.slice(2, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, point_array.slice(2, 2)); +// } + +// #[test] +// fn test_point_child_mixed_geometries() { +// let geoms: Vec> = vec![ +// // 2D points +// Some(wkt! { POINT (30. 10.) }.into()), +// Some(wkt! { POINT (40. 20.) }.into()), +// // 3D points +// Some(wkt! { POINT Z (30. 10. 40.) }.into()), +// Some(wkt! { POINT Z (40. 20. 60.) }.into()), +// // More 2D points +// Some(wkt! { POINT (30. 10.) }.into()), +// Some(wkt! { POINT (40. 20.) }.into()), +// ]; + +// let mut full_xy_point_arr = +// PointBuilder::new(PointType::new(Dimension::XY, Default::default())); +// for idx in [0, 1, 4, 5] { +// full_xy_point_arr +// .push_geometry(geoms[idx].as_ref()) +// .unwrap(); +// } +// let full_xy_point_arr = full_xy_point_arr.finish(); + +// let geometry_array = GeometryBuilder::from_nullable_geometries(&geoms, Default::default()) +// .unwrap() +// .finish(); + +// let returned = geometry_array.point_child(Dimension::XY); +// assert_eq!(returned, full_xy_point_arr); + +// // Sliced at beginning +// let sliced_geometry_array = geometry_array.slice(0, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, full_xy_point_arr.slice(0, 2)); + +// // Sliced in middle +// let sliced_geometry_array = geometry_array.slice(1, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, full_xy_point_arr.slice(1, 1)); + +// // Sliced in middle, removing all 2D points +// let sliced_geometry_array = geometry_array.slice(2, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, full_xy_point_arr.slice(1, 0)); + +// let sliced_geometry_array = geometry_array.slice(3, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, full_xy_point_arr.slice(2, 1)); + +// // Sliced at end +// let sliced_geometry_array = geometry_array.slice(4, 2); +// let point_child = sliced_geometry_array.point_child(Dimension::XY); +// assert_eq!(point_child, full_xy_point_arr.slice(2, 2)); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/geometrycollection.rs b/src/geoarrow/geoarrow-array/src/array/geometrycollection.rs new file mode 100644 index 0000000000..a6f2cf51bd --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/geometrycollection.rs @@ -0,0 +1,354 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, cast::AsArray}; +use arrow_buffer::{NullBuffer, OffsetBuffer}; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, GeometryCollectionType, Metadata, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::{GenericWkbArray, MixedGeometryArray}, + builder::GeometryCollectionBuilder, + capacity::GeometryCollectionCapacity, + eq::offset_buffer_eq, + scalar::GeometryCollection, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, + util::{OffsetBufferUtils, offsets_buffer_i32_to_i64}, +}; + +/// An immutable array of GeometryCollection geometries. +/// +/// This is semantically equivalent to `Vec>` due to the internal +/// validity bitmap. +#[derive(Debug, Clone)] +pub struct GeometryCollectionArray { + pub(crate) data_type: GeometryCollectionType, + + pub(crate) array: MixedGeometryArray, + + /// Offsets into the mixed geometry array where each geometry starts + pub(crate) geom_offsets: OffsetBuffer, + + /// Validity bitmap + pub(crate) nulls: Option, +} + +impl GeometryCollectionArray { + /// Create a new GeometryCollectionArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + pub fn new( + array: MixedGeometryArray, + geom_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> Self { + Self { + data_type: GeometryCollectionType::new(array.dim, metadata) + .with_coord_type(array.coord_type), + array, + geom_offsets, + nulls, + } + } + + fn geometries_field(&self) -> Arc { + Field::new("geometries", self.array.storage_type(), false).into() + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> GeometryCollectionCapacity { + GeometryCollectionCapacity::new( + self.array.buffer_lengths(), + *self.geom_offsets.last() as usize, + ) + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0); + validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension()) + } + + /// Slice this [`GeometryCollectionArray`]. + /// + /// # Implementation + /// + /// This operation is `O(1)` as it amounts to increasing a few ref counts. + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + // Note: we **only** slice the geom_offsets and not any actual data + Self { + data_type: self.data_type.clone(), + array: self.array.clone(), + geom_offsets: self.geom_offsets.slice(offset, length), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + array: self.array.into_coord_type(coord_type), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for GeometryCollectionArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::GeometryCollection(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for GeometryCollectionArray { + type Item = GeometryCollection<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(GeometryCollection::new( + &self.array, + &self.geom_offsets, + index, + )) + } +} + +impl IntoArrow for GeometryCollectionArray { + type ArrowArray = GenericListArray; + type ExtensionType = GeometryCollectionType; + + fn into_arrow(self) -> Self::ArrowArray { + let geometries_field = self.geometries_field(); + let nulls = self.nulls; + let values = self.array.into_array_ref(); + GenericListArray::new(geometries_field, self.geom_offsets, values, nulls) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&GenericListArray, GeometryCollectionType)> for GeometryCollectionArray { + type Error = GeoArrowError; + + fn try_from( + (value, typ): (&GenericListArray, GeometryCollectionType), + ) -> GeoArrowResult { + let geoms: MixedGeometryArray = + (value.values().as_ref(), typ.dimension(), typ.coord_type()).try_into()?; + let geom_offsets = offsets_buffer_i32_to_i64(value.offsets()); + let nulls = value.nulls(); + + Ok(Self::new( + geoms, + geom_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&GenericListArray, GeometryCollectionType)> for GeometryCollectionArray { + type Error = GeoArrowError; + + fn try_from( + (value, typ): (&GenericListArray, GeometryCollectionType), + ) -> GeoArrowResult { + let geoms: MixedGeometryArray = + (value.values().as_ref(), typ.dimension(), typ.coord_type()).try_into()?; + let geom_offsets = value.offsets(); + let nulls = value.nulls(); + + Ok(Self::new( + geoms, + geom_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&dyn Array, GeometryCollectionType)> for GeometryCollectionArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, GeometryCollectionType)) -> GeoArrowResult { + match value.data_type() { + DataType::List(_) => (value.as_list::(), typ).try_into(), + DataType::LargeList(_) => (value.as_list::(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected GeometryCollection Arrow DataType: {dt:?}" + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for GeometryCollectionArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl TryFrom<(GenericWkbArray, GeometryCollectionType)> + for GeometryCollectionArray +{ + type Error = GeoArrowError; + + fn try_from(value: (GenericWkbArray, GeometryCollectionType)) -> GeoArrowResult { + let mut_arr: GeometryCollectionBuilder = value.try_into()?; + Ok(mut_arr.finish()) + } +} + +impl PartialEq for GeometryCollectionArray { + fn eq(&self, other: &Self) -> bool { + self.nulls == other.nulls + && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets) + && self.array == other.array + } +} + +impl GeometryTypeId for GeometryCollectionArray { + const GEOMETRY_TYPE_OFFSET: i8 = 7; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::{CoordType, Dimension}; +// use geoarrow_test::raw; + +// use super::*; +// use crate::test::geometrycollection; + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// for prefer_multi in [true, false] { +// let geo_arr = geometrycollection::array(coord_type, dim, prefer_multi); + +// let point_type = geo_arr.extension_type().clone(); +// let field = point_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: GeometryCollectionArray = +// (arrow_arr.as_ref(), point_type).try_into().unwrap(); +// let geo_arr3: GeometryCollectionArray = +// (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } +// } + +// #[test] +// fn test_nullability() { +// let geoms = raw::geometrycollection::xy::geoms(); +// let null_idxs = geoms +// .iter() +// .enumerate() +// .filter_map(|(i, geom)| if geom.is_none() { Some(i) } else { None }) +// .collect::>(); + +// let typ = GeometryCollectionType::new(Dimension::XY, Default::default()); +// let geo_arr = GeometryCollectionBuilder::from_nullable_geometry_collections(&geoms, typ) +// .unwrap() +// .finish(); + +// for null_idx in &null_idxs { +// assert!(geo_arr.is_null(*null_idx)); +// } +// } + +// #[test] +// fn test_logical_nulls() { +// let geoms = raw::geometrycollection::xy::geoms(); +// let expected_nulls = NullBuffer::from_iter(geoms.iter().map(|g| g.is_some())); + +// let typ = GeometryCollectionType::new(Dimension::XY, Default::default()); +// let geo_arr = GeometryCollectionBuilder::from_nullable_geometry_collections(&geoms, typ) +// .unwrap() +// .finish(); + +// assert_eq!(geo_arr.logical_nulls().unwrap(), expected_nulls); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/linestring.rs b/src/geoarrow/geoarrow-array/src/array/linestring.rs new file mode 100644 index 0000000000..d5107e4a21 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/linestring.rs @@ -0,0 +1,476 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, cast::AsArray}; +use arrow_buffer::{NullBuffer, OffsetBuffer}; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, LineStringType, Metadata, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::{CoordBuffer, GenericWkbArray}, + builder::LineStringBuilder, + capacity::LineStringCapacity, + eq::offset_buffer_eq, + scalar::LineString, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, + util::{OffsetBufferUtils, offsets_buffer_i32_to_i64}, +}; + +/// An immutable array of LineString geometries. +/// +/// This is semantically equivalent to `Vec>` due to the internal validity +/// bitmap. +#[derive(Debug, Clone)] +pub struct LineStringArray { + pub(crate) data_type: LineStringType, + + pub(crate) coords: CoordBuffer, + + /// Offsets into the coordinate array where each geometry starts + pub(crate) geom_offsets: OffsetBuffer, + + /// Validity bitmap + pub(crate) nulls: Option, +} + +pub(super) fn check( + coords: &CoordBuffer, + validity_len: Option, + geom_offsets: &OffsetBuffer, +) -> GeoArrowResult<()> { + if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) { + return Err(GeoArrowError::InvalidGeoArrow( + "nulls mask length must match the number of values".to_string(), + )); + } + + // Offset can be smaller than coords length if sliced + if *geom_offsets.last() as usize > coords.len() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest geometry offset must not be longer than coords length".to_string(), + )); + } + + Ok(()) +} + +impl LineStringArray { + /// Create a new LineStringArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest geometry offset does not match the number of coordinates + pub fn new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> Self { + Self::try_new(coords, geom_offsets, nulls, metadata).unwrap() + } + + /// Create a new LineStringArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// - if the nulls buffer does not have the same length as the number of geometries + /// - if the geometry offsets do not match the number of coordinates + pub fn try_new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> GeoArrowResult { + check(&coords, nulls.as_ref().map(|v| v.len()), &geom_offsets)?; + Ok(Self { + data_type: LineStringType::new(coords.dim(), metadata) + .with_coord_type(coords.coord_type()), + coords, + geom_offsets, + nulls, + }) + } + + /// Access the underlying coordinate buffer + pub fn coords(&self) -> &CoordBuffer { + &self.coords + } + + /// Access the underlying geometry offsets buffer + pub fn geom_offsets(&self) -> &OffsetBuffer { + &self.geom_offsets + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> LineStringCapacity { + LineStringCapacity::new(*self.geom_offsets.last() as usize, self.len()) + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0); + validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension()) + } + + /// Slice this [`LineStringArray`]. + /// + /// # Implementation + /// + /// This operation is `O(1)` as it amounts to increasing a few ref counts. + /// + /// # Panic + /// + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets + // would be in the wrong location. + Self { + data_type: self.data_type.clone(), + coords: self.coords.clone(), + geom_offsets: self.geom_offsets.slice(offset, length), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + coords: self.coords.into_coord_type(coord_type), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for LineStringArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::LineString(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for LineStringArray { + type Item = LineString<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(LineString::new(&self.coords, &self.geom_offsets, index)) + } +} + +impl IntoArrow for LineStringArray { + type ArrowArray = GenericListArray; + type ExtensionType = LineStringType; + + fn into_arrow(self) -> Self::ArrowArray { + let vertices_field = match self.data_type.data_type() { + DataType::LargeList(inner_field) => inner_field, + _ => unreachable!(), + }; + let nulls = self.nulls; + let coord_array = self.coords.into_array_ref(); + GenericListArray::new(vertices_field, self.geom_offsets, coord_array, nulls) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&GenericListArray, LineStringType)> for LineStringArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&GenericListArray, LineStringType)) -> GeoArrowResult { + let coords = CoordBuffer::from_arrow(value.values().as_ref(), typ.dimension())?; + let geom_offsets = offsets_buffer_i32_to_i64(value.offsets()); + let nulls = value.nulls(); + + Ok(Self::new( + coords, + geom_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&GenericListArray, LineStringType)> for LineStringArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&GenericListArray, LineStringType)) -> GeoArrowResult { + let coords = CoordBuffer::from_arrow(value.values().as_ref(), typ.dimension())?; + let geom_offsets = value.offsets(); + let nulls = value.nulls(); + + Ok(Self::new( + coords, + geom_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} +impl TryFrom<(&dyn Array, LineStringType)> for LineStringArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, LineStringType)) -> GeoArrowResult { + match value.data_type() { + DataType::List(_) => (value.as_list::(), typ).try_into(), + DataType::LargeList(_) => (value.as_list::(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected LineString DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for LineStringArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl TryFrom<(GenericWkbArray, LineStringType)> for LineStringArray { + type Error = GeoArrowError; + + fn try_from(value: (GenericWkbArray, LineStringType)) -> GeoArrowResult { + let mut_arr: LineStringBuilder = value.try_into()?; + Ok(mut_arr.finish()) + } +} + +impl PartialEq for LineStringArray { + fn eq(&self, other: &Self) -> bool { + self.nulls == other.nulls + && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets) + && self.coords == other.coords + } +} + +impl GeometryTypeId for LineStringArray { + const GEOMETRY_TYPE_OFFSET: i8 = 2; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use arrow_array::RecordBatch; +// use arrow_schema::Schema; +// use geo_traits::to_geo::ToGeoLineString; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; +// use crate::test::linestring; + +// #[test] +// fn geo_round_trip() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = [Some(linestring::ls0()), None, Some(linestring::ls1()), None]; +// let typ = +// LineStringType::new(Dimension::XY, Default::default()).with_coord_type(coord_type); +// let geo_arr = LineStringBuilder::from_nullable_line_strings(&geoms, typ).finish(); + +// for (i, g) in geo_arr.iter().enumerate() { +// assert_eq!(geoms[i], g.transpose().unwrap().map(|g| g.to_line_string())); +// } + +// // Test sliced +// for (i, g) in geo_arr.slice(2, 2).iter().enumerate() { +// assert_eq!( +// geoms[i + 2], +// g.transpose().unwrap().map(|g| g.to_line_string()) +// ); +// } +// } +// } + +// #[test] +// fn geo_round_trip2() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geo_arr = linestring::array(coord_type, Dimension::XY); +// let geo_geoms = geo_arr +// .iter() +// .map(|x| x.transpose().unwrap().map(|g| g.to_line_string())) +// .collect::>(); + +// let typ = +// LineStringType::new(Dimension::XY, Default::default()).with_coord_type(coord_type); +// let geo_arr2 = LineStringBuilder::from_nullable_line_strings(&geo_geoms, typ).finish(); +// assert_eq!(geo_arr, geo_arr2); +// } +// } + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let geo_arr = linestring::array(coord_type, dim); + +// let extension_type = geo_arr.extension_type().clone(); +// let field = extension_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: LineStringArray = +// (arrow_arr.as_ref(), extension_type).try_into().unwrap(); +// let geo_arr3: LineStringArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } + +// #[test] +// fn partial_eq() { +// let arr1 = linestring::ls_array(CoordType::Interleaved); +// let arr2 = linestring::ls_array(CoordType::Separated); +// assert_eq!(arr1, arr1); +// assert_eq!(arr2, arr2); +// assert_eq!(arr1, arr2); + +// assert_ne!(arr1, arr2.slice(0, 2)); +// } + +// #[test] +// fn test_validation_with_sliced_array() { +// let arr = linestring::array(CoordType::Interleaved, Dimension::XY); +// let sliced = arr.slice(0, 1); + +// let back = LineStringArray::try_from(( +// sliced.to_array_ref().as_ref(), +// arr.extension_type().clone(), +// )) +// .unwrap(); +// assert_eq!(back.len(), 1); +// } + +// #[test] +// fn slice_then_go_through_arrow() { +// let arr = linestring::array(CoordType::Separated, Dimension::XY); +// let sliced_array = arr.slice(0, 1); + +// let ls_array: LineStringArray = ( +// sliced_array.to_array_ref().as_ref(), +// arr.extension_type().clone(), +// ) +// .try_into() +// .unwrap(); +// assert_eq!(ls_array.len(), 1); +// } + +// #[test] +// fn slice_back_from_arrow_rs_record_batch() { +// let arr = linestring::array(CoordType::Separated, Dimension::XY); +// let field = arr.extension_type().to_field("geometry", true); +// let schema = Schema::new(vec![field]); + +// let batch = RecordBatch::try_new(Arc::new(schema), vec![arr.to_array_ref()]).unwrap(); +// let sliced_batch = batch.slice(0, 1); + +// let array = sliced_batch.column(0); +// let field = sliced_batch.schema_ref().field(0); +// let ls_array: LineStringArray = (array.as_ref(), field).try_into().unwrap(); +// assert_eq!(ls_array.len(), 1); +// } + +// #[test] +// fn slice_back_from_arrow_rs_array() { +// let arr = linestring::array(CoordType::Separated, Dimension::XY); +// let field = arr.extension_type().to_field("geometry", true); + +// let array = arr.to_array_ref(); +// let sliced_array = array.slice(0, 1); + +// let ls_array: LineStringArray = (sliced_array.as_ref(), &field).try_into().unwrap(); +// assert_eq!(ls_array.len(), 1); +// } + +// #[test] +// fn slice_back_from_arrow_rs_array_with_nulls() { +// let arr = linestring::ls_array(CoordType::Separated); +// let field = arr.extension_type().to_field("geometry", true); + +// let array = arr.to_array_ref(); +// let sliced_array = array.slice(0, 1); + +// let ls_array: LineStringArray = (sliced_array.as_ref(), &field).try_into().unwrap(); +// assert_eq!(ls_array.len(), 1); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/mixed.rs b/src/geoarrow/geoarrow-array/src/array/mixed.rs new file mode 100644 index 0000000000..c142f7a1c6 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/mixed.rs @@ -0,0 +1,712 @@ +use std::{collections::HashSet, sync::Arc}; + +use arrow_array::{Array, ArrayRef, UnionArray, cast::AsArray}; +use arrow_buffer::ScalarBuffer; +use arrow_schema::{DataType, UnionMode}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, GeometryCollectionType, LineStringType, + MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArrayAccessor, + array::{ + DimensionIndex, LineStringArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray, + PointArray, PolygonArray, + }, + builder::{ + LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder, MultiPolygonBuilder, + PointBuilder, PolygonBuilder, + }, + capacity::MixedCapacity, + scalar::Geometry, + trait_::GeoArrowArray, +}; + +/// # Invariants +/// +/// - All arrays must have the same dimension +/// - All arrays must have the same coordinate layout (interleaved or separated) +/// +/// - 1: Point +/// - 2: LineString +/// - 3: Polygon +/// - 4: MultiPoint +/// - 5: MultiLineString +/// - 6: MultiPolygon +/// - 7: GeometryCollection +/// - 11: Point Z +/// - 12: LineString Z +/// - 13: Polygon Z +/// - 14: MultiPoint Z +/// - 15: MultiLineString Z +/// - 16: MultiPolygon Z +/// - 17: GeometryCollection Z +/// - 21: Point M +/// - 22: LineString M +/// - 23: Polygon M +/// - 24: MultiPoint M +/// - 25: MultiLineString M +/// - 26: MultiPolygon M +/// - 27: GeometryCollection M +/// - 31: Point ZM +/// - 32: LineString ZM +/// - 33: Polygon ZM +/// - 34: MultiPoint ZM +/// - 35: MultiLineString ZM +/// - 36: MultiPolygon ZM +/// - 37: GeometryCollection ZM +#[derive(Debug, Clone)] +pub struct MixedGeometryArray { + pub(crate) coord_type: CoordType, + pub(crate) dim: Dimension, + + /// Invariant: every item in `type_ids` is `> 0 && < fields.len()` if `type_ids` are not provided. + pub(crate) type_ids: ScalarBuffer, + + /// Invariant: `offsets.len() == type_ids.len()` + pub(crate) offsets: ScalarBuffer, + + /// Invariant: Any of these arrays that are `Some()` must have length >0 + pub(crate) points: PointArray, + pub(crate) line_strings: LineStringArray, + pub(crate) polygons: PolygonArray, + pub(crate) multi_points: MultiPointArray, + pub(crate) multi_line_strings: MultiLineStringArray, + pub(crate) multi_polygons: MultiPolygonArray, + + /// We don't need a separate slice_length, because that's the length of the full + /// MixedGeometryArray + slice_offset: usize, +} + +impl MixedGeometryArray { + /// Create a new MixedGeometryArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the validity is not `None` and its length is different from the number of geometries + /// - if the largest geometry offset does not match the number of coordinates + #[allow(clippy::too_many_arguments)] + pub fn new( + type_ids: ScalarBuffer, + offsets: ScalarBuffer, + points: Option, + line_strings: Option, + polygons: Option, + multi_points: Option, + multi_line_strings: Option, + multi_polygons: Option, + ) -> Self { + let mut coord_types = HashSet::new(); + if let Some(points) = &points { + coord_types.insert(points.data_type.coord_type()); + } + if let Some(line_strings) = &line_strings { + coord_types.insert(line_strings.data_type.coord_type()); + } + if let Some(polygons) = &polygons { + coord_types.insert(polygons.data_type.coord_type()); + } + if let Some(multi_points) = &multi_points { + coord_types.insert(multi_points.data_type.coord_type()); + } + if let Some(multi_line_strings) = &multi_line_strings { + coord_types.insert(multi_line_strings.data_type.coord_type()); + } + if let Some(multi_polygons) = &multi_polygons { + coord_types.insert(multi_polygons.data_type.coord_type()); + } + assert!(coord_types.len() <= 1); + let coord_type = coord_types + .into_iter() + .next() + .unwrap_or(CoordType::Interleaved); + + let mut dimensions = HashSet::new(); + if let Some(points) = &points { + dimensions.insert(points.data_type.dimension()); + } + if let Some(line_strings) = &line_strings { + dimensions.insert(line_strings.data_type.dimension()); + } + if let Some(polygons) = &polygons { + dimensions.insert(polygons.data_type.dimension()); + } + if let Some(multi_points) = &multi_points { + dimensions.insert(multi_points.data_type.dimension()); + } + if let Some(multi_line_strings) = &multi_line_strings { + dimensions.insert(multi_line_strings.data_type.dimension()); + } + if let Some(multi_polygons) = &multi_polygons { + dimensions.insert(multi_polygons.data_type.dimension()); + } + assert_eq!(dimensions.len(), 1); + let dim = dimensions.into_iter().next().unwrap(); + + Self { + coord_type, + dim, + type_ids, + offsets, + points: points.unwrap_or( + PointBuilder::new( + PointType::new(dim, Default::default()).with_coord_type(coord_type), + ) + .finish(), + ), + line_strings: line_strings.unwrap_or( + LineStringBuilder::new( + LineStringType::new(dim, Default::default()).with_coord_type(coord_type), + ) + .finish(), + ), + polygons: polygons.unwrap_or( + PolygonBuilder::new( + PolygonType::new(dim, Default::default()).with_coord_type(coord_type), + ) + .finish(), + ), + multi_points: multi_points.unwrap_or( + MultiPointBuilder::new( + MultiPointType::new(dim, Default::default()).with_coord_type(coord_type), + ) + .finish(), + ), + multi_line_strings: multi_line_strings.unwrap_or( + MultiLineStringBuilder::new( + MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type), + ) + .finish(), + ), + multi_polygons: multi_polygons.unwrap_or( + MultiPolygonBuilder::new( + MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type), + ) + .finish(), + ), + slice_offset: 0, + } + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> MixedCapacity { + MixedCapacity::new( + self.points.buffer_lengths(), + self.line_strings.buffer_lengths(), + self.polygons.buffer_lengths(), + self.multi_points.buffer_lengths(), + self.multi_line_strings.buffer_lengths(), + self.multi_polygons.buffer_lengths(), + ) + } + + /// Return `true` if this array has been sliced. + pub(crate) fn is_sliced(&self) -> bool { + // Note this is still not a valid check, because it could've been sliced with start 0 but + // length less than the full length. + // self.slice_offset > 0 || self.slice_length + + let mut child_lengths = 0; + child_lengths += self.points.len(); + child_lengths += self.line_strings.len(); + child_lengths += self.polygons.len(); + child_lengths += self.multi_points.len(); + child_lengths += self.multi_line_strings.len(); + child_lengths += self.multi_polygons.len(); + + child_lengths > self.len() + } + + pub fn has_points(&self) -> bool { + if self.points.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == PointType::GEOMETRY_TYPE_OFFSET { + return true; + } + } + + return false; + } + + true + } + + pub fn has_line_strings(&self) -> bool { + if self.line_strings.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == LineStringType::GEOMETRY_TYPE_OFFSET { + return true; + } + } + + return false; + } + + true + } + + pub fn has_polygons(&self) -> bool { + if self.polygons.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == PolygonType::GEOMETRY_TYPE_OFFSET { + return true; + } + } + + return false; + } + + true + } + + pub fn has_multi_points(&self) -> bool { + if self.multi_points.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == MultiPointType::GEOMETRY_TYPE_OFFSET { + return true; + } + } + + return false; + } + + true + } + + pub fn has_multi_line_strings(&self) -> bool { + if self.multi_line_strings.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == MultiLineStringType::GEOMETRY_TYPE_OFFSET { + return true; + } + } + + return false; + } + + true + } + + pub fn has_multi_polygons(&self) -> bool { + if self.multi_polygons.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == MultiPolygonType::GEOMETRY_TYPE_OFFSET { + return true; + } + } + + return false; + } + + true + } + + pub fn has_only_points(&self) -> bool { + self.has_points() + && !self.has_line_strings() + && !self.has_polygons() + && !self.has_multi_points() + && !self.has_multi_line_strings() + && !self.has_multi_polygons() + } + + pub fn has_only_line_strings(&self) -> bool { + !self.has_points() + && self.has_line_strings() + && !self.has_polygons() + && !self.has_multi_points() + && !self.has_multi_line_strings() + && !self.has_multi_polygons() + } + + pub fn has_only_polygons(&self) -> bool { + !self.has_points() + && !self.has_line_strings() + && self.has_polygons() + && !self.has_multi_points() + && !self.has_multi_line_strings() + && !self.has_multi_polygons() + } + + pub fn has_only_multi_points(&self) -> bool { + !self.has_points() + && !self.has_line_strings() + && !self.has_polygons() + && self.has_multi_points() + && !self.has_multi_line_strings() + && !self.has_multi_polygons() + } + + pub fn has_only_multi_line_strings(&self) -> bool { + !self.has_points() + && !self.has_line_strings() + && !self.has_polygons() + && !self.has_multi_points() + && self.has_multi_line_strings() + && !self.has_multi_polygons() + } + + pub fn has_only_multi_polygons(&self) -> bool { + !self.has_points() + && !self.has_line_strings() + && !self.has_polygons() + && !self.has_multi_points() + && !self.has_multi_line_strings() + && self.has_multi_polygons() + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + self.buffer_lengths().num_bytes(self.dim) + } + + /// Slice this [`MixedGeometryArray`]. + /// + /// # Implementation + /// + /// This operation is `O(F)` where `F` is the number of fields. + /// + /// # Panic + /// + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + coord_type: self.coord_type, + dim: self.dim, + type_ids: self.type_ids.slice(offset, length), + offsets: self.offsets.slice(offset, length), + points: self.points.clone(), + line_strings: self.line_strings.clone(), + polygons: self.polygons.clone(), + multi_points: self.multi_points.clone(), + multi_line_strings: self.multi_line_strings.clone(), + multi_polygons: self.multi_polygons.clone(), + slice_offset: self.slice_offset + offset, + } + } + + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + coord_type, + points: self.points.into_coord_type(coord_type), + line_strings: self.line_strings.into_coord_type(coord_type), + polygons: self.polygons.into_coord_type(coord_type), + multi_points: self.multi_points.into_coord_type(coord_type), + multi_line_strings: self.multi_line_strings.into_coord_type(coord_type), + multi_polygons: self.multi_polygons.into_coord_type(coord_type), + ..self + } + } + + pub fn contained_types(&self) -> HashSet { + let mut types = HashSet::new(); + if self.has_points() { + types.insert(self.points.data_type()); + } + if self.has_line_strings() { + types.insert(self.line_strings.data_type()); + } + if self.has_polygons() { + types.insert(self.polygons.data_type()); + } + if self.has_multi_points() { + types.insert(self.multi_points.data_type()); + } + if self.has_multi_line_strings() { + types.insert(self.multi_line_strings.data_type()); + } + if self.has_multi_polygons() { + types.insert(self.multi_polygons.data_type()); + } + + types + } + + pub(crate) fn storage_type(&self) -> DataType { + match GeometryCollectionType::new(self.dim, Default::default()) + .with_coord_type(self.coord_type) + .data_type() + { + DataType::LargeList(inner_field) => inner_field.data_type().clone(), + _ => unreachable!(), + } + } + + pub(crate) fn into_array_ref(self) -> ArrayRef { + Arc::new(UnionArray::from(self)) + } + + #[inline] + fn len(&self) -> usize { + // Note that `type_ids` is sliced as usual, and thus always has the correct length. + self.type_ids.len() + } + + // Note: this is copied from ArrayAccessor because MixedGeometryArray doesn't implement + // GeoArrowArray + pub(crate) unsafe fn value_unchecked(&self, index: usize) -> Geometry<'_> { + let type_id = self.type_ids[index]; + let offset = self.offsets[index] as usize; + + let expect_msg = "native geometry value access should never error"; + match type_id % 10 { + PointType::GEOMETRY_TYPE_OFFSET => { + Geometry::Point(self.points.value(offset).expect(expect_msg)) + } + LineStringType::GEOMETRY_TYPE_OFFSET => { + Geometry::LineString(self.line_strings.value(offset).expect(expect_msg)) + } + PolygonType::GEOMETRY_TYPE_OFFSET => { + Geometry::Polygon(self.polygons.value(offset).expect(expect_msg)) + } + MultiPointType::GEOMETRY_TYPE_OFFSET => { + Geometry::MultiPoint(self.multi_points.value(offset).expect(expect_msg)) + } + MultiLineStringType::GEOMETRY_TYPE_OFFSET => { + Geometry::MultiLineString(self.multi_line_strings.value(offset).expect(expect_msg)) + } + MultiPolygonType::GEOMETRY_TYPE_OFFSET => { + Geometry::MultiPolygon(self.multi_polygons.value(offset).expect(expect_msg)) + } + GeometryCollectionType::GEOMETRY_TYPE_OFFSET => { + panic!("nested geometry collections not supported in GeoArrow") + } + _ => unreachable!("unknown type_id {}", type_id), + } + } + + // Note: this is copied from ArrayAccessor because MixedGeometryArray doesn't implement + // GeoArrowArray + pub(crate) fn value(&self, index: usize) -> Geometry<'_> { + assert!(index <= self.len()); + unsafe { self.value_unchecked(index) } + } +} + +impl From for UnionArray { + fn from(value: MixedGeometryArray) -> Self { + let union_fields = match value.storage_type() { + DataType::Union(union_fields, _) => union_fields, + _ => unreachable!(), + }; + + let child_arrays = vec![ + value.points.into_array_ref(), + value.line_strings.into_array_ref(), + value.polygons.into_array_ref(), + value.multi_points.into_array_ref(), + value.multi_line_strings.into_array_ref(), + value.multi_polygons.into_array_ref(), + ]; + + UnionArray::try_new( + union_fields, + value.type_ids, + Some(value.offsets), + child_arrays, + ) + .unwrap() + } +} + +impl TryFrom<(&UnionArray, Dimension, CoordType)> for MixedGeometryArray { + type Error = GeoArrowError; + + fn try_from( + (value, dim, coord_type): (&UnionArray, Dimension, CoordType), + ) -> GeoArrowResult { + let mut points: Option = None; + let mut line_strings: Option = None; + let mut polygons: Option = None; + let mut multi_points: Option = None; + let mut multi_line_strings: Option = None; + let mut multi_polygons: Option = None; + + match value.data_type() { + DataType::Union(fields, mode) => { + if !matches!(mode, UnionMode::Dense) { + return Err(GeoArrowError::InvalidGeoArrow( + "Expected dense union".to_string(), + )); + } + + for (type_id, _field) in fields.iter() { + let found_dimension = Dimension::from_order((type_id / 10) as _)?; + + if dim != found_dimension { + return Err(GeoArrowError::InvalidGeoArrow(format!( + "expected dimension: {dim:?}, found child array with dimension {found_dimension:?} and type_id: {type_id}", + ))); + } + + match type_id % 10 { + PointType::GEOMETRY_TYPE_OFFSET => { + points = Some( + ( + value.child(type_id).as_ref(), + PointType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into() + .unwrap(), + ); + } + LineStringType::GEOMETRY_TYPE_OFFSET => { + line_strings = Some( + ( + value.child(type_id).as_ref(), + LineStringType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into() + .unwrap(), + ); + } + PolygonType::GEOMETRY_TYPE_OFFSET => { + polygons = Some( + ( + value.child(type_id).as_ref(), + PolygonType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into() + .unwrap(), + ); + } + MultiPointType::GEOMETRY_TYPE_OFFSET => { + multi_points = Some( + ( + value.child(type_id).as_ref(), + MultiPointType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into() + .unwrap(), + ); + } + MultiLineStringType::GEOMETRY_TYPE_OFFSET => { + multi_line_strings = Some( + ( + value.child(type_id).as_ref(), + MultiLineStringType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into() + .unwrap(), + ); + } + MultiPolygonType::GEOMETRY_TYPE_OFFSET => { + multi_polygons = Some( + ( + value.child(type_id).as_ref(), + MultiPolygonType::new(dim, Default::default()) + .with_coord_type(coord_type), + ) + .try_into() + .unwrap(), + ); + } + _ => { + return Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected type_id {type_id} when converting to MixedGeometryArray", + ))); + } + } + } + } + _ => { + return Err(GeoArrowError::InvalidGeoArrow( + "expected union type when converting to MixedGeometryArray".to_string(), + )); + } + }; + + let type_ids = value.type_ids().clone(); + // This is after checking for dense union + let offsets = value.offsets().unwrap().clone(); + + Ok(Self::new( + type_ids, + offsets, + points, + line_strings, + polygons, + multi_points, + multi_line_strings, + multi_polygons, + )) + } +} + +impl TryFrom<(&dyn Array, Dimension, CoordType)> for MixedGeometryArray { + type Error = GeoArrowError; + + fn try_from( + (value, dim, coord_type): (&dyn Array, Dimension, CoordType), + ) -> GeoArrowResult { + match value.data_type() { + DataType::Union(_, _) => (value.as_union(), dim, coord_type).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected MixedGeometryArray DataType: {dt:?}", + ))), + } + } +} + +impl PartialEq for MixedGeometryArray { + fn eq(&self, other: &Self) -> bool { + self.dim == other.dim + && self.type_ids == other.type_ids + && self.offsets == other.offsets + && self.points == other.points + && self.line_strings == other.line_strings + && self.polygons == other.polygons + && self.multi_points == other.multi_points + && self.multi_line_strings == other.multi_line_strings + && self.multi_polygons == other.multi_polygons + && self.slice_offset == other.slice_offset + } +} diff --git a/src/geoarrow/geoarrow-array/src/array/mod.rs b/src/geoarrow/geoarrow-array/src/array/mod.rs new file mode 100644 index 0000000000..0e86012a5c --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/mod.rs @@ -0,0 +1,117 @@ +//! The concrete array definitions. +//! +//! All arrays implement the core [GeoArrowArray] trait. + +mod coord; +mod geometry; +mod geometrycollection; +mod linestring; +mod mixed; +mod multilinestring; +mod multipoint; +mod multipolygon; +mod point; +mod polygon; +mod rect; +mod wkb; +mod wkb_view; +mod wkt; +mod wkt_view; + +use std::sync::Arc; + +use arrow_array::Array; +use arrow_schema::Field; +pub use coord::{CoordBuffer, InterleavedCoordBuffer, SeparatedCoordBuffer}; +use geoarrow_schema::{GeoArrowType, error::GeoArrowResult}; +pub(crate) use geometry::DimensionIndex; +pub use geometry::GeometryArray; +pub use geometrycollection::GeometryCollectionArray; +pub use linestring::LineStringArray; +pub(crate) use mixed::MixedGeometryArray; +pub use multilinestring::MultiLineStringArray; +pub use multipoint::MultiPointArray; +pub use multipolygon::MultiPolygonArray; +pub use point::PointArray; +pub use polygon::PolygonArray; +pub use rect::RectArray; +pub use wkb::{GenericWkbArray, LargeWkbArray, WkbArray}; +pub use wkb_view::WkbViewArray; +pub use wkt::{GenericWktArray, LargeWktArray, WktArray}; +pub use wkt_view::WktViewArray; + +use crate::GeoArrowArray; + +/// Construct a new [GeoArrowArray] from an Arrow [Array] and [Field]. +pub fn from_arrow_array( + array: &dyn Array, + field: &Field, +) -> GeoArrowResult> { + use GeoArrowType::*; + + let geo_type = GeoArrowType::from_arrow_field(field)?; + let result: Arc = match geo_type { + Point(_) => Arc::new(PointArray::try_from((array, field))?), + LineString(_) => Arc::new(LineStringArray::try_from((array, field))?), + Polygon(_) => Arc::new(PolygonArray::try_from((array, field))?), + MultiPoint(_) => Arc::new(MultiPointArray::try_from((array, field))?), + MultiLineString(_) => Arc::new(MultiLineStringArray::try_from((array, field))?), + MultiPolygon(_) => Arc::new(MultiPolygonArray::try_from((array, field))?), + GeometryCollection(_) => Arc::new(GeometryCollectionArray::try_from((array, field))?), + Rect(_) => Arc::new(RectArray::try_from((array, field))?), + Geometry(_) => Arc::new(GeometryArray::try_from((array, field))?), + Wkb(_) => Arc::new(WkbArray::try_from((array, field))?), + LargeWkb(_) => Arc::new(LargeWkbArray::try_from((array, field))?), + WkbView(_) => Arc::new(WkbViewArray::try_from((array, field))?), + Wkt(_) => Arc::new(WktArray::try_from((array, field))?), + LargeWkt(_) => Arc::new(LargeWktArray::try_from((array, field))?), + WktView(_) => Arc::new(WktViewArray::try_from((array, field))?), + }; + Ok(result) +} + +// TODO: should we have an API to get the raw underlying string/&[u8] value? + +/// A trait for GeoArrow arrays that can hold WKB data. +/// +/// Currently three types are supported: +/// +/// - [`GenericWkbArray`] +/// - [`GenericWkbArray`] +/// - [`WkbViewArray`] +/// +/// This trait helps to abstract over the different types of WKB arrays so that we don’t need to +/// duplicate the implementation for each type. +/// +/// This is modeled after the upstream [`BinaryArrayType`][arrow_array::array::BinaryArrayType] +/// trait. +pub trait GenericWkbArrayType<'a>: + Sized + crate::GeoArrowArrayAccessor<'a, Item = ::wkb::reader::Wkb<'a>> +{ +} + +impl GenericWkbArrayType<'_> for GenericWkbArray {} +impl GenericWkbArrayType<'_> for GenericWkbArray {} +impl GenericWkbArrayType<'_> for WkbViewArray {} + +/// A trait for GeoArrow arrays that can hold WKT data. +/// +/// Currently three types are supported: +/// +/// - [`GenericWktArray`] +/// - [`GenericWktArray`] +/// - [`WktViewArray`] +/// +/// This trait helps to abstract over the different types of WKT arrays so that we don’t need to +/// duplicate the implementation for each type. +/// +/// This is modeled after the upstream [`StringArrayType`][arrow_array::array::StringArrayType] +/// trait. +pub trait GenericWktArrayType: + Sized + for<'a> crate::GeoArrowArrayAccessor<'a, Item = ::wkt::Wkt> +{ +} + +impl GenericWktArrayType for GenericWktArray {} +impl GenericWktArrayType for GenericWktArray {} +impl GenericWktArrayType for WktViewArray {} diff --git a/src/geoarrow/geoarrow-array/src/array/multilinestring.rs b/src/geoarrow/geoarrow-array/src/array/multilinestring.rs new file mode 100644 index 0000000000..4deb600ffa --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/multilinestring.rs @@ -0,0 +1,532 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, cast::AsArray}; +use arrow_buffer::{NullBuffer, OffsetBuffer}; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, Metadata, MultiLineStringType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::{CoordBuffer, GenericWkbArray, LineStringArray}, + builder::MultiLineStringBuilder, + capacity::MultiLineStringCapacity, + eq::offset_buffer_eq, + scalar::MultiLineString, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, + util::{OffsetBufferUtils, offsets_buffer_i32_to_i64}, +}; + +/// An immutable array of MultiLineString geometries. +/// +/// This is semantically equivalent to `Vec>` due to the internal validity +/// bitmap. +#[derive(Debug, Clone)] +pub struct MultiLineStringArray { + pub(crate) data_type: MultiLineStringType, + + pub(crate) coords: CoordBuffer, + + /// Offsets into the ring array where each geometry starts + pub(crate) geom_offsets: OffsetBuffer, + + /// Offsets into the coordinate array where each ring starts + pub(crate) ring_offsets: OffsetBuffer, + + /// Validity bitmap + pub(crate) nulls: Option, +} + +pub(super) fn check( + coords: &CoordBuffer, + geom_offsets: &OffsetBuffer, + ring_offsets: &OffsetBuffer, + validity_len: Option, +) -> GeoArrowResult<()> { + if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) { + return Err(GeoArrowError::InvalidGeoArrow( + "nulls mask length must match the number of values".to_string(), + )); + } + + if *ring_offsets.last() as usize != coords.len() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest ring offset must match coords length".to_string(), + )); + } + + // Offset can be smaller than length if sliced + if *geom_offsets.last() as usize > ring_offsets.len_proxy() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest geometry offset must not be longer than ring offsets length".to_string(), + )); + } + + Ok(()) +} + +impl MultiLineStringArray { + /// Create a new MultiLineStringArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest ring offset does not match the number of coordinates + /// - if the largest geometry offset does not match the size of ring offsets + pub fn new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + ring_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> Self { + Self::try_new(coords, geom_offsets, ring_offsets, nulls, metadata).unwrap() + } + + /// Create a new MultiLineStringArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest ring offset does not match the number of coordinates + /// - if the largest geometry offset does not match the size of ring offsets + pub fn try_new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + ring_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> GeoArrowResult { + check( + &coords, + &geom_offsets, + &ring_offsets, + nulls.as_ref().map(|v| v.len()), + )?; + Ok(Self { + data_type: MultiLineStringType::new(coords.dim(), metadata) + .with_coord_type(coords.coord_type()), + coords, + geom_offsets, + ring_offsets, + nulls, + }) + } + + fn vertices_field(&self) -> Arc { + Field::new("vertices", self.coords.storage_type(), false).into() + } + + fn linestrings_field(&self) -> Arc { + Field::new_large_list("linestrings", self.vertices_field(), false).into() + } + + /// Access the underlying coordinate buffer + pub fn coords(&self) -> &CoordBuffer { + &self.coords + } + + /// Access the underlying geometry offsets buffer + pub fn geom_offsets(&self) -> &OffsetBuffer { + &self.geom_offsets + } + + /// Access the underlying ring offsets buffer + pub fn ring_offsets(&self) -> &OffsetBuffer { + &self.ring_offsets + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> MultiLineStringCapacity { + MultiLineStringCapacity::new( + *self.ring_offsets.last() as usize, + *self.geom_offsets.last() as usize, + self.len(), + ) + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0); + validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension()) + } + + /// Slice this [`MultiLineStringArray`]. + /// + /// # Panic + /// + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets + // would be in the wrong location. + Self { + data_type: self.data_type.clone(), + coords: self.coords.clone(), + geom_offsets: self.geom_offsets.slice(offset, length), + ring_offsets: self.ring_offsets.clone(), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + coords: self.coords.into_coord_type(coord_type), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for MultiLineStringArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::MultiLineString(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for MultiLineStringArray { + type Item = MultiLineString<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(MultiLineString::new( + &self.coords, + &self.geom_offsets, + &self.ring_offsets, + index, + )) + } +} + +impl IntoArrow for MultiLineStringArray { + type ArrowArray = GenericListArray; + type ExtensionType = MultiLineStringType; + + fn into_arrow(self) -> Self::ArrowArray { + let vertices_field = self.vertices_field(); + let linestrings_field = self.linestrings_field(); + let nulls = self.nulls; + let coord_array = self.coords.into_array_ref(); + let ring_array = Arc::new(GenericListArray::new( + vertices_field, + self.ring_offsets, + coord_array, + None, + )); + GenericListArray::new(linestrings_field, self.geom_offsets, ring_array, nulls) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&GenericListArray, MultiLineStringType)> for MultiLineStringArray { + type Error = GeoArrowError; + + fn try_from( + (geom_array, typ): (&GenericListArray, MultiLineStringType), + ) -> GeoArrowResult { + let geom_offsets = offsets_buffer_i32_to_i64(geom_array.offsets()); + let nulls = geom_array.nulls(); + + let rings_dyn_array = geom_array.values(); + let rings_array = rings_dyn_array.as_list::(); + + let ring_offsets = offsets_buffer_i32_to_i64(rings_array.offsets()); + let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?; + + Ok(Self::new( + coords, + geom_offsets.clone(), + ring_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&GenericListArray, MultiLineStringType)> for MultiLineStringArray { + type Error = GeoArrowError; + + fn try_from( + (geom_array, typ): (&GenericListArray, MultiLineStringType), + ) -> GeoArrowResult { + let geom_offsets = geom_array.offsets(); + let nulls = geom_array.nulls(); + + let rings_dyn_array = geom_array.values(); + let rings_array = rings_dyn_array.as_list::(); + + let ring_offsets = rings_array.offsets(); + let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?; + + Ok(Self::new( + coords, + geom_offsets.clone(), + ring_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&dyn Array, MultiLineStringType)> for MultiLineStringArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, MultiLineStringType)) -> GeoArrowResult { + match value.data_type() { + DataType::List(_) => (value.as_list::(), typ).try_into(), + DataType::LargeList(_) => (value.as_list::(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected MultiLineString DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for MultiLineStringArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl TryFrom<(GenericWkbArray, MultiLineStringType)> + for MultiLineStringArray +{ + type Error = GeoArrowError; + + fn try_from(value: (GenericWkbArray, MultiLineStringType)) -> GeoArrowResult { + let mut_arr: MultiLineStringBuilder = value.try_into()?; + Ok(mut_arr.finish()) + } +} + +impl From for MultiLineStringArray { + fn from(value: LineStringArray) -> Self { + let (coord_type, dimension, metadata) = value.data_type.into_inner(); + let new_type = MultiLineStringType::new(dimension, metadata).with_coord_type(coord_type); + + let coords = value.coords; + let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]); + let ring_offsets = value.geom_offsets; + let nulls = value.nulls; + Self { + data_type: new_type, + coords, + geom_offsets, + ring_offsets, + nulls, + } + } +} + +impl PartialEq for MultiLineStringArray { + fn eq(&self, other: &Self) -> bool { + self.nulls == other.nulls + && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets) + && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets) + && self.coords == other.coords + } +} + +impl GeometryTypeId for MultiLineStringArray { + const GEOMETRY_TYPE_OFFSET: i8 = 5; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use geo_traits::to_geo::ToGeoMultiLineString; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; +// use crate::test::multilinestring; + +// #[test] +// fn geo_round_trip() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = [ +// Some(multilinestring::ml0()), +// None, +// Some(multilinestring::ml1()), +// None, +// ]; +// let typ = MultiLineStringType::new(Dimension::XY, Default::default()) +// .with_coord_type(coord_type); +// let geo_arr = +// MultiLineStringBuilder::from_nullable_multi_line_strings(&geoms, typ).finish(); + +// for (i, g) in geo_arr.iter().enumerate() { +// assert_eq!( +// geoms[i], +// g.transpose().unwrap().map(|g| g.to_multi_line_string()) +// ); +// } + +// // Test sliced +// for (i, g) in geo_arr.slice(2, 2).iter().enumerate() { +// assert_eq!( +// geoms[i + 2], +// g.transpose().unwrap().map(|g| g.to_multi_line_string()) +// ); +// } +// } +// } + +// #[test] +// fn geo_round_trip2() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geo_arr = multilinestring::array(coord_type, Dimension::XY); +// let geo_geoms = geo_arr +// .iter() +// .map(|x| x.transpose().unwrap().map(|g| g.to_multi_line_string())) +// .collect::>(); + +// let typ = MultiLineStringType::new(Dimension::XY, Default::default()) +// .with_coord_type(coord_type); +// let geo_arr2 = +// MultiLineStringBuilder::from_nullable_multi_line_strings(&geo_geoms, typ).finish(); +// assert_eq!(geo_arr, geo_arr2); +// } +// } + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let geo_arr = multilinestring::array(coord_type, dim); + +// let extension_type = geo_arr.extension_type().clone(); +// let field = extension_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: MultiLineStringArray = +// (arrow_arr.as_ref(), extension_type).try_into().unwrap(); +// let geo_arr3: MultiLineStringArray = +// (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } + +// #[test] +// fn partial_eq() { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr1 = multilinestring::array(CoordType::Interleaved, dim); +// let arr2 = multilinestring::array(CoordType::Separated, dim); +// assert_eq!(arr1, arr1); +// assert_eq!(arr2, arr2); +// assert_eq!(arr1, arr2); + +// assert_ne!(arr1, arr2.slice(0, 2)); +// } +// } + +// #[test] +// fn test_validation_with_sliced_array() { +// let arr = multilinestring::array(CoordType::Interleaved, Dimension::XY); +// let sliced = arr.slice(0, 1); + +// let back = MultiLineStringArray::try_from(( +// sliced.to_array_ref().as_ref(), +// arr.extension_type().clone(), +// )) +// .unwrap(); +// assert_eq!(back.len(), 1); +// } + +// #[test] +// fn test_validation_with_array_sliced_by_arrow_rs() { +// let arr = multilinestring::array(CoordType::Interleaved, Dimension::XY); +// let sliced = arr.to_array_ref().slice(0, 1); + +// let back = MultiLineStringArray::try_from((sliced.as_ref(), arr.extension_type().clone())) +// .unwrap(); +// assert_eq!(back.len(), 1); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/multipoint.rs b/src/geoarrow/geoarrow-array/src/array/multipoint.rs new file mode 100644 index 0000000000..56ada6cb7c --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/multipoint.rs @@ -0,0 +1,435 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, cast::AsArray}; +use arrow_buffer::{NullBuffer, OffsetBuffer}; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, Metadata, MultiPointType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::{CoordBuffer, GenericWkbArray, PointArray}, + builder::MultiPointBuilder, + capacity::MultiPointCapacity, + eq::offset_buffer_eq, + scalar::MultiPoint, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, + util::{OffsetBufferUtils, offsets_buffer_i32_to_i64}, +}; + +/// An immutable array of MultiPoint geometries. +/// +/// This is semantically equivalent to `Vec>` due to the internal validity +/// bitmap. +#[derive(Debug, Clone)] +pub struct MultiPointArray { + pub(crate) data_type: MultiPointType, + + pub(crate) coords: CoordBuffer, + + /// Offsets into the coordinate array where each geometry starts + pub(crate) geom_offsets: OffsetBuffer, + + /// Validity bitmap + pub(crate) nulls: Option, +} + +pub(super) fn check( + coords: &CoordBuffer, + validity_len: Option, + geom_offsets: &OffsetBuffer, +) -> GeoArrowResult<()> { + if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) { + return Err(GeoArrowError::InvalidGeoArrow( + "nulls mask length must match the number of values".to_string(), + )); + } + + // Offset can be smaller than coords length if sliced + if *geom_offsets.last() as usize > coords.len() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest geometry offset must not be longer than coords length".to_string(), + )); + } + + Ok(()) +} + +impl MultiPointArray { + /// Create a new MultiPointArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest geometry offset does not match the number of coordinates + pub fn new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> Self { + Self::try_new(coords, geom_offsets, nulls, metadata).unwrap() + } + + /// Create a new MultiPointArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the geometry offsets do not match the number of coordinates + pub fn try_new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> GeoArrowResult { + check(&coords, nulls.as_ref().map(|v| v.len()), &geom_offsets)?; + Ok(Self { + data_type: MultiPointType::new(coords.dim(), metadata) + .with_coord_type(coords.coord_type()), + coords, + geom_offsets, + nulls, + }) + } + + fn vertices_field(&self) -> Arc { + Field::new("points", self.coords.storage_type(), false).into() + } + + /// Access the underlying coord buffer + pub fn coords(&self) -> &CoordBuffer { + &self.coords + } + + /// Access the underlying geometry offsets buffer + pub fn geom_offsets(&self) -> &OffsetBuffer { + &self.geom_offsets + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> MultiPointCapacity { + MultiPointCapacity::new(*self.geom_offsets.last() as usize, self.len()) + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0); + validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension()) + } + + /// Slice this [`MultiPointArray`]. + /// + /// # Implementation + /// + /// This operation is `O(1)` as it amounts to increasing a few ref counts. + /// + /// # Panic + /// + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets + // would be in the wrong location. + Self { + data_type: self.data_type.clone(), + coords: self.coords.clone(), + geom_offsets: self.geom_offsets.slice(offset, length), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + coords: self.coords.into_coord_type(coord_type), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for MultiPointArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::MultiPoint(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for MultiPointArray { + type Item = MultiPoint<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(MultiPoint::new(&self.coords, &self.geom_offsets, index)) + } +} + +impl IntoArrow for MultiPointArray { + type ArrowArray = GenericListArray; + type ExtensionType = MultiPointType; + + fn into_arrow(self) -> Self::ArrowArray { + let vertices_field = self.vertices_field(); + let nulls = self.nulls; + let coord_array = self.coords.into(); + GenericListArray::new(vertices_field, self.geom_offsets, coord_array, nulls) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&GenericListArray, MultiPointType)> for MultiPointArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&GenericListArray, MultiPointType)) -> GeoArrowResult { + let coords = CoordBuffer::from_arrow(value.values().as_ref(), typ.dimension())?; + let geom_offsets = offsets_buffer_i32_to_i64(value.offsets()); + let nulls = value.nulls(); + + Ok(Self::new( + coords, + geom_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&GenericListArray, MultiPointType)> for MultiPointArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&GenericListArray, MultiPointType)) -> GeoArrowResult { + let coords = CoordBuffer::from_arrow(value.values().as_ref(), typ.dimension())?; + let geom_offsets = value.offsets(); + let nulls = value.nulls(); + + Ok(Self::new( + coords, + geom_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&dyn Array, MultiPointType)> for MultiPointArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, MultiPointType)) -> GeoArrowResult { + match value.data_type() { + DataType::List(_) => (value.as_list::(), typ).try_into(), + DataType::LargeList(_) => (value.as_list::(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected MultiPoint DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for MultiPointArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl TryFrom<(GenericWkbArray, MultiPointType)> for MultiPointArray { + type Error = GeoArrowError; + + fn try_from(value: (GenericWkbArray, MultiPointType)) -> GeoArrowResult { + let mut_arr: MultiPointBuilder = value.try_into()?; + Ok(mut_arr.finish()) + } +} + +impl From for MultiPointArray { + fn from(value: PointArray) -> Self { + let (coord_type, dimension, metadata) = value.data_type.into_inner(); + let new_type = MultiPointType::new(dimension, metadata).with_coord_type(coord_type); + + let coords = value.coords; + let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]); + let nulls = value.nulls; + Self { + data_type: new_type, + coords, + geom_offsets, + nulls, + } + } +} + +impl PartialEq for MultiPointArray { + fn eq(&self, other: &Self) -> bool { + self.nulls == other.nulls + && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets) + && self.coords == other.coords + } +} + +impl GeometryTypeId for MultiPointArray { + const GEOMETRY_TYPE_OFFSET: i8 = 4; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use geo_traits::to_geo::ToGeoMultiPoint; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; +// use crate::test::multipoint; + +// #[test] +// fn geo_round_trip() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = [Some(multipoint::mp0()), None, Some(multipoint::mp1()), None]; +// let typ = +// MultiPointType::new(Dimension::XY, Default::default()).with_coord_type(coord_type); +// let geo_arr = MultiPointBuilder::from_nullable_multi_points(&geoms, typ).finish(); + +// for (i, g) in geo_arr.iter().enumerate() { +// assert_eq!(geoms[i], g.transpose().unwrap().map(|g| g.to_multi_point())); +// } + +// // Test sliced +// for (i, g) in geo_arr.slice(2, 2).iter().enumerate() { +// assert_eq!( +// geoms[i + 2], +// g.transpose().unwrap().map(|g| g.to_multi_point()) +// ); +// } +// } +// } + +// #[test] +// fn geo_round_trip2() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geo_arr = multipoint::array(coord_type, Dimension::XY); +// let geo_geoms = geo_arr +// .iter() +// .map(|x| x.transpose().unwrap().map(|g| g.to_multi_point())) +// .collect::>(); + +// let typ = +// MultiPointType::new(Dimension::XY, Default::default()).with_coord_type(coord_type); +// let geo_arr2 = MultiPointBuilder::from_nullable_multi_points(&geo_geoms, typ).finish(); +// assert_eq!(geo_arr, geo_arr2); +// } +// } + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let geo_arr = multipoint::array(coord_type, dim); + +// let extension_type = geo_arr.extension_type().clone(); +// let field = extension_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: MultiPointArray = +// (arrow_arr.as_ref(), extension_type).try_into().unwrap(); +// let geo_arr3: MultiPointArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } + +// #[test] +// fn partial_eq() { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr1 = multipoint::array(CoordType::Interleaved, dim); +// let arr2 = multipoint::array(CoordType::Separated, dim); +// assert_eq!(arr1, arr1); +// assert_eq!(arr2, arr2); +// assert_eq!(arr1, arr2); + +// assert_ne!(arr1, arr2.slice(0, 2)); +// } +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/multipolygon.rs b/src/geoarrow/geoarrow-array/src/array/multipolygon.rs new file mode 100644 index 0000000000..6b224ec0c2 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/multipolygon.rs @@ -0,0 +1,587 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, cast::AsArray}; +use arrow_buffer::{NullBuffer, OffsetBuffer}; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, Metadata, MultiPolygonType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::{CoordBuffer, GenericWkbArray, PolygonArray}, + builder::MultiPolygonBuilder, + capacity::MultiPolygonCapacity, + eq::offset_buffer_eq, + scalar::MultiPolygon, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, + util::{OffsetBufferUtils, offsets_buffer_i32_to_i64}, +}; + +/// An immutable array of MultiPolygon geometries. +/// +/// This is semantically equivalent to `Vec>` due to the internal validity +/// bitmap. +#[derive(Debug, Clone)] +pub struct MultiPolygonArray { + pub(crate) data_type: MultiPolygonType, + + pub(crate) coords: CoordBuffer, + + /// Offsets into the polygon array where each geometry starts + pub(crate) geom_offsets: OffsetBuffer, + + /// Offsets into the ring array where each polygon starts + pub(crate) polygon_offsets: OffsetBuffer, + + /// Offsets into the coordinate array where each ring starts + pub(crate) ring_offsets: OffsetBuffer, + + /// Validity bitmap + pub(crate) nulls: Option, +} + +pub(super) fn check( + coords: &CoordBuffer, + geom_offsets: &OffsetBuffer, + polygon_offsets: &OffsetBuffer, + ring_offsets: &OffsetBuffer, + validity_len: Option, +) -> GeoArrowResult<()> { + if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) { + return Err(GeoArrowError::InvalidGeoArrow( + "nulls mask length must match the number of values".to_string(), + )); + } + + // Offset can be smaller than coords length if sliced + if *ring_offsets.last() as usize != coords.len() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest ring offset must match coords length".to_string(), + )); + } + + if *polygon_offsets.last() as usize != ring_offsets.len_proxy() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest polygon offset must match ring offsets length".to_string(), + )); + } + + if *geom_offsets.last() as usize > polygon_offsets.len_proxy() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest geometry offset must not be longer than polygon offsets length".to_string(), + )); + } + + Ok(()) +} + +impl MultiPolygonArray { + /// Create a new MultiPolygonArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest ring offset does not match the number of coordinates + /// - if the largest polygon offset does not match the size of ring offsets + /// - if the largest geometry offset does not match the size of polygon offsets + pub fn new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + polygon_offsets: OffsetBuffer, + ring_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> Self { + Self::try_new( + coords, + geom_offsets, + polygon_offsets, + ring_offsets, + nulls, + metadata, + ) + .unwrap() + } + + /// Create a new MultiPolygonArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest ring offset does not match the number of coordinates + /// - if the largest polygon offset does not match the size of ring offsets + /// - if the largest geometry offset does not match the size of polygon offsets + pub fn try_new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + polygon_offsets: OffsetBuffer, + ring_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> GeoArrowResult { + check( + &coords, + &geom_offsets, + &polygon_offsets, + &ring_offsets, + nulls.as_ref().map(|v| v.len()), + )?; + Ok(Self { + data_type: MultiPolygonType::new(coords.dim(), metadata) + .with_coord_type(coords.coord_type()), + coords, + geom_offsets, + polygon_offsets, + ring_offsets, + nulls, + }) + } + + fn vertices_field(&self) -> Arc { + Field::new("vertices", self.coords.storage_type(), false).into() + } + + fn rings_field(&self) -> Arc { + let name = "rings"; + Field::new_large_list(name, self.vertices_field(), false).into() + } + + fn polygons_field(&self) -> Arc { + let name = "polygons"; + Field::new_large_list(name, self.rings_field(), false).into() + } + + /// Access the underlying coordinate buffer + pub fn coords(&self) -> &CoordBuffer { + &self.coords + } + + /// Access the underlying geometry offsets buffer + pub fn geom_offsets(&self) -> &OffsetBuffer { + &self.geom_offsets + } + + /// Access the underlying polygon offsets buffer + pub fn polygon_offsets(&self) -> &OffsetBuffer { + &self.polygon_offsets + } + + /// Access the underlying ring offsets buffer + pub fn ring_offsets(&self) -> &OffsetBuffer { + &self.ring_offsets + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> MultiPolygonCapacity { + MultiPolygonCapacity::new( + *self.ring_offsets.last() as usize, + *self.polygon_offsets.last() as usize, + *self.geom_offsets.last() as usize, + self.len(), + ) + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0); + validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension()) + } + + /// Slice this [`MultiPolygonArray`]. + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + // Note: we **only** slice the geom_offsets and not any actual data. Otherwise the offsets + // would be in the wrong location. + Self { + data_type: self.data_type.clone(), + coords: self.coords.clone(), + geom_offsets: self.geom_offsets.slice(offset, length), + polygon_offsets: self.polygon_offsets.clone(), + ring_offsets: self.ring_offsets.clone(), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + coords: self.coords.into_coord_type(coord_type), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for MultiPolygonArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::MultiPolygon(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for MultiPolygonArray { + type Item = MultiPolygon<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(MultiPolygon::new( + &self.coords, + &self.geom_offsets, + &self.polygon_offsets, + &self.ring_offsets, + index, + )) + } +} + +impl IntoArrow for MultiPolygonArray { + type ArrowArray = GenericListArray; + type ExtensionType = MultiPolygonType; + + fn into_arrow(self) -> Self::ArrowArray { + let vertices_field = self.vertices_field(); + let rings_field = self.rings_field(); + let polygons_field = self.polygons_field(); + + let nulls = self.nulls; + let coord_array = ArrayRef::from(self.coords); + let ring_array = Arc::new(GenericListArray::new( + vertices_field, + self.ring_offsets, + coord_array, + None, + )); + let polygons_array = Arc::new(GenericListArray::new( + rings_field, + self.polygon_offsets, + ring_array, + None, + )); + + GenericListArray::new(polygons_field, self.geom_offsets, polygons_array, nulls) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&GenericListArray, MultiPolygonType)> for MultiPolygonArray { + type Error = GeoArrowError; + + fn try_from( + (geom_array, typ): (&GenericListArray, MultiPolygonType), + ) -> GeoArrowResult { + let geom_offsets = offsets_buffer_i32_to_i64(geom_array.offsets()); + let nulls = geom_array.nulls(); + + let polygons_dyn_array = geom_array.values(); + let polygons_array = polygons_dyn_array.as_list::(); + + let polygon_offsets = offsets_buffer_i32_to_i64(polygons_array.offsets()); + let rings_dyn_array = polygons_array.values(); + let rings_array = rings_dyn_array.as_list::(); + + let ring_offsets = offsets_buffer_i32_to_i64(rings_array.offsets()); + let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?; + + Ok(Self::new( + coords, + geom_offsets.clone(), + polygon_offsets.clone(), + ring_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&GenericListArray, MultiPolygonType)> for MultiPolygonArray { + type Error = GeoArrowError; + + fn try_from( + (geom_array, typ): (&GenericListArray, MultiPolygonType), + ) -> GeoArrowResult { + let geom_offsets = geom_array.offsets(); + let nulls = geom_array.nulls(); + + let polygons_dyn_array = geom_array.values(); + let polygons_array = polygons_dyn_array.as_list::(); + + let polygon_offsets = polygons_array.offsets(); + let rings_dyn_array = polygons_array.values(); + let rings_array = rings_dyn_array.as_list::(); + + let ring_offsets = rings_array.offsets(); + let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?; + + Ok(Self::new( + coords, + geom_offsets.clone(), + polygon_offsets.clone(), + ring_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&dyn Array, MultiPolygonType)> for MultiPolygonArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, MultiPolygonType)) -> GeoArrowResult { + match value.data_type() { + DataType::List(_) => (value.as_list::(), typ).try_into(), + DataType::LargeList(_) => (value.as_list::(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected MultiPolygon DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for MultiPolygonArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl TryFrom<(GenericWkbArray, MultiPolygonType)> for MultiPolygonArray { + type Error = GeoArrowError; + + fn try_from(value: (GenericWkbArray, MultiPolygonType)) -> GeoArrowResult { + let mut_arr: MultiPolygonBuilder = value.try_into()?; + Ok(mut_arr.finish()) + } +} + +impl From for MultiPolygonArray { + fn from(value: PolygonArray) -> Self { + let (coord_type, dimension, metadata) = value.data_type.into_inner(); + let new_type = MultiPolygonType::new(dimension, metadata).with_coord_type(coord_type); + + let coords = value.coords; + let geom_offsets = OffsetBuffer::from_lengths(vec![1; coords.len()]); + let ring_offsets = value.ring_offsets; + let polygon_offsets = value.geom_offsets; + let nulls = value.nulls; + Self { + data_type: new_type, + coords, + geom_offsets, + polygon_offsets, + ring_offsets, + nulls, + } + } +} + +impl PartialEq for MultiPolygonArray { + fn eq(&self, other: &Self) -> bool { + self.nulls == other.nulls + && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets) + && offset_buffer_eq(&self.polygon_offsets, &other.polygon_offsets) + && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets) + && self.coords == other.coords + } +} + +impl GeometryTypeId for MultiPolygonArray { + const GEOMETRY_TYPE_OFFSET: i8 = 6; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use geo_traits::to_geo::ToGeoMultiPolygon; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; +// use crate::test::multipolygon; + +// #[test] +// fn geo_round_trip() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = [ +// Some(multipolygon::mp0()), +// None, +// Some(multipolygon::mp1()), +// None, +// ]; +// let typ = MultiPolygonType::new(Dimension::XY, Default::default()) +// .with_coord_type(coord_type); +// let geo_arr = MultiPolygonBuilder::from_nullable_multi_polygons(&geoms, typ).finish(); + +// for (i, g) in geo_arr.iter().enumerate() { +// assert_eq!( +// geoms[i], +// g.transpose().unwrap().map(|g| g.to_multi_polygon()) +// ); +// } + +// // Test sliced +// for (i, g) in geo_arr.slice(2, 2).iter().enumerate() { +// assert_eq!( +// geoms[i + 2], +// g.transpose().unwrap().map(|g| g.to_multi_polygon()) +// ); +// } +// } +// } + +// #[test] +// fn geo_round_trip2() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geo_arr = multipolygon::array(coord_type, Dimension::XY); +// let geo_geoms = geo_arr +// .iter() +// .map(|x| x.transpose().unwrap().map(|g| g.to_multi_polygon())) +// .collect::>(); + +// let typ = MultiPolygonType::new(Dimension::XY, Default::default()) +// .with_coord_type(coord_type); +// let geo_arr2 = +// MultiPolygonBuilder::from_nullable_multi_polygons(&geo_geoms, typ).finish(); +// assert_eq!(geo_arr, geo_arr2); +// } +// } + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let geo_arr = multipolygon::array(coord_type, dim); + +// let extension_type = geo_arr.extension_type().clone(); +// let field = extension_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: MultiPolygonArray = +// (arrow_arr.as_ref(), extension_type).try_into().unwrap(); +// let geo_arr3: MultiPolygonArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } + +// #[test] +// fn partial_eq() { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr1 = multipolygon::array(CoordType::Interleaved, dim); +// let arr2 = multipolygon::array(CoordType::Separated, dim); +// assert_eq!(arr1, arr1); +// assert_eq!(arr2, arr2); +// assert_eq!(arr1, arr2); + +// assert_ne!(arr1, arr2.slice(0, 2)); +// } +// } + +// #[test] +// fn test_validation_with_sliced_array() { +// let arr = multipolygon::array(CoordType::Interleaved, Dimension::XY); +// let sliced = arr.slice(0, 1); + +// let back = MultiPolygonArray::try_from(( +// sliced.to_array_ref().as_ref(), +// arr.extension_type().clone(), +// )) +// .unwrap(); +// assert_eq!(back.len(), 1); +// } + +// #[test] +// fn test_validation_with_array_sliced_by_arrow_rs() { +// let arr = multipolygon::array(CoordType::Interleaved, Dimension::XY); +// let sliced = arr.to_array_ref().slice(0, 1); + +// let back = +// MultiPolygonArray::try_from((sliced.as_ref(), arr.extension_type().clone())).unwrap(); +// assert_eq!(back.len(), 1); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/point.rs b/src/geoarrow/geoarrow-array/src/array/point.rs new file mode 100644 index 0000000000..3dd9462cab --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/point.rs @@ -0,0 +1,398 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, FixedSizeListArray, StructArray, cast::AsArray}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, Metadata, PointType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::{CoordBuffer, InterleavedCoordBuffer, SeparatedCoordBuffer}, + eq::point_eq, + scalar::Point, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, +}; + +/// An immutable array of Point geometries. +/// +/// All points must have the same dimension. +/// +/// This is semantically equivalent to `Vec>` due to the internal validity bitmap. +#[derive(Debug, Clone)] +pub struct PointArray { + pub(crate) data_type: PointType, + pub(crate) coords: CoordBuffer, + pub(crate) nulls: Option, +} + +/// Perform checks: +/// +/// - Validity mask must have the same length as the coordinates. +pub(super) fn check(coords: &CoordBuffer, validity_len: Option) -> GeoArrowResult<()> { + if validity_len.is_some_and(|len| len != coords.len()) { + return Err(GeoArrowError::InvalidGeoArrow( + "validity mask length must match the number of values".to_string(), + )); + } + + Ok(()) +} + +impl PointArray { + /// Create a new PointArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the validity is not `None` and its length is different from the number of geometries + pub fn new(coords: CoordBuffer, validity: Option, metadata: Arc) -> Self { + Self::try_new(coords, validity, metadata).unwrap() + } + + /// Create a new PointArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + pub fn try_new( + coords: CoordBuffer, + nulls: Option, + metadata: Arc, + ) -> GeoArrowResult { + check(&coords, nulls.as_ref().map(|v| v.len()))?; + Ok(Self { + data_type: PointType::new(coords.dim(), metadata).with_coord_type(coords.coord_type()), + coords, + nulls, + }) + } + + /// Access the underlying coordinate buffer + /// + /// Note that some coordinates may be null, depending on the value of [`Self::logical_nulls`] + pub fn coords(&self) -> &CoordBuffer { + &self.coords + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> usize { + self.len() + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let dimension = self.data_type.dimension(); + let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0); + validity_len + self.buffer_lengths() * dimension.size() * 8 + } + + /// Slice this [`PointArray`]. + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + data_type: self.data_type.clone(), + coords: self.coords.slice(offset, length), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + coords: self.coords.into_coord_type(coord_type), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for PointArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + self.into_arrow() + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.coords.len() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::Point(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for PointArray { + type Item = Point<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(Point::new(&self.coords, index)) + } +} + +impl IntoArrow for PointArray { + type ArrowArray = ArrayRef; + type ExtensionType = PointType; + + fn into_arrow(self) -> Self::ArrowArray { + let validity = self.nulls; + let dim = self.coords.dim(); + match self.coords { + CoordBuffer::Interleaved(c) => Arc::new(FixedSizeListArray::new( + c.values_field().into(), + dim.size() as i32, + Arc::new(c.values_array()), + validity, + )), + CoordBuffer::Separated(c) => { + let fields = c.values_field(); + Arc::new(StructArray::new(fields.into(), c.values_array(), validity)) + } + } + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&FixedSizeListArray, PointType)> for PointArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&FixedSizeListArray, PointType)) -> GeoArrowResult { + let interleaved_coords = InterleavedCoordBuffer::from_arrow(value, typ.dimension())?; + + Ok(Self::new( + CoordBuffer::Interleaved(interleaved_coords), + value.nulls().cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&StructArray, PointType)> for PointArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&StructArray, PointType)) -> GeoArrowResult { + let validity = value.nulls(); + let separated_coords = SeparatedCoordBuffer::from_arrow(value, typ.dimension())?; + Ok(Self::new( + CoordBuffer::Separated(separated_coords), + validity.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&dyn Array, PointType)> for PointArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, PointType)) -> GeoArrowResult { + match value.data_type() { + DataType::FixedSizeList(_, _) => (value.as_fixed_size_list(), typ).try_into(), + DataType::Struct(_) => (value.as_struct(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected Point DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for PointArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +// Implement a custom PartialEq for PointArray to allow Point(EMPTY) comparisons, which is stored +// as (NaN, NaN). By default, these resolve to false +impl PartialEq for PointArray { + fn eq(&self, other: &Self) -> bool { + if self.nulls != other.nulls { + return false; + } + + if self.coords.len() != other.coords.len() { + return false; + } + + for point_idx in 0..self.len() { + let p1 = self.get(point_idx).unwrap(); + let p2 = other.get(point_idx).unwrap(); + match (p1, p2) { + (Some(p1), Some(p2)) => { + if !point_eq(&p1, &p2) { + return false; + } + } + (None, None) => continue, + _ => return false, + } + } + + true + } +} + +impl GeometryTypeId for PointArray { + const GEOMETRY_TYPE_OFFSET: i8 = 1; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use geo_traits::to_geo::ToGeoPoint; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; +// use crate::{builder::PointBuilder, test::point}; + +// #[test] +// fn geo_round_trip() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = [ +// Some(point::p0()), +// Some(point::p1()), +// None, +// Some(point::p2()), +// ]; +// let typ = PointType::new(Dimension::XY, Default::default()).with_coord_type(coord_type); +// let geo_arr = +// PointBuilder::from_nullable_points(geoms.iter().map(|x| x.as_ref()), typ).finish(); + +// for (i, g) in geo_arr.iter().enumerate() { +// assert_eq!(geoms[i], g.transpose().unwrap().map(|g| g.to_point())); +// } + +// // Test sliced +// for (i, g) in geo_arr.slice(2, 2).iter().enumerate() { +// assert_eq!(geoms[i + 2], g.transpose().unwrap().map(|g| g.to_point())); +// } +// } +// } + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let geo_arr = point::array(coord_type, dim); + +// let point_type = geo_arr.extension_type().clone(); +// let field = point_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: PointArray = (arrow_arr.as_ref(), point_type).try_into().unwrap(); +// let geo_arr3: PointArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } + +// #[test] +// fn into_coord_type() { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let geo_arr = point::array(CoordType::Interleaved, dim); +// let geo_arr2 = geo_arr +// .clone() +// .into_coord_type(CoordType::Separated) +// .into_coord_type(CoordType::Interleaved); + +// assert_eq!(geo_arr, geo_arr2); +// } +// } + +// #[test] +// fn partial_eq() { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr1 = point::array(CoordType::Interleaved, dim); +// let arr2 = point::array(CoordType::Separated, dim); +// assert_eq!(arr1, arr1); +// assert_eq!(arr2, arr2); +// assert_eq!(arr1, arr2); + +// assert_ne!(arr1, arr2.slice(0, 2)); +// } +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/polygon.rs b/src/geoarrow/geoarrow-array/src/array/polygon.rs new file mode 100644 index 0000000000..32003daa9b --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/polygon.rs @@ -0,0 +1,515 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, cast::AsArray}; +use arrow_buffer::{NullBuffer, OffsetBuffer}; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + CoordType, Dimension, GeoArrowType, Metadata, PolygonType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + array::{CoordBuffer, GenericWkbArray, RectArray}, + builder::PolygonBuilder, + capacity::PolygonCapacity, + eq::offset_buffer_eq, + scalar::Polygon, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, + util::{OffsetBufferUtils, offsets_buffer_i32_to_i64}, +}; + +/// An immutable array of Polygon geometries using GeoArrow's in-memory representation. +/// +/// All polygons must have the same dimension. +/// +/// This is semantically equivalent to `Vec>` due to the internal validity bitmap. +#[derive(Debug, Clone)] +// #[derive(Debug, Clone, PartialEq)] +pub struct PolygonArray { + pub(crate) data_type: PolygonType, + + pub(crate) coords: CoordBuffer, + + /// Offsets into the ring array where each geometry starts + pub(crate) geom_offsets: OffsetBuffer, + + /// Offsets into the coordinate array where each ring starts + pub(crate) ring_offsets: OffsetBuffer, + + /// Validity bitmap + pub(crate) nulls: Option, +} + +pub(super) fn check( + coords: &CoordBuffer, + geom_offsets: &OffsetBuffer, + ring_offsets: &OffsetBuffer, + validity_len: Option, +) -> GeoArrowResult<()> { + if validity_len.is_some_and(|len| len != geom_offsets.len_proxy()) { + return Err(GeoArrowError::InvalidGeoArrow( + "nulls mask length must match the number of values".to_string(), + )); + } + + if *ring_offsets.last() as usize != coords.len() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest ring offset must match coords length".to_string(), + )); + } + + // Offset can be smaller than length if sliced + if *geom_offsets.last() as usize > ring_offsets.len_proxy() { + return Err(GeoArrowError::InvalidGeoArrow( + "largest geometry offset must not be longer than ring offsets length".to_string(), + )); + } + + Ok(()) +} + +impl PolygonArray { + /// Create a new PolygonArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest ring offset does not match the number of coordinates + /// - if the largest geometry offset does not match the size of ring offsets + pub fn new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + ring_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> Self { + Self::try_new(coords, geom_offsets, ring_offsets, nulls, metadata).unwrap() + } + + /// Create a new PolygonArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// - if the nulls is not `None` and its length is different from the number of geometries + /// - if the largest ring offset does not match the number of coordinates + /// - if the largest geometry offset does not match the size of ring offsets + pub fn try_new( + coords: CoordBuffer, + geom_offsets: OffsetBuffer, + ring_offsets: OffsetBuffer, + nulls: Option, + metadata: Arc, + ) -> GeoArrowResult { + check( + &coords, + &geom_offsets, + &ring_offsets, + nulls.as_ref().map(|v| v.len()), + )?; + Ok(Self { + data_type: PolygonType::new(coords.dim(), metadata) + .with_coord_type(coords.coord_type()), + coords, + geom_offsets, + ring_offsets, + nulls, + }) + } + + fn vertices_field(&self) -> Arc { + Field::new("vertices", self.coords.storage_type(), false).into() + } + + fn rings_field(&self) -> Arc { + let name = "rings"; + Field::new_large_list(name, self.vertices_field(), false).into() + } + + /// Access the underlying coordinate buffer + pub fn coords(&self) -> &CoordBuffer { + &self.coords + } + + /// Access the underlying geometry offsets buffer + pub fn geom_offsets(&self) -> &OffsetBuffer { + &self.geom_offsets + } + + /// Access the underlying ring offsets buffer + pub fn ring_offsets(&self) -> &OffsetBuffer { + &self.ring_offsets + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> PolygonCapacity { + PolygonCapacity::new( + *self.ring_offsets.last() as usize, + *self.geom_offsets.last() as usize, + self.len(), + ) + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let validity_len = self.nulls.as_ref().map(|v| v.buffer().len()).unwrap_or(0); + validity_len + self.buffer_lengths().num_bytes(self.data_type.dimension()) + } + + /// Slice this [`PolygonArray`]. + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + // Note: we **only** slice the geom_offsets and not any actual data or other offsets. + // Otherwise the offsets would be in the wrong location. + Self { + data_type: self.data_type.clone(), + coords: self.coords.clone(), + geom_offsets: self.geom_offsets.slice(offset, length), + ring_offsets: self.ring_offsets.clone(), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`CoordType`] of this array. + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self { + data_type: self.data_type.with_coord_type(coord_type), + coords: self.coords.into_coord_type(coord_type), + ..self + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for PolygonArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::Polygon(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for PolygonArray { + type Item = Polygon<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(Polygon::new( + &self.coords, + &self.geom_offsets, + &self.ring_offsets, + index, + )) + } +} + +impl IntoArrow for PolygonArray { + type ArrowArray = GenericListArray; + type ExtensionType = PolygonType; + + fn into_arrow(self) -> Self::ArrowArray { + let vertices_field = self.vertices_field(); + let rings_field = self.rings_field(); + let nulls = self.nulls; + let coord_array = self.coords.into(); + let ring_array = Arc::new(GenericListArray::new( + vertices_field, + self.ring_offsets, + coord_array, + None, + )); + GenericListArray::new(rings_field, self.geom_offsets, ring_array, nulls) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&GenericListArray, PolygonType)> for PolygonArray { + type Error = GeoArrowError; + + fn try_from((geom_array, typ): (&GenericListArray, PolygonType)) -> GeoArrowResult { + let geom_offsets = offsets_buffer_i32_to_i64(geom_array.offsets()); + let nulls = geom_array.nulls(); + + let rings_dyn_array = geom_array.values(); + let rings_array = rings_dyn_array.as_list::(); + + let ring_offsets = offsets_buffer_i32_to_i64(rings_array.offsets()); + let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?; + + Ok(Self::new( + coords, + geom_offsets.clone(), + ring_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&GenericListArray, PolygonType)> for PolygonArray { + type Error = GeoArrowError; + + fn try_from((geom_array, typ): (&GenericListArray, PolygonType)) -> GeoArrowResult { + let geom_offsets = geom_array.offsets(); + let nulls = geom_array.nulls(); + + let rings_dyn_array = geom_array.values(); + let rings_array = rings_dyn_array.as_list::(); + + let ring_offsets = rings_array.offsets(); + let coords = CoordBuffer::from_arrow(rings_array.values().as_ref(), typ.dimension())?; + + Ok(Self::new( + coords, + geom_offsets.clone(), + ring_offsets.clone(), + nulls.cloned(), + typ.metadata().clone(), + )) + } +} +impl TryFrom<(&dyn Array, PolygonType)> for PolygonArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, PolygonType)) -> GeoArrowResult { + match value.data_type() { + DataType::List(_) => (value.as_list::(), typ).try_into(), + DataType::LargeList(_) => (value.as_list::(), typ).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected Polygon DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for PolygonArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl TryFrom<(GenericWkbArray, PolygonType)> for PolygonArray { + type Error = GeoArrowError; + + fn try_from(value: (GenericWkbArray, PolygonType)) -> GeoArrowResult { + let mut_arr: PolygonBuilder = value.try_into()?; + Ok(mut_arr.finish()) + } +} + +impl From for PolygonArray { + fn from(value: RectArray) -> Self { + let polygon_type = PolygonType::new( + value.data_type.dimension(), + value.data_type.metadata().clone(), + ) + .with_coord_type(CoordType::Separated); + + // The number of output geoms is the same as the input + let geom_capacity = value.len(); + + // Each output polygon is a simple polygon with only one ring + let ring_capacity = geom_capacity; + + // Each output polygon has exactly 5 coordinates + // Don't reserve capacity for null entries + let coord_capacity = (value.len() - value.logical_null_count()) * 5; + + let capacity = PolygonCapacity::new(coord_capacity, ring_capacity, geom_capacity); + let mut output_array = PolygonBuilder::with_capacity(polygon_type, capacity); + + value.iter().for_each(|maybe_g| { + output_array + .push_rect(maybe_g.transpose().unwrap().as_ref()) + .unwrap() + }); + + output_array.finish() + } +} + +impl PartialEq for PolygonArray { + fn eq(&self, other: &Self) -> bool { + self.nulls == other.nulls + && offset_buffer_eq(&self.geom_offsets, &other.geom_offsets) + && offset_buffer_eq(&self.ring_offsets, &other.ring_offsets) + && self.coords == other.coords + } +} + +impl GeometryTypeId for PolygonArray { + const GEOMETRY_TYPE_OFFSET: i8 = 3; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use geo_traits::to_geo::ToGeoPolygon; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; +// use crate::test::polygon; + +// #[test] +// fn geo_round_trip() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geoms = [Some(polygon::p0()), None, Some(polygon::p1()), None]; +// let typ = +// PolygonType::new(Dimension::XY, Default::default()).with_coord_type(coord_type); +// let geo_arr = PolygonBuilder::from_nullable_polygons(&geoms, typ).finish(); + +// for (i, g) in geo_arr.iter().enumerate() { +// assert_eq!(geoms[i], g.transpose().unwrap().map(|g| g.to_polygon())); +// } + +// // Test sliced +// for (i, g) in geo_arr.slice(2, 2).iter().enumerate() { +// assert_eq!(geoms[i + 2], g.transpose().unwrap().map(|g| g.to_polygon())); +// } +// } +// } + +// #[test] +// fn geo_round_trip2() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let geo_arr = polygon::array(coord_type, Dimension::XY); +// let geo_geoms = geo_arr +// .iter() +// .map(|x| x.transpose().unwrap().map(|g| g.to_polygon())) +// .collect::>(); + +// let typ = +// PolygonType::new(Dimension::XY, Default::default()).with_coord_type(coord_type); +// let geo_arr2 = PolygonBuilder::from_nullable_polygons(&geo_geoms, typ).finish(); +// assert_eq!(geo_arr, geo_arr2); +// } +// } + +// #[test] +// fn try_from_arrow() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let geo_arr = polygon::array(coord_type, dim); + +// let extension_type = geo_arr.extension_type().clone(); +// let field = extension_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: PolygonArray = +// (arrow_arr.as_ref(), extension_type).try_into().unwrap(); +// let geo_arr3: PolygonArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } +// } +// } + +// #[test] +// fn partial_eq() { +// let arr1 = polygon::p_array(CoordType::Interleaved); +// let arr2 = polygon::p_array(CoordType::Separated); +// assert_eq!(arr1, arr1); +// assert_eq!(arr2, arr2); +// assert_eq!(arr1, arr2); + +// assert_ne!(arr1, arr2.slice(0, 2)); +// } + +// #[test] +// fn test_validation_with_sliced_array() { +// let arr = polygon::array(CoordType::Interleaved, Dimension::XY); +// let sliced = arr.slice(0, 1); + +// let back = +// PolygonArray::try_from((sliced.to_array_ref().as_ref(), arr.extension_type().clone())) +// .unwrap(); +// assert_eq!(back.len(), 1); +// } + +// #[test] +// fn test_validation_with_array_sliced_by_arrow_rs() { +// let arr = polygon::array(CoordType::Interleaved, Dimension::XY); +// let sliced = arr.to_array_ref().slice(0, 1); + +// let back = PolygonArray::try_from((sliced.as_ref(), arr.extension_type().clone())).unwrap(); +// assert_eq!(back.len(), 1); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/rect.rs b/src/geoarrow/geoarrow-array/src/array/rect.rs new file mode 100644 index 0000000000..9c025573b9 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/rect.rs @@ -0,0 +1,299 @@ +use std::sync::Arc; + +use arrow_array::{Array, ArrayRef, StructArray, cast::AsArray, types::Float64Type}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + BoxType, GeoArrowType, Metadata, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{ + array::SeparatedCoordBuffer, + scalar::Rect, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, +}; + +/// An immutable array of Rect or Box geometries. +/// +/// A rect is an axis-aligned bounded rectangle whose area is defined by minimum and maximum +/// coordinates. +/// +/// All rects must have the same dimension. +/// +/// This is **not** an array type defined by the GeoArrow specification (as of spec version 0.1) +/// but is included here for parity with georust/geo, and to save memory for the output of +/// `bounds()`. +/// +/// Internally this is implemented as a FixedSizeList, laid out as minx, miny, maxx, maxy. +#[derive(Debug, Clone)] +pub struct RectArray { + pub(crate) data_type: BoxType, + + /// Separated arrays for each of the "lower" dimensions + lower: SeparatedCoordBuffer, + + /// Separated arrays for each of the "upper" dimensions + upper: SeparatedCoordBuffer, + + nulls: Option, +} + +impl RectArray { + /// Construct a new [`RectArray`] from parts + pub fn new( + lower: SeparatedCoordBuffer, + upper: SeparatedCoordBuffer, + nulls: Option, + metadata: Arc, + ) -> Self { + assert_eq!(lower.dim(), upper.dim()); + Self { + data_type: BoxType::new(lower.dim(), metadata), + lower, + upper, + nulls, + } + } + + /// Access the coordinate buffer of the "lower" corner of the RectArray + /// + /// Note that this needs to be interpreted in conjunction with the [null + /// buffer][Self::logical_nulls]. + pub fn lower(&self) -> &SeparatedCoordBuffer { + &self.lower + } + + /// Access the coordinate buffer of the "upper" corner of the RectArray + /// + /// Note that this needs to be interpreted in conjunction with the [null + /// buffer][Self::logical_nulls]. + pub fn upper(&self) -> &SeparatedCoordBuffer { + &self.upper + } + + /// Slice this [`RectArray`]. + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + + Self { + data_type: self.data_type.clone(), + lower: self.lower().slice(offset, length), + upper: self.upper().slice(offset, length), + nulls: self.nulls.as_ref().map(|v| v.slice(offset, length)), + } + } + + /// Change the [`Metadata`] of this array. + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { + data_type: self.data_type.with_metadata(metadata), + ..self + } + } +} + +impl GeoArrowArray for RectArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.lower.len() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.nulls.clone() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.nulls.as_ref().map(|v| v.null_count()).unwrap_or(0) + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.nulls + .as_ref() + .map(|n| n.is_null(i)) + .unwrap_or_default() + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::Rect(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for RectArray { + type Item = Rect<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + Ok(Rect::new(&self.lower, &self.upper, index)) + } +} + +impl IntoArrow for RectArray { + type ArrowArray = StructArray; + type ExtensionType = BoxType; + + fn into_arrow(self) -> Self::ArrowArray { + let fields = match self.data_type.data_type() { + DataType::Struct(fields) => fields, + _ => unreachable!(), + }; + + let mut arrays: Vec = vec![]; + + // values_array takes care of the correct number of dimensions + arrays.extend_from_slice(self.lower.values_array().as_slice()); + arrays.extend_from_slice(self.upper.values_array().as_slice()); + + let nulls = self.nulls; + StructArray::new(fields, arrays, nulls) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl TryFrom<(&StructArray, BoxType)> for RectArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&StructArray, BoxType)) -> GeoArrowResult { + let dim = typ.dimension(); + let nulls = value.nulls(); + let columns = value.columns(); + if columns.len() != dim.size() * 2 { + return Err(GeoArrowError::InvalidGeoArrow(format!( + "Invalid number of columns for RectArray: expected {} but got {}", + dim.size() * 2, + columns.len() + ))); + } + + let lower = columns[0..dim.size()] + .iter() + .map(|c| c.as_primitive::().values().clone()) + .collect::>(); + let lower = SeparatedCoordBuffer::from_vec(lower, dim)?; + + let upper = columns[dim.size()..] + .iter() + .map(|c| c.as_primitive::().values().clone()) + .collect::>(); + let upper = SeparatedCoordBuffer::from_vec(upper, dim)?; + + Ok(Self::new( + lower, + upper, + nulls.cloned(), + typ.metadata().clone(), + )) + } +} + +impl TryFrom<(&dyn Array, BoxType)> for RectArray { + type Error = GeoArrowError; + + fn try_from((value, dim): (&dyn Array, BoxType)) -> GeoArrowResult { + match value.data_type() { + DataType::Struct(_) => (value.as_struct(), dim).try_into(), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected Rect DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for RectArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field.try_extension_type::()?; + (arr, typ).try_into() + } +} + +impl PartialEq for RectArray { + fn eq(&self, other: &Self) -> bool { + // A naive implementation of PartialEq would check for buffer equality. This won't always + // work for null elements where the actual value can be undefined and doesn't have to be + // equal. As such, it's simplest to reuse the upstream PartialEq impl, especially since + // RectArray only has one coordinate type. + self.clone().into_arrow() == other.clone().into_arrow() + } +} + +// #[cfg(test)] +// mod test { +// use geo_traits::to_geo::ToGeoRect; +// use geoarrow_schema::Dimension; + +// use super::*; +// use crate::{builder::RectBuilder, test::rect}; + +// #[test] +// fn geo_round_trip() { +// let geoms = [Some(rect::r0()), None, Some(rect::r1()), None]; +// let typ = BoxType::new(Dimension::XY, Default::default()); +// let geo_arr = +// RectBuilder::from_nullable_rects(geoms.iter().map(|x| x.as_ref()), typ).finish(); + +// for (i, g) in geo_arr.iter().enumerate() { +// assert_eq!(geoms[i], g.transpose().unwrap().map(|g| g.to_rect())); +// } + +// // Test sliced +// for (i, g) in geo_arr.slice(2, 2).iter().enumerate() { +// assert_eq!(geoms[i + 2], g.transpose().unwrap().map(|g| g.to_rect())); +// } +// } + +// #[test] +// fn try_from_arrow() { +// let geo_arr = rect::r_array(); + +// let extension_type = geo_arr.extension_type().clone(); +// let field = extension_type.to_field("geometry", true); + +// let arrow_arr = geo_arr.to_array_ref(); + +// let geo_arr2: RectArray = (arrow_arr.as_ref(), extension_type).try_into().unwrap(); +// let geo_arr3: RectArray = (arrow_arr.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(geo_arr, geo_arr2); +// assert_eq!(geo_arr, geo_arr3); +// } + +// #[test] +// fn partial_eq() { +// let arr1 = rect::r_array(); +// assert_eq!(arr1, arr1); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/wkb.rs b/src/geoarrow/geoarrow-array/src/array/wkb.rs new file mode 100644 index 0000000000..242daedbf9 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/wkb.rs @@ -0,0 +1,367 @@ +use std::sync::Arc; + +use arrow_array::{ + Array, ArrayRef, BinaryArray, GenericBinaryArray, LargeBinaryArray, OffsetSizeTrait, + builder::GenericByteBuilder, cast::AsArray, +}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + GeoArrowType, Metadata, WkbType, + error::{GeoArrowError, GeoArrowResult}, +}; +use wkb::reader::Wkb; + +use crate::{ + array::WkbViewArray, + capacity::WkbCapacity, + trait_::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow}, + util::{offsets_buffer_i32_to_i64, offsets_buffer_i64_to_i32}, +}; + +/// An immutable array of WKB geometries. +/// +/// This is stored either as an Arrow [`BinaryArray`] or [`LargeBinaryArray`] and is semantically +/// equivalent to `Vec>` due to the internal validity bitmap. +/// +/// Refer to [`crate::cast`] for converting this array to other GeoArrow array types. +#[derive(Debug, Clone, PartialEq)] +pub struct GenericWkbArray { + pub(crate) data_type: WkbType, + pub(crate) array: GenericBinaryArray, +} + +// Implement geometry accessors +impl GenericWkbArray { + /// Create a new GenericWkbArray from a BinaryArray + pub fn new(array: GenericBinaryArray, metadata: Arc) -> Self { + Self { + data_type: WkbType::new(metadata), + array, + } + } + + /// Returns true if the array is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Access the underlying binary array. + pub fn inner(&self) -> &GenericBinaryArray { + &self.array + } + + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> WkbCapacity { + WkbCapacity::new( + self.array.offsets().last().unwrap().to_usize().unwrap(), + self.len(), + ) + } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + let validity_len = self + .array + .nulls() + .as_ref() + .map(|v| v.buffer().len()) + .unwrap_or(0); + validity_len + self.buffer_lengths().num_bytes::() + } + + /// Slice this [`GenericWkbArray`]. + /// + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + array: self.array.slice(offset, length), + data_type: self.data_type.clone(), + } + } + + /// Replace the [Metadata] in the array with the given metadata + pub fn with_metadata(&self, metadata: Arc) -> Self { + let mut arr = self.clone(); + arr.data_type = self.data_type.clone().with_metadata(metadata); + arr + } +} + +impl GeoArrowArray for GenericWkbArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.array.len() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.array.logical_nulls() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.array.logical_null_count() + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.array.is_null(i) + } + + fn data_type(&self) -> GeoArrowType { + if O::IS_LARGE { + GeoArrowType::LargeWkb(self.data_type.clone()) + } else { + GeoArrowType::Wkb(self.data_type.clone()) + } + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(Self::with_metadata(&self, metadata)) + } +} + +impl<'a, O: OffsetSizeTrait> GeoArrowArrayAccessor<'a> for GenericWkbArray { + type Item = Wkb<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + let buf = self.array.value(index); + Wkb::try_new(buf).map_err(|err| GeoArrowError::External(Box::new(err))) + } +} + +impl IntoArrow for GenericWkbArray { + type ArrowArray = GenericBinaryArray; + type ExtensionType = WkbType; + + fn into_arrow(self) -> Self::ArrowArray { + self.array + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl From<(GenericBinaryArray, WkbType)> for GenericWkbArray { + fn from((value, typ): (GenericBinaryArray, WkbType)) -> Self { + Self { + data_type: typ, + array: value, + } + } +} + +impl TryFrom<(&dyn Array, WkbType)> for GenericWkbArray { + type Error = GeoArrowError; + fn try_from((value, typ): (&dyn Array, WkbType)) -> GeoArrowResult { + match value.data_type() { + DataType::Binary => Ok((value.as_binary::().clone(), typ).into()), + DataType::LargeBinary => { + let geom_array: GenericWkbArray = + (value.as_binary::().clone(), typ).into(); + geom_array.try_into() + } + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected GenericWkbArray DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, WkbType)> for GenericWkbArray { + type Error = GeoArrowError; + fn try_from((value, typ): (&dyn Array, WkbType)) -> GeoArrowResult { + match value.data_type() { + DataType::Binary => { + let geom_array: GenericWkbArray = + (value.as_binary::().clone(), typ).into(); + Ok(geom_array.into()) + } + DataType::LargeBinary => Ok((value.as_binary::().clone(), typ).into()), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected GenericWkbArray DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for GenericWkbArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field + .try_extension_type::() + .ok() + .unwrap_or_default(); + (arr, typ).try_into() + } +} + +impl TryFrom<(&dyn Array, &Field)> for GenericWkbArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field + .try_extension_type::() + .ok() + .unwrap_or_default(); + (arr, typ).try_into() + } +} + +impl From> for GenericWkbArray { + fn from(value: GenericWkbArray) -> Self { + let binary_array = value.array; + let (offsets, values, nulls) = binary_array.into_parts(); + let array = LargeBinaryArray::new(offsets_buffer_i32_to_i64(&offsets), values, nulls); + Self { + data_type: value.data_type, + array, + } + } +} + +impl TryFrom> for GenericWkbArray { + type Error = GeoArrowError; + + fn try_from(value: GenericWkbArray) -> GeoArrowResult { + let binary_array = value.array; + let (offsets, values, nulls) = binary_array.into_parts(); + let array = BinaryArray::new(offsets_buffer_i64_to_i32(&offsets)?, values, nulls); + Ok(Self { + data_type: value.data_type, + array, + }) + } +} + +impl From for GenericWkbArray { + fn from(value: WkbViewArray) -> Self { + let wkb_type = value.data_type; + let binary_view_array = value.array; + + // Copy the bytes from the binary view array into a new byte array + let mut builder = GenericByteBuilder::new(); + binary_view_array + .iter() + .for_each(|value| builder.append_option(value)); + + Self { + data_type: wkb_type, + array: builder.finish(), + } + } +} + +/// A [`GenericWkbArray`] using `i32` offsets +/// +/// The byte length of each element is represented by an i32. +/// +/// See [`GenericWkbArray`] for more information and examples +pub type WkbArray = GenericWkbArray; + +/// A [`GenericWkbArray`] using `i64` offsets +/// +/// The byte length of each element is represented by an i64. +/// +/// See [`GenericWkbArray`] for more information and examples +pub type LargeWkbArray = GenericWkbArray; + +// #[cfg(test)] +// mod test { +// use arrow_array::builder::{BinaryBuilder, LargeBinaryBuilder}; + +// use super::*; +// use crate::{GeoArrowArray, builder::WkbBuilder, test::point}; + +// fn wkb_data() -> GenericWkbArray { +// let mut builder = WkbBuilder::new(WkbType::new(Default::default())); +// builder.push_geometry(Some(&point::p0())).unwrap(); +// builder.push_geometry(Some(&point::p1())).unwrap(); +// builder.push_geometry(Some(&point::p2())).unwrap(); +// builder.finish() +// } + +// #[test] +// fn parse_dyn_array_i32() { +// let wkb_array = wkb_data::(); +// let array = wkb_array.to_array_ref(); +// let field = Field::new("geometry", array.data_type().clone(), true) +// .with_extension_type(wkb_array.data_type.clone()); +// let wkb_array_retour: GenericWkbArray = (array.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(wkb_array, wkb_array_retour); +// } + +// #[test] +// fn parse_dyn_array_i64() { +// let wkb_array = wkb_data::(); +// let array = wkb_array.to_array_ref(); +// let field = Field::new("geometry", array.data_type().clone(), true) +// .with_extension_type(wkb_array.data_type.clone()); +// let wkb_array_retour: GenericWkbArray = (array.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(wkb_array, wkb_array_retour); +// } + +// #[test] +// fn convert_i32_to_i64() { +// let wkb_array = wkb_data::(); +// let wkb_array_i64: GenericWkbArray = wkb_array.clone().into(); +// let wkb_array_i32: GenericWkbArray = wkb_array_i64.clone().try_into().unwrap(); + +// assert_eq!(wkb_array, wkb_array_i32); +// } + +// #[test] +// fn convert_i64_to_i32_to_i64() { +// let wkb_array = wkb_data::(); +// let wkb_array_i32: GenericWkbArray = wkb_array.clone().try_into().unwrap(); +// let wkb_array_i64: GenericWkbArray = wkb_array_i32.clone().into(); + +// assert_eq!(wkb_array, wkb_array_i64); +// } + +// /// Passing a field without an extension name should not panic +// #[test] +// fn allow_field_without_extension_name() { +// // String array +// let mut builder = BinaryBuilder::new(); +// builder.append_value(b"a"); +// let array = Arc::new(builder.finish()) as ArrayRef; +// let field = Field::new("geometry", array.data_type().clone(), true); +// let _wkt_arr = GenericWkbArray::::try_from((array.as_ref(), &field)).unwrap(); + +// // Large string +// let mut builder = LargeBinaryBuilder::new(); +// builder.append_value(b"a"); +// let array = Arc::new(builder.finish()) as ArrayRef; +// let field = Field::new("geometry", array.data_type().clone(), true); +// let _wkt_arr = GenericWkbArray::::try_from((array.as_ref(), &field)).unwrap(); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/wkb_view.rs b/src/geoarrow/geoarrow-array/src/array/wkb_view.rs new file mode 100644 index 0000000000..807789d033 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/wkb_view.rs @@ -0,0 +1,185 @@ +use std::sync::Arc; + +use arrow_array::{ + Array, ArrayRef, BinaryViewArray, OffsetSizeTrait, builder::BinaryViewBuilder, cast::AsArray, +}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + GeoArrowType, Metadata, WkbType, + error::{GeoArrowError, GeoArrowResult}, +}; +use wkb::reader::Wkb; + +use crate::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, array::GenericWkbArray}; + +/// An immutable array of WKB geometries. +/// +/// This is stored as an Arrow [`BinaryViewArray`] and is semantically equivalent to +/// `Vec>` due to the internal validity bitmap. +/// +/// Refer to [`crate::cast`] for converting this array to other GeoArrow array types. +#[derive(Debug, Clone, PartialEq)] +pub struct WkbViewArray { + pub(crate) data_type: WkbType, + pub(crate) array: BinaryViewArray, +} + +impl WkbViewArray { + /// Create a new GenericWkbArray from a BinaryArray + pub fn new(array: BinaryViewArray, metadata: Arc) -> Self { + Self { + data_type: WkbType::new(metadata), + array, + } + } + + /// Returns true if the array is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Slice this [`GenericWkbArray`]. + /// + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + array: self.array.slice(offset, length), + data_type: self.data_type.clone(), + } + } + + /// Replace the [Metadata] in the array with the given metadata + pub fn with_metadata(&self, metadata: Arc) -> Self { + let mut arr = self.clone(); + arr.data_type = self.data_type.clone().with_metadata(metadata); + arr + } +} + +impl GeoArrowArray for WkbViewArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.array.len() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.array.logical_nulls() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.array.logical_null_count() + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.array.is_null(i) + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::WkbView(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(Self::with_metadata(&self, metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for WkbViewArray { + type Item = Wkb<'a>; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + let buf = self.array.value(index); + Wkb::try_new(buf).map_err(|err| GeoArrowError::External(Box::new(err))) + } +} + +impl IntoArrow for WkbViewArray { + type ArrowArray = BinaryViewArray; + type ExtensionType = WkbType; + + fn into_arrow(self) -> Self::ArrowArray { + self.array + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl From<(BinaryViewArray, WkbType)> for WkbViewArray { + fn from((value, typ): (BinaryViewArray, WkbType)) -> Self { + Self { + data_type: typ, + array: value, + } + } +} + +impl TryFrom<(&dyn Array, WkbType)> for WkbViewArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, WkbType)) -> GeoArrowResult { + match value.data_type() { + DataType::BinaryView => Ok((value.as_binary_view().clone(), typ).into()), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected WkbView DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for WkbViewArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field + .try_extension_type::() + .ok() + .unwrap_or_default(); + (arr, typ).try_into() + } +} + +impl From> for WkbViewArray { + fn from(value: GenericWkbArray) -> Self { + let wkb_type = value.data_type; + let binary_view_array = value.array; + + // Copy the bytes from the binary view array into a new byte array + let mut builder = BinaryViewBuilder::new(); + binary_view_array + .iter() + .for_each(|value| builder.append_option(value)); + + Self { + data_type: wkb_type, + array: builder.finish(), + } + } +} diff --git a/src/geoarrow/geoarrow-array/src/array/wkt.rs b/src/geoarrow/geoarrow-array/src/array/wkt.rs new file mode 100644 index 0000000000..5f45b5613e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/wkt.rs @@ -0,0 +1,345 @@ +use std::{str::FromStr, sync::Arc}; + +use arrow_array::{ + Array, ArrayRef, GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, + builder::GenericStringBuilder, cast::AsArray, +}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + GeoArrowType, Metadata, WktType, + error::{GeoArrowError, GeoArrowResult}, +}; +use wkt::Wkt; + +use crate::{ + GeoArrowArrayAccessor, + array::WktViewArray, + trait_::{GeoArrowArray, IntoArrow}, + util::{offsets_buffer_i32_to_i64, offsets_buffer_i64_to_i32}, +}; + +/// An immutable array of WKT geometries using GeoArrow's in-memory representation. +/// +/// This is a wrapper around an Arrow [GenericStringArray] and is semantically equivalent to +/// `Vec>` due to the internal validity bitmap. +/// +/// Refer to [`crate::cast`] for converting this array to other GeoArrow array types. +#[derive(Debug, Clone, PartialEq)] +pub struct GenericWktArray { + pub(crate) data_type: WktType, + pub(crate) array: GenericStringArray, +} + +// Implement geometry accessors +impl GenericWktArray { + /// Create a new GenericWktArray from a StringArray + pub fn new(array: GenericStringArray, metadata: Arc) -> Self { + Self { + data_type: WktType::new(metadata), + array, + } + } + + /// Returns true if the array is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Access the underlying string array. + pub fn inner(&self) -> &GenericStringArray { + &self.array + } + + /// Slice this [`GenericWktArray`]. + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + array: self.array.slice(offset, length), + data_type: self.data_type.clone(), + } + } + + /// Replace the [`Metadata`] contained in this array. + pub fn with_metadata(&self, metadata: Arc) -> Self { + let mut arr = self.clone(); + arr.data_type = self.data_type.clone().with_metadata(metadata); + arr + } +} + +impl GeoArrowArray for GenericWktArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.array.len() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.array.logical_nulls() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.array.logical_null_count() + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.array.is_null(i) + } + + fn data_type(&self) -> GeoArrowType { + if O::IS_LARGE { + GeoArrowType::LargeWkt(self.data_type.clone()) + } else { + GeoArrowType::Wkt(self.data_type.clone()) + } + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(Self::with_metadata(&self, metadata)) + } +} + +impl<'a, O: OffsetSizeTrait> GeoArrowArrayAccessor<'a> for GenericWktArray { + type Item = Wkt; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + let s = unsafe { self.array.value_unchecked(index) }; + Wkt::from_str(s).map_err(|err| GeoArrowError::Wkt(err.to_string())) + } +} + +impl IntoArrow for GenericWktArray { + type ArrowArray = GenericStringArray; + type ExtensionType = WktType; + + fn into_arrow(self) -> Self::ArrowArray { + GenericStringArray::new( + self.array.offsets().clone(), + self.array.values().clone(), + self.array.nulls().cloned(), + ) + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl From<(GenericStringArray, WktType)> for GenericWktArray { + fn from((value, typ): (GenericStringArray, WktType)) -> Self { + Self::new(value, typ.metadata().clone()) + } +} + +impl TryFrom<(&dyn Array, WktType)> for GenericWktArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, WktType)) -> GeoArrowResult { + match value.data_type() { + DataType::Utf8 => Ok((value.as_string::().clone(), typ).into()), + DataType::LargeUtf8 => { + let geom_array: GenericWktArray = + (value.as_string::().clone(), typ).into(); + geom_array.try_into() + } + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected WktArray DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, WktType)> for GenericWktArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, WktType)) -> GeoArrowResult { + match value.data_type() { + DataType::Utf8 => { + let geom_array: GenericWktArray = + (value.as_string::().clone(), typ).into(); + Ok(geom_array.into()) + } + DataType::LargeUtf8 => Ok((value.as_string::().clone(), typ).into()), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected WktArray DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for GenericWktArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field + .try_extension_type::() + .ok() + .unwrap_or_default(); + (arr, typ).try_into() + } +} + +impl TryFrom<(&dyn Array, &Field)> for GenericWktArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field + .try_extension_type::() + .ok() + .unwrap_or_default(); + (arr, typ).try_into() + } +} + +impl From> for GenericWktArray { + fn from(value: GenericWktArray) -> Self { + let binary_array = value.array; + let (offsets, values, nulls) = binary_array.into_parts(); + Self { + data_type: value.data_type, + array: LargeStringArray::new(offsets_buffer_i32_to_i64(&offsets), values, nulls), + } + } +} + +impl TryFrom> for GenericWktArray { + type Error = GeoArrowError; + + fn try_from(value: GenericWktArray) -> GeoArrowResult { + let binary_array = value.array; + let (offsets, values, nulls) = binary_array.into_parts(); + Ok(Self { + data_type: value.data_type, + array: StringArray::new(offsets_buffer_i64_to_i32(&offsets)?, values, nulls), + }) + } +} + +impl From for GenericWktArray { + fn from(value: WktViewArray) -> Self { + let wkb_type = value.data_type; + let binary_view_array = value.array; + + // Copy the bytes from the binary view array into a new byte array + let mut builder = GenericStringBuilder::new(); + binary_view_array + .iter() + .for_each(|value| builder.append_option(value)); + + Self { + data_type: wkb_type, + array: builder.finish(), + } + } +} + +/// A [`GenericWktArray`] using `i32` offsets +/// +/// The byte length of each element is represented by an i32. +/// +/// See [`GenericWktArray`] for more information and examples +pub type WktArray = GenericWktArray; + +/// A [`GenericWktArray`] using `i64` offsets +/// +/// The byte length of each element is represented by an i64. +/// +/// See [`GenericWktArray`] for more information and examples +pub type LargeWktArray = GenericWktArray; + +// #[cfg(test)] +// mod test { +// use arrow_array::builder::{LargeStringBuilder, StringBuilder}; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; +// use crate::{GeoArrowArray, cast::to_wkt, test::point}; + +// fn wkt_data() -> GenericWktArray { +// to_wkt(&point::array(CoordType::Interleaved, Dimension::XY)).unwrap() +// } + +// #[test] +// fn parse_dyn_array_i32() { +// let wkb_array = wkt_data::(); +// let array = wkb_array.to_array_ref(); +// let field = Field::new("geometry", array.data_type().clone(), true) +// .with_extension_type(wkb_array.data_type.clone()); +// let wkb_array_retour: GenericWktArray = (array.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(wkb_array, wkb_array_retour); +// } + +// #[test] +// fn parse_dyn_array_i64() { +// let wkb_array = wkt_data::(); +// let array = wkb_array.to_array_ref(); +// let field = Field::new("geometry", array.data_type().clone(), true) +// .with_extension_type(wkb_array.data_type.clone()); +// let wkb_array_retour: GenericWktArray = (array.as_ref(), &field).try_into().unwrap(); + +// assert_eq!(wkb_array, wkb_array_retour); +// } + +// #[test] +// fn convert_i32_to_i64() { +// let wkb_array = wkt_data::(); +// let wkb_array_i64: GenericWktArray = wkb_array.clone().into(); +// let wkb_array_i32: GenericWktArray = wkb_array_i64.clone().try_into().unwrap(); + +// assert_eq!(wkb_array, wkb_array_i32); +// } + +// #[test] +// fn convert_i64_to_i32_to_i64() { +// let wkb_array = wkt_data::(); +// let wkb_array_i32: GenericWktArray = wkb_array.clone().try_into().unwrap(); +// let wkb_array_i64: GenericWktArray = wkb_array_i32.clone().into(); + +// assert_eq!(wkb_array, wkb_array_i64); +// } + +// /// Passing a field without an extension name should not panic +// #[test] +// fn allow_field_without_extension_name() { +// // String array +// let mut builder = StringBuilder::new(); +// builder.append_value("POINT(1 2)"); +// let array = Arc::new(builder.finish()) as ArrayRef; +// let field = Field::new("geometry", array.data_type().clone(), true); +// let _wkt_arr = GenericWktArray::::try_from((array.as_ref(), &field)).unwrap(); + +// // Large string +// let mut builder = LargeStringBuilder::new(); +// builder.append_value("POINT(1 2)"); +// let array = Arc::new(builder.finish()) as ArrayRef; +// let field = Field::new("geometry", array.data_type().clone(), true); +// let _wkt_arr = GenericWktArray::::try_from((array.as_ref(), &field)).unwrap(); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/array/wkt_view.rs b/src/geoarrow/geoarrow-array/src/array/wkt_view.rs new file mode 100644 index 0000000000..4ba6e52b25 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/array/wkt_view.rs @@ -0,0 +1,190 @@ +use std::{str::FromStr, sync::Arc}; + +use arrow_array::{ + Array, ArrayRef, OffsetSizeTrait, StringViewArray, builder::StringViewBuilder, cast::AsArray, +}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field}; +use geoarrow_schema::{ + GeoArrowType, Metadata, WktType, + error::{GeoArrowError, GeoArrowResult}, +}; +use wkt::Wkt; + +use crate::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, array::GenericWktArray}; + +/// An immutable array of WKT geometries. +/// +/// This is stored as an Arrow [`StringViewArray`] and is semantically equivalent to +/// `Vec>` due to the internal validity bitmap. +/// +/// Refer to [`crate::cast`] for converting this array to other GeoArrow array types. +#[derive(Debug, Clone, PartialEq)] +pub struct WktViewArray { + pub(crate) data_type: WktType, + pub(crate) array: StringViewArray, +} + +impl WktViewArray { + /// Create a new WktViewArray from a StringViewArray + pub fn new(array: StringViewArray, metadata: Arc) -> Self { + Self { + data_type: WktType::new(metadata), + array, + } + } + + /// Returns true if the array is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Access the underlying string array. + pub fn inner(&self) -> &StringViewArray { + &self.array + } + + /// Slice this [`WktViewArray`]. + /// + /// + /// # Panic + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + array: self.array.slice(offset, length), + data_type: self.data_type.clone(), + } + } + + /// Replace the [Metadata] in the array with the given metadata + pub fn with_metadata(&self, metadata: Arc) -> Self { + let mut arr = self.clone(); + arr.data_type = self.data_type.clone().with_metadata(metadata); + arr + } +} + +impl GeoArrowArray for WktViewArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn into_array_ref(self) -> ArrayRef { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> ArrayRef { + self.clone().into_array_ref() + } + + #[inline] + fn len(&self) -> usize { + self.array.len() + } + + #[inline] + fn logical_nulls(&self) -> Option { + self.array.logical_nulls() + } + + #[inline] + fn logical_null_count(&self) -> usize { + self.array.logical_null_count() + } + + #[inline] + fn is_null(&self, i: usize) -> bool { + self.array.is_null(i) + } + + fn data_type(&self) -> GeoArrowType { + GeoArrowType::WktView(self.data_type.clone()) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + Arc::new(Self::with_metadata(&self, metadata)) + } +} + +impl<'a> GeoArrowArrayAccessor<'a> for WktViewArray { + type Item = Wkt; + + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult { + let s = unsafe { self.array.value_unchecked(index) }; + Wkt::from_str(s).map_err(|err| GeoArrowError::Wkt(err.to_string())) + } +} + +impl IntoArrow for WktViewArray { + type ArrowArray = StringViewArray; + type ExtensionType = WktType; + + fn into_arrow(self) -> Self::ArrowArray { + self.array + } + + fn extension_type(&self) -> &Self::ExtensionType { + &self.data_type + } +} + +impl From<(StringViewArray, WktType)> for WktViewArray { + fn from((value, typ): (StringViewArray, WktType)) -> Self { + Self { + data_type: typ, + array: value, + } + } +} + +impl TryFrom<(&dyn Array, WktType)> for WktViewArray { + type Error = GeoArrowError; + + fn try_from((value, typ): (&dyn Array, WktType)) -> GeoArrowResult { + match value.data_type() { + DataType::Utf8View => Ok((value.as_string_view().clone(), typ).into()), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected WktView DataType: {dt:?}", + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for WktViewArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> GeoArrowResult { + let typ = field + .try_extension_type::() + .ok() + .unwrap_or_default(); + (arr, typ).try_into() + } +} + +impl From> for WktViewArray { + fn from(value: GenericWktArray) -> Self { + let wkb_type = value.data_type; + let binary_view_array = value.array; + + // Copy the bytes from the binary view array into a new byte array + let mut builder = StringViewBuilder::new(); + binary_view_array + .iter() + .for_each(|value| builder.append_option(value)); + + Self { + data_type: wkb_type, + array: builder.finish(), + } + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/coord/combined.rs b/src/geoarrow/geoarrow-array/src/builder/coord/combined.rs new file mode 100644 index 0000000000..a8a33eac55 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/coord/combined.rs @@ -0,0 +1,180 @@ +use core::f64; + +use geo_traits::{CoordTrait, PointTrait}; +use geoarrow_schema::{CoordType, Dimension, error::GeoArrowResult}; + +use crate::{ + array::CoordBuffer, + builder::{InterleavedCoordBufferBuilder, SeparatedCoordBufferBuilder}, +}; + +/// The GeoArrow equivalent to `Vec`: a mutable collection of coordinates. +/// +/// Converting an [`CoordBufferBuilder`] into a [`CoordBuffer`] is `O(1)`. +#[derive(Debug, Clone)] +pub enum CoordBufferBuilder { + /// Interleaved coordinates + Interleaved(InterleavedCoordBufferBuilder), + /// Separated coordinates + Separated(SeparatedCoordBufferBuilder), +} + +impl CoordBufferBuilder { + /// Initialize a buffer of a given length with all coordinates set to the given value. + pub fn initialize(len: usize, coord_type: CoordType, dim: Dimension, value: f64) -> Self { + match coord_type { + CoordType::Interleaved => CoordBufferBuilder::Interleaved( + InterleavedCoordBufferBuilder::initialize(len, dim, value), + ), + CoordType::Separated => CoordBufferBuilder::Separated( + SeparatedCoordBufferBuilder::initialize(len, dim, value), + ), + } + } + + /// Create a new builder with the given capacity and dimension + pub fn with_capacity(len: usize, coord_type: CoordType, dim: Dimension) -> Self { + match coord_type { + CoordType::Interleaved => CoordBufferBuilder::Interleaved( + InterleavedCoordBufferBuilder::with_capacity(len, dim), + ), + CoordType::Separated => { + CoordBufferBuilder::Separated(SeparatedCoordBufferBuilder::with_capacity(len, dim)) + } + } + } + + /// Reserves capacity for at least `additional` more coordinates to be inserted + /// in the given `Vec`. The collection may reserve more space to + /// speculatively avoid frequent reallocations. After calling `reserve`, + /// capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: usize) { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.reserve(additional), + CoordBufferBuilder::Separated(cb) => cb.reserve(additional), + } + } + + /// Reserves the minimum capacity for at least `additional` more coordinates. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: usize) { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.reserve_exact(additional), + CoordBufferBuilder::Separated(cb) => cb.reserve_exact(additional), + } + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.shrink_to_fit(), + CoordBufferBuilder::Separated(cb) => cb.shrink_to_fit(), + } + } + + /// Returns the total number of coordinates the vector can hold without reallocating. + pub fn capacity(&self) -> usize { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.capacity(), + CoordBufferBuilder::Separated(cb) => cb.capacity(), + } + } + + /// The number of coordinates + pub fn len(&self) -> usize { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.len(), + CoordBufferBuilder::Separated(cb) => cb.len(), + } + } + + /// Whether the buffer is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// The underlying coordinate type + pub fn coord_type(&self) -> CoordType { + match self { + CoordBufferBuilder::Interleaved(_) => CoordType::Interleaved, + CoordBufferBuilder::Separated(_) => CoordType::Separated, + } + } + + /// Push a new coord onto the end of this coordinate buffer + /// + /// ## Panics + /// + /// - If the added coordinate does not have the same dimension as the coordinate buffer. + pub fn push_coord(&mut self, coord: &impl CoordTrait) { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.push_coord(coord), + CoordBufferBuilder::Separated(cb) => cb.push_coord(coord), + } + } + + /// Push a new coord onto the end of this coordinate buffer + /// + /// ## Errors + /// + /// - If the added coordinate does not have the same dimension as the coordinate buffer. + pub fn try_push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.try_push_coord(coord), + CoordBufferBuilder::Separated(cb) => cb.try_push_coord(coord), + } + } + + /// Push a valid coordinate with the given constant value + /// + /// Used in the case of point and rect arrays, where a `null` array value still needs to have + /// space allocated for it. + pub(crate) fn push_constant(&mut self, value: f64) { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.push_constant(value), + CoordBufferBuilder::Separated(cb) => cb.push_constant(value), + } + } + + /// Push a new point onto the end of this coordinate buffer + /// + /// ## Panics + /// + /// - If the added point does not have the same dimension as the coordinate buffer. + pub fn push_point(&mut self, point: &impl PointTrait) { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.push_point(point), + CoordBufferBuilder::Separated(cb) => cb.push_point(point), + } + } + + /// Push a new point onto the end of this coordinate buffer + /// + /// ## Errors + /// + /// - If the added point does not have the same dimension as the coordinate buffer. + pub fn try_push_point(&mut self, point: &impl PointTrait) -> GeoArrowResult<()> { + match self { + CoordBufferBuilder::Interleaved(cb) => cb.try_push_point(point), + CoordBufferBuilder::Separated(cb) => cb.try_push_point(point), + } + } + + /// Consume the builder and convert to an immutable [`CoordBuffer`] + pub fn finish(self) -> CoordBuffer { + match self { + CoordBufferBuilder::Interleaved(cb) => CoordBuffer::Interleaved(cb.finish()), + CoordBufferBuilder::Separated(cb) => CoordBuffer::Separated(cb.finish()), + } + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/coord/interleaved.rs b/src/geoarrow/geoarrow-array/src/builder/coord/interleaved.rs new file mode 100644 index 0000000000..12af492ef8 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/coord/interleaved.rs @@ -0,0 +1,240 @@ +use core::f64; + +use geo_traits::{CoordTrait, PointTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::array::InterleavedCoordBuffer; + +/// The GeoArrow equivalent to `Vec`: a mutable collection of coordinates. +/// +/// This stores all coordinates in interleaved fashion as `xyxyxy`. +/// +/// Converting an [`InterleavedCoordBufferBuilder`] into a [`InterleavedCoordBuffer`] is `O(1)`. +#[derive(Debug, Clone)] +pub struct InterleavedCoordBufferBuilder { + pub(crate) coords: Vec, + dim: Dimension, +} + +impl InterleavedCoordBufferBuilder { + /// Create a new empty builder with the given dimension + pub fn new(dim: Dimension) -> Self { + Self::with_capacity(0, dim) + } + + /// Create a new builder with the given capacity and dimension + pub fn with_capacity(capacity: usize, dim: Dimension) -> Self { + Self { + coords: Vec::with_capacity(capacity * dim.size()), + dim, + } + } + + /// Initialize a buffer of a given length with all coordinates set to the given value + pub fn initialize(len: usize, dim: Dimension, value: f64) -> Self { + Self { + coords: vec![value; len * dim.size()], + dim, + } + } + + /// Reserves capacity for at least `additional` more coordinates. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: usize) { + self.coords.reserve(additional * self.dim.size()); + } + + /// Reserves the minimum capacity for at least `additional` more coordinates. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: usize) { + self.coords.reserve_exact(additional * self.dim.size()); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.coords.shrink_to_fit(); + } + + /// Returns the total number of coordinates the vector can hold without reallocating. + pub fn capacity(&self) -> usize { + self.coords.capacity() / self.dim.size() + } + + /// The number of coordinates in this builder + pub fn len(&self) -> usize { + self.coords.len() / self.dim.size() + } + + /// Whether this builder is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Push a new coord onto the end of this coordinate buffer + /// + /// ## Panics + /// + /// - If the added coordinate does not have the same dimension as the coordinate buffer. + pub fn push_coord(&mut self, coord: &impl CoordTrait) { + self.try_push_coord(coord).unwrap() + } + + /// Push a new coord onto the end of this coordinate buffer + /// + /// ## Errors + /// + /// - If the added coordinate does not have the same dimension as the coordinate buffer. + pub fn try_push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + // Note duplicated across buffer types; consider refactoring + match self.dim { + Dimension::XY => match coord.dim() { + geo_traits::Dimensions::Xy | geo_traits::Dimensions::Unknown(2) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XY for this buffer; got {d:?}." + ))); + } + }, + Dimension::XYZ => match coord.dim() { + geo_traits::Dimensions::Xyz | geo_traits::Dimensions::Unknown(3) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XYZ for this buffer; got {d:?}." + ))); + } + }, + Dimension::XYM => match coord.dim() { + geo_traits::Dimensions::Xym | geo_traits::Dimensions::Unknown(3) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XYM for this buffer; got {d:?}." + ))); + } + }, + Dimension::XYZM => match coord.dim() { + geo_traits::Dimensions::Xyzm | geo_traits::Dimensions::Unknown(4) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XYZM for this buffer; got {d:?}." + ))); + } + }, + } + + self.coords.push(coord.x()); + self.coords.push(coord.y()); + if let Some(z) = coord.nth(2) { + self.coords.push(z); + }; + if let Some(m) = coord.nth(3) { + self.coords.push(m); + }; + Ok(()) + } + + /// Push a valid coordinate with the given constant value + /// + /// Used in the case of point and rect arrays, where a `null` array value still needs to have + /// space allocated for it. + pub(crate) fn push_constant(&mut self, value: f64) { + for _ in 0..self.dim.size() { + self.coords.push(value); + } + } + + /// Push a new point onto the end of this coordinate buffer + /// + /// ## Panics + /// + /// - If the added point does not have the same dimension as the coordinate buffer. + pub(crate) fn push_point(&mut self, point: &impl PointTrait) { + self.try_push_point(point).unwrap() + } + + /// Push a new point onto the end of this coordinate buffer + /// + /// ## Errors + /// + /// - If the added point does not have the same dimension as the coordinate buffer. + pub(crate) fn try_push_point( + &mut self, + point: &impl PointTrait, + ) -> GeoArrowResult<()> { + if let Some(coord) = point.coord() { + self.try_push_coord(&coord)?; + } else { + self.push_constant(f64::NAN); + }; + Ok(()) + } + + /// Construct a new builder and pre-fill it with coordinates from the provided iterator + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> GeoArrowResult { + let mut buffer = InterleavedCoordBufferBuilder::with_capacity(coords.len(), dim); + for coord in coords { + buffer.push_coord(coord); + } + Ok(buffer) + } + + /// Consume the builder and convert to an immutable [`InterleavedCoordBuffer`] + pub fn finish(self) -> InterleavedCoordBuffer { + InterleavedCoordBuffer::new(self.coords.into(), self.dim) + } +} + +// #[cfg(test)] +// mod test { +// use wkt::types::Coord; + +// use super::*; + +// #[test] +// fn errors_when_pushing_incompatible_coord() { +// let mut builder = InterleavedCoordBufferBuilder::new(Dimension::XY); +// builder +// .try_push_coord(&Coord { +// x: 0.0, +// y: 0.0, +// z: Some(0.0), +// m: None, +// }) +// .expect_err("Should err pushing XYZ to XY buffer"); + +// let mut builder = InterleavedCoordBufferBuilder::new(Dimension::XYZ); +// builder +// .try_push_coord(&Coord { +// x: 0.0, +// y: 0.0, +// z: None, +// m: None, +// }) +// .expect_err("Should err pushing XY to XYZ buffer"); +// builder +// .try_push_coord(&Coord { +// x: 0.0, +// y: 0.0, +// z: Some(0.0), +// m: None, +// }) +// .unwrap(); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/builder/coord/mod.rs b/src/geoarrow/geoarrow-array/src/builder/coord/mod.rs new file mode 100644 index 0000000000..adc784c423 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/coord/mod.rs @@ -0,0 +1,13 @@ +//! Contains implementations for how to encode arrays of coordinates for all other geometry array +//! types. +//! +//! Coordinates can be either _interleaved_, where they're represented as a `FixedSizeList`, or +//! _separated_, where they're represented with a `StructArray`. + +mod combined; +mod interleaved; +mod separated; + +pub use combined::CoordBufferBuilder; +pub use interleaved::InterleavedCoordBufferBuilder; +pub use separated::SeparatedCoordBufferBuilder; diff --git a/src/geoarrow/geoarrow-array/src/builder/coord/separated.rs b/src/geoarrow/geoarrow-array/src/builder/coord/separated.rs new file mode 100644 index 0000000000..bb8ee45649 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/coord/separated.rs @@ -0,0 +1,261 @@ +use geo_traits::{CoordTrait, PointTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::array::SeparatedCoordBuffer; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of coordinates. +/// +/// This stores all coordinates in separated fashion as multiple arrays: `xxx` and `yyy`. +/// +/// Converting an [`SeparatedCoordBufferBuilder`] into a [`SeparatedCoordBuffer`] is `O(1)`. +#[derive(Debug, Clone)] +pub struct SeparatedCoordBufferBuilder { + buffers: [Vec; 4], + dim: Dimension, +} + +impl SeparatedCoordBufferBuilder { + /// Create a new empty builder with the given dimension + pub fn new(dim: Dimension) -> Self { + Self::with_capacity(0, dim) + } + + /// Create a new builder with the given capacity and dimension + pub fn with_capacity(capacity: usize, dim: Dimension) -> Self { + // Only allocate buffers for existent dimensions + let buffers = core::array::from_fn(|i| { + if i < dim.size() { + Vec::with_capacity(capacity) + } else { + Vec::new() + } + }); + + Self { buffers, dim } + } + + /// Initialize a buffer of a given length with all coordinates set to the given value + pub fn initialize(len: usize, dim: Dimension, value: f64) -> Self { + // Only allocate buffers for existent dimensions + let buffers = core::array::from_fn(|i| { + if i < dim.size() { + vec![value; len] + } else { + Vec::new() + } + }); + + Self { buffers, dim } + } + + /// Reserves capacity for at least `additional` more coordinates. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: usize) { + self.buffers + .iter_mut() + .for_each(|buffer| buffer.reserve(additional)) + } + + /// Reserves the minimum capacity for at least `additional` more coordinates. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: usize) { + self.buffers + .iter_mut() + .for_each(|buffer| buffer.reserve_exact(additional)) + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.buffers + .iter_mut() + .for_each(|buffer| buffer.shrink_to_fit()); + } + + /// Returns the total number of coordinates the vector can hold without reallocating. + pub fn capacity(&self) -> usize { + self.buffers[0].capacity() + } + + /// The number of coordinates in this builder + pub fn len(&self) -> usize { + self.buffers[0].len() + } + + /// Whether this builder is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Push a new coord onto the end of this coordinate buffer + /// + /// ## Panics + /// + /// - If the added coordinate does not have the same dimension as the coordinate buffer. + pub fn push_coord(&mut self, coord: &impl CoordTrait) { + self.try_push_coord(coord).unwrap() + } + + /// Push a new coord onto the end of this coordinate buffer + /// + /// ## Errors + /// + /// - If the added coordinate does not have the same dimension as the coordinate buffer. + pub fn try_push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + // Note duplicated across buffer types; consider refactoring + match self.dim { + Dimension::XY => match coord.dim() { + geo_traits::Dimensions::Xy | geo_traits::Dimensions::Unknown(2) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XY for this buffer; got {d:?}." + ))); + } + }, + Dimension::XYZ => match coord.dim() { + geo_traits::Dimensions::Xyz | geo_traits::Dimensions::Unknown(3) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XYZ for this buffer; got {d:?}." + ))); + } + }, + Dimension::XYM => match coord.dim() { + geo_traits::Dimensions::Xym | geo_traits::Dimensions::Unknown(3) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XYM for this buffer; got {d:?}." + ))); + } + }, + Dimension::XYZM => match coord.dim() { + geo_traits::Dimensions::Xyzm | geo_traits::Dimensions::Unknown(4) => {} + d => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "coord dimension must be XYZM for this buffer; got {d:?}." + ))); + } + }, + } + + self.buffers[0].push(coord.x()); + self.buffers[1].push(coord.y()); + if let Some(z) = coord.nth(2) { + self.buffers[2].push(z); + }; + if let Some(m) = coord.nth(3) { + self.buffers[3].push(m); + }; + Ok(()) + } + + /// Push a valid coordinate with the given constant value + /// + /// Used in the case of point and rect arrays, where a `null` array value still needs to have + /// space allocated for it. + pub(crate) fn push_constant(&mut self, value: f64) { + for i in 0..self.dim.size() { + self.buffers[i].push(value); + } + } + + /// Push a new point onto the end of this coordinate buffer + /// + /// ## Panics + /// + /// - If the added point does not have the same dimension as the coordinate buffer. + pub(crate) fn push_point(&mut self, point: &impl PointTrait) { + self.try_push_point(point).unwrap() + } + + /// Push a new point onto the end of this coordinate buffer + /// + /// ## Errors + /// + /// - If the added point does not have the same dimension as the coordinate buffer. + pub(crate) fn try_push_point( + &mut self, + point: &impl PointTrait, + ) -> GeoArrowResult<()> { + if let Some(coord) = point.coord() { + self.try_push_coord(&coord)?; + } else { + self.push_constant(f64::NAN); + }; + Ok(()) + } + + /// Construct a new builder and pre-fill it with coordinates from the provided iterator + pub fn from_coords<'a>( + coords: impl ExactSizeIterator + 'a)>, + dim: Dimension, + ) -> GeoArrowResult { + let mut buffer = SeparatedCoordBufferBuilder::with_capacity(coords.len(), dim); + for coord in coords { + buffer.try_push_coord(coord)?; + } + Ok(buffer) + } + + /// Consume the builder and convert to an immutable [`SeparatedCoordBuffer`] + pub fn finish(self) -> SeparatedCoordBuffer { + // Initialize buffers with empty array, then mutate into it + let mut buffers = core::array::from_fn(|_| vec![].into()); + for (i, buffer) in self.buffers.into_iter().enumerate() { + buffers[i] = buffer.into(); + } + SeparatedCoordBuffer::from_array(buffers, self.dim).unwrap() + } +} + +// #[cfg(test)] +// mod test { +// use wkt::types::Coord; + +// use super::*; + +// #[test] +// fn errors_when_pushing_incompatible_coord() { +// let mut builder = SeparatedCoordBufferBuilder::new(Dimension::XY); +// builder +// .try_push_coord(&Coord { +// x: 0.0, +// y: 0.0, +// z: Some(0.0), +// m: None, +// }) +// .expect_err("Should err pushing XYZ to XY buffer"); + +// let mut builder = SeparatedCoordBufferBuilder::new(Dimension::XYZ); +// builder +// .try_push_coord(&Coord { +// x: 0.0, +// y: 0.0, +// z: None, +// m: None, +// }) +// .expect_err("Should err pushing XY to XYZ buffer"); +// builder +// .try_push_coord(&Coord { +// x: 0.0, +// y: 0.0, +// z: Some(0.0), +// m: None, +// }) +// .unwrap(); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/builder/geo_trait_wrappers.rs b/src/geoarrow/geoarrow-array/src/builder/geo_trait_wrappers.rs new file mode 100644 index 0000000000..052624c0ad --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/geo_trait_wrappers.rs @@ -0,0 +1,509 @@ +//! Wrappers around `RectTrait`, `TriangleTrait`, and `LineTrait` to implement +//! `PolygonTrait`, `PolygonTrait` and `LineStringTrait` traits, respectively. +//! +//! This makes it easier to use `Rect`, `Triangle`, and `Line` types because we don't have to add +//! specialized code for them. + +use geo_traits::{ + CoordTrait, GeometryTrait, LineStringTrait, LineTrait, PolygonTrait, RectTrait, TriangleTrait, + UnimplementedGeometryCollection, UnimplementedLine, UnimplementedLineString, + UnimplementedMultiLineString, UnimplementedMultiPoint, UnimplementedMultiPolygon, + UnimplementedPoint, UnimplementedPolygon, UnimplementedRect, UnimplementedTriangle, +}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; +use wkt::WktNum; + +pub(crate) struct RectWrapper<'a, T: WktNum, R: RectTrait>(&'a R); + +impl<'a, T: WktNum, R: RectTrait> RectWrapper<'a, T, R> { + pub(crate) fn try_new(rect: &'a R) -> GeoArrowResult { + match rect.dim() { + geo_traits::Dimensions::Xy | geo_traits::Dimensions::Unknown(2) => {} + dim => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Only 2d rects supported when pushing to polygon. Got dimension: {dim:?}", + ))); + } + }; + + Ok(Self(rect)) + } + + fn ll(&self) -> wkt::types::Coord { + let lower = self.0.min(); + wkt::types::Coord { + x: lower.x(), + y: lower.y(), + z: None, + m: None, + } + } + + fn ul(&self) -> wkt::types::Coord { + let lower = self.0.min(); + let upper = self.0.max(); + wkt::types::Coord { + x: lower.x(), + y: upper.y(), + z: None, + m: None, + } + } + + fn ur(&self) -> wkt::types::Coord { + let upper = self.0.max(); + wkt::types::Coord { + x: upper.x(), + y: upper.y(), + z: None, + m: None, + } + } + + fn lr(&self) -> wkt::types::Coord { + let lower = self.0.min(); + let upper = self.0.max(); + wkt::types::Coord { + x: upper.x(), + y: lower.y(), + z: None, + m: None, + } + } +} + +impl> PolygonTrait for RectWrapper<'_, T, R> { + type RingType<'a> + = &'a RectWrapper<'a, T, R> + where + Self: 'a; + + fn exterior(&self) -> Option> { + Some(self) + } + + fn num_interiors(&self) -> usize { + 0 + } + + unsafe fn interior_unchecked(&self, _: usize) -> Self::RingType<'_> { + unreachable!("interior_unchecked called on a rect") + } +} + +impl> GeometryTrait for RectWrapper<'_, T, R> { + type T = T; + type PointType<'a> + = UnimplementedPoint + where + Self: 'a; + type LineStringType<'a> + = UnimplementedLineString + where + Self: 'a; + type PolygonType<'a> + = RectWrapper<'a, T, R> + where + Self: 'a; + type MultiPointType<'a> + = UnimplementedMultiPoint + where + Self: 'a; + type MultiLineStringType<'a> + = UnimplementedMultiLineString + where + Self: 'a; + type MultiPolygonType<'a> + = UnimplementedMultiPolygon + where + Self: 'a; + type GeometryCollectionType<'a> + = UnimplementedGeometryCollection + where + Self: 'a; + type RectType<'a> + = UnimplementedRect + where + Self: 'a; + type TriangleType<'a> + = UnimplementedTriangle + where + Self: 'a; + type LineType<'a> + = UnimplementedLine + where + Self: 'a; + + fn dim(&self) -> geo_traits::Dimensions { + self.0.dim() + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + geo_traits::GeometryType::Polygon(self) + } +} + +impl<'a, T: WktNum, R: RectTrait> LineStringTrait for &'a RectWrapper<'a, T, R> { + type CoordType<'b> + = wkt::types::Coord + where + Self: 'b; + + fn num_coords(&self) -> usize { + 5 + } + + unsafe fn coord_unchecked(&self, i: usize) -> Self::CoordType<'_> { + // Ref below because I always forget the ordering + // https://github.com/georust/geo/blob/76ad2a358bd079e9d47b1229af89608744d2635b/geo-types/src/geometry/rect.rs#L217-L225 + match i { + 0 => self.ll(), + 1 => self.ul(), + 2 => self.ur(), + 3 => self.lr(), + 4 => self.ll(), + _ => unreachable!("out of range for rect coord: {i}"), + } + } +} + +impl<'a, T: WktNum, R: RectTrait> GeometryTrait for &'a RectWrapper<'a, T, R> { + type T = T; + type PointType<'b> + = UnimplementedPoint + where + Self: 'b; + type LineStringType<'b> + = UnimplementedLineString + where + Self: 'b; + type PolygonType<'b> + = RectWrapper<'b, T, R> + where + Self: 'b; + type MultiPointType<'b> + = UnimplementedMultiPoint + where + Self: 'b; + type MultiLineStringType<'b> + = UnimplementedMultiLineString + where + Self: 'b; + type MultiPolygonType<'b> + = UnimplementedMultiPolygon + where + Self: 'b; + type GeometryCollectionType<'b> + = UnimplementedGeometryCollection + where + Self: 'b; + type RectType<'b> + = UnimplementedRect + where + Self: 'b; + type TriangleType<'b> + = UnimplementedTriangle + where + Self: 'b; + type LineType<'b> + = UnimplementedLine + where + Self: 'b; + + fn dim(&self) -> geo_traits::Dimensions { + self.0.dim() + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + geo_traits::GeometryType::Polygon(self) + } +} + +pub(crate) struct TriangleWrapper<'a, T, Tri: TriangleTrait>(pub(crate) &'a Tri); + +impl> PolygonTrait for TriangleWrapper<'_, T, Tri> { + type RingType<'a> + = &'a TriangleWrapper<'a, T, Tri> + where + Self: 'a; + + fn exterior(&self) -> Option> { + Some(self) + } + + fn num_interiors(&self) -> usize { + 0 + } + + unsafe fn interior_unchecked(&self, _: usize) -> Self::RingType<'_> { + unreachable!("interior_unchecked called on a triangle") + } +} + +impl<'a, T, Tri: TriangleTrait> LineStringTrait for &'a TriangleWrapper<'a, T, Tri> { + type CoordType<'b> + = ::CoordType<'b> + where + Self: 'b; + + fn num_coords(&self) -> usize { + 4 + } + + unsafe fn coord_unchecked(&self, i: usize) -> Self::CoordType<'_> { + match i { + 0 => self.0.first(), + 1 => self.0.second(), + 2 => self.0.third(), + 3 => self.0.first(), + _ => unreachable!("out of range for triangle ring: {i}"), + } + } +} + +impl> GeometryTrait for TriangleWrapper<'_, T, Tri> { + type T = T; + type PointType<'a> + = UnimplementedPoint + where + Self: 'a; + type LineStringType<'a> + = UnimplementedLineString + where + Self: 'a; + type PolygonType<'a> + = TriangleWrapper<'a, T, Tri> + where + Self: 'a; + type MultiPointType<'a> + = UnimplementedMultiPoint + where + Self: 'a; + type MultiLineStringType<'a> + = UnimplementedMultiLineString + where + Self: 'a; + type MultiPolygonType<'a> + = UnimplementedMultiPolygon + where + Self: 'a; + type GeometryCollectionType<'a> + = UnimplementedGeometryCollection + where + Self: 'a; + type RectType<'a> + = UnimplementedRect + where + Self: 'a; + type TriangleType<'a> + = UnimplementedTriangle + where + Self: 'a; + type LineType<'a> + = UnimplementedLine + where + Self: 'a; + + fn dim(&self) -> geo_traits::Dimensions { + self.0.dim() + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + geo_traits::GeometryType::Polygon(self) + } +} + +impl<'a, T, Tri: TriangleTrait> GeometryTrait for &'a TriangleWrapper<'a, T, Tri> { + type T = T; + type PointType<'b> + = UnimplementedPoint + where + Self: 'b; + type LineStringType<'b> + = UnimplementedLineString + where + Self: 'b; + type PolygonType<'b> + = TriangleWrapper<'b, T, Tri> + where + Self: 'b; + type MultiPointType<'b> + = UnimplementedMultiPoint + where + Self: 'b; + type MultiLineStringType<'b> + = UnimplementedMultiLineString + where + Self: 'b; + type MultiPolygonType<'b> + = UnimplementedMultiPolygon + where + Self: 'b; + type GeometryCollectionType<'b> + = UnimplementedGeometryCollection + where + Self: 'b; + type RectType<'b> + = UnimplementedRect + where + Self: 'b; + type TriangleType<'b> + = UnimplementedTriangle + where + Self: 'b; + type LineType<'b> + = UnimplementedLine + where + Self: 'b; + + fn dim(&self) -> geo_traits::Dimensions { + self.0.dim() + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + geo_traits::GeometryType::Polygon(self) + } +} + +pub(crate) struct LineWrapper<'a, T, L: LineTrait>(pub(crate) &'a L); + +impl> LineStringTrait for LineWrapper<'_, T, L> { + type CoordType<'b> + = ::CoordType<'b> + where + Self: 'b; + + fn num_coords(&self) -> usize { + 2 + } + + unsafe fn coord_unchecked(&self, i: usize) -> Self::CoordType<'_> { + match i { + 0 => self.0.start(), + 1 => self.0.end(), + _ => unreachable!("out of range for line coord: {i}"), + } + } +} + +impl> GeometryTrait for LineWrapper<'_, T, L> { + type T = T; + type PointType<'a> + = UnimplementedPoint + where + Self: 'a; + type LineStringType<'a> + = LineWrapper<'a, T, L> + where + Self: 'a; + type PolygonType<'a> + = UnimplementedPolygon + where + Self: 'a; + type MultiPointType<'a> + = UnimplementedMultiPoint + where + Self: 'a; + type MultiLineStringType<'a> + = UnimplementedMultiLineString + where + Self: 'a; + type MultiPolygonType<'a> + = UnimplementedMultiPolygon + where + Self: 'a; + type GeometryCollectionType<'a> + = UnimplementedGeometryCollection + where + Self: 'a; + type RectType<'a> + = UnimplementedRect + where + Self: 'a; + type TriangleType<'a> + = UnimplementedTriangle + where + Self: 'a; + type LineType<'a> + = UnimplementedLine + where + Self: 'a; + + fn dim(&self) -> geo_traits::Dimensions { + self.0.dim() + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + geo_traits::GeometryType::LineString(self) + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/geometry.rs b/src/geoarrow/geoarrow-array/src/builder/geometry.rs new file mode 100644 index 0000000000..3ce68a946a --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/geometry.rs @@ -0,0 +1,954 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use geo_traits::*; +use geoarrow_schema::{ + Dimension, GeometryCollectionType, GeometryType, LineStringType, Metadata, MultiLineStringType, + MultiPointType, MultiPolygonType, PointType, PolygonType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{DimensionIndex, GenericWkbArray, GeometryArray}, + builder::{ + GeometryCollectionBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder, + MultiPolygonBuilder, PointBuilder, PolygonBuilder, + geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper}, + }, + capacity::GeometryCapacity, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, +}; + +pub(crate) const DEFAULT_PREFER_MULTI: bool = false; + +/// The GeoArrow equivalent to a `Vec>`: a mutable collection of Geometries. +/// +/// Each Geometry can have a different dimension. All geometries must have the same coordinate +/// type. +/// +/// This currently has the caveat that these geometries must be a _primitive_ geometry type. This +/// does not currently support nested GeometryCollection objects. +/// +/// Converting an [`GeometryBuilder`] into a [`GeometryArray`] is `O(1)`. +/// +/// # Invariants +/// +/// - All arrays must have the same coordinate layout (interleaved or separated) +#[derive(Debug)] +pub struct GeometryBuilder { + metadata: Arc, + + // Invariant: every item in `types` is `> 0 && < fields.len()` + types: Vec, + + /// An array of PointArray, ordered XY, XYZ, XYM, XYZM + points: [PointBuilder; 4], + line_strings: [LineStringBuilder; 4], + polygons: [PolygonBuilder; 4], + mpoints: [MultiPointBuilder; 4], + mline_strings: [MultiLineStringBuilder; 4], + mpolygons: [MultiPolygonBuilder; 4], + gcs: [GeometryCollectionBuilder; 4], + + // Invariant: `offsets.len() == types.len()` + offsets: Vec, + + /// Whether to prefer multi or single arrays for new geometries. + /// + /// E.g. if this is `true` and a Point geometry is added, it will be added to the + /// MultiPointBuilder. If this is `false`, the Point geometry will be added to the + /// PointBuilder. + /// + /// The idea is that always adding multi-geometries will make it easier to downcast later. + pub(crate) prefer_multi: bool, + + /// The number of nulls that has been deferred and are still to be written. + /// + /// Adding nulls is tricky. We often want to use this builder as a generic builder for data + /// from unknown sources, which then gets downcasted to an array of a specific type. + /// + /// In a large majority of the time, this builder will have only data of a single type, which + /// can then get downcasted to a simple array of a single geometry type and dimension. But in + /// order for this process to be easy, we want the nulls to be assigned to the same array type + /// as the actual data. + /// + /// When there's a valid geometry pushed before the null, we can add the null to an existing + /// non-null array type, but if there are no valid geometries yet, we don't know which array to + /// push the null to. This `deferred_nulls` is the number of initial null values that haven't + /// yet been written to an array, because we don't know which array to write them to. + deferred_nulls: usize, +} + +impl<'a> GeometryBuilder { + /// Creates a new empty [`GeometryBuilder`]. + pub fn new(typ: GeometryType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`GeometryBuilder`] with given capacity and no validity. + pub fn with_capacity(typ: GeometryType, capacity: GeometryCapacity) -> Self { + let coord_type = typ.coord_type(); + + let points = core::array::from_fn(|i| { + let dim = Dimension::from_order(i).unwrap(); + PointBuilder::with_capacity( + PointType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.point(dim), + ) + }); + let line_strings = core::array::from_fn(|i| { + let dim = Dimension::from_order(i).unwrap(); + LineStringBuilder::with_capacity( + LineStringType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.line_string(dim), + ) + }); + let polygons = core::array::from_fn(|i| { + let dim = Dimension::from_order(i).unwrap(); + PolygonBuilder::with_capacity( + PolygonType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.polygon(dim), + ) + }); + let mpoints = core::array::from_fn(|i| { + let dim = Dimension::from_order(i).unwrap(); + MultiPointBuilder::with_capacity( + MultiPointType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.multi_point(dim), + ) + }); + let mline_strings = core::array::from_fn(|i| { + let dim = Dimension::from_order(i).unwrap(); + MultiLineStringBuilder::with_capacity( + MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.multi_line_string(dim), + ) + }); + let mpolygons = core::array::from_fn(|i| { + let dim = Dimension::from_order(i).unwrap(); + MultiPolygonBuilder::with_capacity( + MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.multi_polygon(dim), + ) + }); + let gcs = core::array::from_fn(|i| { + let dim = Dimension::from_order(i).unwrap(); + GeometryCollectionBuilder::with_capacity( + GeometryCollectionType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.geometry_collection(dim), + ) + }); + + // Don't store array metadata on child arrays + Self { + metadata: typ.metadata().clone(), + types: vec![], + points, + line_strings, + polygons, + mpoints, + mline_strings, + mpolygons, + gcs, + offsets: vec![], + deferred_nulls: 0, + prefer_multi: DEFAULT_PREFER_MULTI, + } + } + + /// Change whether to prefer multi or single arrays for new single-part geometries. + /// + /// If `true`, a new `Point` will be added to the `MultiPointBuilder` child array, a new + /// `LineString` will be added to the `MultiLineStringBuilder` child array, and a new `Polygon` + /// will be added to the `MultiPolygonBuilder` child array. + /// + /// This can be desired when the user wants to downcast the array to a single geometry array + /// later, as casting to a, say, `MultiPointArray` from a `GeometryArray` could be done + /// zero-copy. + /// + /// Note that only geometries added _after_ this method is called will be affected. + pub fn with_prefer_multi(self, prefer_multi: bool) -> Self { + Self { + prefer_multi, + gcs: self.gcs.map(|gc| gc.with_prefer_multi(prefer_multi)), + ..self + } + } + + /// Reserves capacity for at least `additional` more geometries. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, capacity: GeometryCapacity) { + let total_num_geoms = capacity.total_num_geoms(); + self.types.reserve(total_num_geoms); + self.offsets.reserve(total_num_geoms); + + capacity.points.iter().enumerate().for_each(|(i, cap)| { + self.points[i].reserve(*cap); + }); + capacity + .line_strings + .iter() + .enumerate() + .for_each(|(i, cap)| { + self.line_strings[i].reserve(*cap); + }); + capacity.polygons.iter().enumerate().for_each(|(i, cap)| { + self.polygons[i].reserve(*cap); + }); + capacity.mpoints.iter().enumerate().for_each(|(i, cap)| { + self.mpoints[i].reserve(*cap); + }); + capacity + .mline_strings + .iter() + .enumerate() + .for_each(|(i, cap)| { + self.mline_strings[i].reserve(*cap); + }); + capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| { + self.mpolygons[i].reserve(*cap); + }); + capacity.gcs.iter().enumerate().for_each(|(i, cap)| { + self.gcs[i].reserve(*cap); + }); + } + + /// Reserves the minimum capacity for at least `additional` more Geometries. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, capacity: GeometryCapacity) { + let total_num_geoms = capacity.total_num_geoms(); + + self.types.reserve_exact(total_num_geoms); + self.offsets.reserve_exact(total_num_geoms); + + capacity.points.iter().enumerate().for_each(|(i, cap)| { + self.points[i].reserve_exact(*cap); + }); + capacity + .line_strings + .iter() + .enumerate() + .for_each(|(i, cap)| { + self.line_strings[i].reserve_exact(*cap); + }); + capacity.polygons.iter().enumerate().for_each(|(i, cap)| { + self.polygons[i].reserve_exact(*cap); + }); + capacity.mpoints.iter().enumerate().for_each(|(i, cap)| { + self.mpoints[i].reserve_exact(*cap); + }); + capacity + .mline_strings + .iter() + .enumerate() + .for_each(|(i, cap)| { + self.mline_strings[i].reserve_exact(*cap); + }); + capacity.mpolygons.iter().enumerate().for_each(|(i, cap)| { + self.mpolygons[i].reserve_exact(*cap); + }); + capacity.gcs.iter().enumerate().for_each(|(i, cap)| { + self.gcs[i].reserve_exact(*cap); + }); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.points.iter_mut().for_each(PointBuilder::shrink_to_fit); + self.line_strings + .iter_mut() + .for_each(LineStringBuilder::shrink_to_fit); + self.polygons + .iter_mut() + .for_each(PolygonBuilder::shrink_to_fit); + self.mpoints + .iter_mut() + .for_each(MultiPointBuilder::shrink_to_fit); + self.mline_strings + .iter_mut() + .for_each(MultiLineStringBuilder::shrink_to_fit); + self.mpolygons + .iter_mut() + .for_each(MultiPolygonBuilder::shrink_to_fit); + self.gcs + .iter_mut() + .for_each(GeometryCollectionBuilder::shrink_to_fit); + + self.offsets.shrink_to_fit(); + self.types.shrink_to_fit(); + } + + /// Consume the builder and convert to an immutable [`GeometryArray`] + pub fn finish(mut self) -> GeometryArray { + // If there are still deferred nulls to be written, then there aren't any valid geometries + // in this array, and just choose a child to write them to. + if self.deferred_nulls > 0 { + let dim = Dimension::XY; + let child = &mut self.points[dim.order()]; + let type_id = child.geometry_type_id(); + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + } + + GeometryArray::new( + self.types.into(), + self.offsets.into(), + self.points.map(|arr| arr.finish()), + self.line_strings.map(|arr| arr.finish()), + self.polygons.map(|arr| arr.finish()), + self.mpoints.map(|arr| arr.finish()), + self.mline_strings.map(|arr| arr.finish()), + self.mpolygons.map(|arr| arr.finish()), + self.gcs.map(|arr| arr.finish()), + self.metadata, + ) + } + + /// Add a new Point to the end of this array. + /// + /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPointBuilder` child + /// array. Otherwise, it will be stored in the `PointBuilder` child array. + #[inline] + fn push_point(&mut self, value: Option<&impl PointTrait>) -> GeoArrowResult<()> { + if let Some(point) = value { + let dim: Dimension = point.dim().try_into().unwrap(); + let array_idx = dim.order(); + + if self.prefer_multi { + let child = &mut self.mpoints[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_point(Some(point))?; + } else { + let child = &mut self.points[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_point(Some(point)); + } + } else { + self.push_null(); + }; + + Ok(()) + } + + #[inline] + fn add_type( + child: &mut B, + offsets: &mut Vec, + types: &mut Vec, + type_id: i8, + ) { + offsets.push(child.len().try_into().unwrap()); + types.push(type_id); + } + + #[inline] + fn add_point_type(&mut self, dim: Dimension) { + let child = &self.points[dim.order()]; + self.offsets.push(child.len().try_into().unwrap()); + self.types.push(child.geometry_type_id()); + } + + /// Add a new LineString to the end of this array. + /// + /// If `self.prefer_multi` is `true`, it will be stored in the `MultiLineStringBuilder` child + /// array. Otherwise, it will be stored in the `LineStringBuilder` child array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + fn push_line_string( + &mut self, + value: Option<&impl LineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(line_string) = value { + let dim: Dimension = line_string.dim().try_into().unwrap(); + let array_idx = dim.order(); + + if self.prefer_multi { + let child = &mut self.mline_strings[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_line_string(Some(line_string))?; + } else { + let child = &mut self.line_strings[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_line_string(Some(line_string))?; + } + } else { + self.push_null(); + }; + + Ok(()) + } + + #[inline] + fn add_line_string_type(&mut self, dim: Dimension) { + let child = &self.line_strings[dim.order()]; + self.offsets.push(child.len().try_into().unwrap()); + self.types.push(child.geometry_type_id()); + } + + /// Add a new Polygon to the end of this array. + /// + /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPolygonBuilder` child + /// array. Otherwise, it will be stored in the `PolygonBuilder` child array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + fn push_polygon(&mut self, value: Option<&impl PolygonTrait>) -> GeoArrowResult<()> { + if let Some(polygon) = value { + let dim: Dimension = polygon.dim().try_into().unwrap(); + let array_idx = dim.order(); + + if self.prefer_multi { + let child = &mut self.mpolygons[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_polygon(Some(polygon))?; + } else { + let child = &mut self.polygons[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_polygon(Some(polygon))?; + } + } else { + self.push_null(); + }; + + Ok(()) + } + + #[inline] + fn add_polygon_type(&mut self, dim: Dimension) { + let child = &self.polygons[dim.order()]; + self.offsets.push(child.len().try_into().unwrap()); + self.types.push(child.geometry_type_id()); + } + + /// Add a new MultiPoint to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + fn push_multi_point( + &mut self, + value: Option<&impl MultiPointTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_point) = value { + let dim: Dimension = multi_point.dim().try_into().unwrap(); + let array_idx = dim.order(); + + let child = &mut self.mpoints[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_multi_point(Some(multi_point))?; + } else { + self.push_null(); + }; + + Ok(()) + } + + #[inline] + fn add_multi_point_type(&mut self, dim: Dimension) { + let child = &self.mpoints[dim.order()]; + self.offsets.push(child.len().try_into().unwrap()); + self.types.push(child.geometry_type_id()); + } + + /// Add a new MultiLineString to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + fn push_multi_line_string( + &mut self, + value: Option<&impl MultiLineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_line_string) = value { + let dim: Dimension = multi_line_string.dim().try_into().unwrap(); + let array_idx = dim.order(); + + let child = &mut self.mline_strings[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_multi_line_string(Some(multi_line_string))?; + } else { + self.push_null(); + }; + + Ok(()) + } + + #[inline] + fn add_multi_line_string_type(&mut self, dim: Dimension) { + let child = &self.mline_strings[dim.order()]; + self.offsets.push(child.len().try_into().unwrap()); + self.types.push(child.geometry_type_id()); + } + + /// Add a new MultiPolygon to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + fn push_multi_polygon( + &mut self, + value: Option<&impl MultiPolygonTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_polygon) = value { + let dim: Dimension = multi_polygon.dim().try_into().unwrap(); + let array_idx = dim.order(); + + let child = &mut self.mpolygons[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_multi_polygon(Some(multi_polygon))?; + } else { + self.push_null(); + }; + + Ok(()) + } + + #[inline] + fn add_multi_polygon_type(&mut self, dim: Dimension) { + let child = &self.mpolygons[dim.order()]; + self.offsets.push(child.len().try_into().unwrap()); + self.types.push(child.geometry_type_id()); + } + + /// Add a new geometry to this builder + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&'a impl GeometryTrait>, + ) -> GeoArrowResult<()> { + use geo_traits::GeometryType::*; + + if let Some(geom) = value { + match geom.as_type() { + Point(g) => { + self.push_point(Some(g))?; + } + LineString(g) => { + self.push_line_string(Some(g))?; + } + Polygon(g) => { + self.push_polygon(Some(g))?; + } + MultiPoint(p) => self.push_multi_point(Some(p))?, + MultiLineString(p) => self.push_multi_line_string(Some(p))?, + MultiPolygon(p) => self.push_multi_polygon(Some(p))?, + GeometryCollection(gc) => { + if gc.num_geometries() == 1 { + self.push_geometry(Some(&gc.geometry(0).unwrap()))? + } else { + self.push_geometry_collection(Some(gc))? + } + } + Rect(r) => self.push_polygon(Some(&RectWrapper::try_new(r)?))?, + Triangle(tri) => self.push_polygon(Some(&TriangleWrapper(tri)))?, + Line(l) => self.push_line_string(Some(&LineWrapper(l)))?, + }; + } else { + self.push_null(); + } + Ok(()) + } + + /// Add a new GeometryCollection to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + fn push_geometry_collection( + &mut self, + value: Option<&impl GeometryCollectionTrait>, + ) -> GeoArrowResult<()> { + if let Some(gc) = value { + let dim: Dimension = gc.dim().try_into().unwrap(); + let array_idx = dim.order(); + + let child = &mut self.gcs[array_idx]; + let type_id = child.geometry_type_id(); + + Self::flush_deferred_nulls( + &mut self.deferred_nulls, + child, + &mut self.offsets, + &mut self.types, + type_id, + ); + Self::add_type(child, &mut self.offsets, &mut self.types, type_id); + child.push_geometry_collection(Some(gc))?; + } else { + self.push_null(); + }; + + Ok(()) + } + + #[inline] + fn add_geometry_collection_type(&mut self, dim: Dimension) { + let child = &self.gcs[dim.order()]; + self.offsets.push(child.len().try_into().unwrap()); + self.types.push(child.geometry_type_id()); + } + + /// Push a null to this builder. + /// + /// Adding null values to a union array is tricky, because you don't want to add a null to a + /// child that would otherwise be totally empty. Ideally, as few children as possible exist and + /// are non-empty. + /// + /// We handle that by pushing nulls to the first non-empty child we find. If no underlying + /// arrays are non-empty, we add to an internal counter instead. Once the first non-empty + /// geometry is pushed, then we flush all the "deferred nulls" to that child. + #[inline] + pub fn push_null(&mut self) { + // Iterate through each dimension, then iterate through each child type. If a child exists, + // push a null to it. + // + // Note that we must **also** call `add_*_type` so that the offsets are correct to point + // the union array to the child. + for dim in [ + Dimension::XY, + Dimension::XYZ, + Dimension::XYM, + Dimension::XYZM, + ] { + let dim_idx = dim.order(); + if !self.points[dim_idx].is_empty() { + self.add_point_type(dim); + self.points[dim_idx].push_null(); + return; + } + if !self.line_strings[dim_idx].is_empty() { + self.add_line_string_type(dim); + self.line_strings[dim_idx].push_null(); + return; + } + if !self.polygons[dim_idx].is_empty() { + self.add_polygon_type(dim); + self.polygons[dim_idx].push_null(); + return; + } + if !self.mpoints[dim_idx].is_empty() { + self.add_multi_point_type(dim); + self.mpoints[dim_idx].push_null(); + return; + } + if !self.mline_strings[dim_idx].is_empty() { + self.add_multi_line_string_type(dim); + self.mline_strings[dim_idx].push_null(); + return; + } + if !self.mpolygons[dim_idx].is_empty() { + self.add_multi_polygon_type(dim); + self.mpolygons[dim_idx].push_null(); + return; + } + if !self.gcs[dim_idx].is_empty() { + self.add_geometry_collection_type(dim); + self.gcs[dim_idx].push_null(); + return; + } + } + + self.deferred_nulls += 1; + } + + /// Flush any deferred nulls to the desired array builder. + fn flush_deferred_nulls( + deferred_nulls: &mut usize, + child: &mut B, + offsets: &mut Vec, + types: &mut Vec, + type_id: i8, + ) { + let offset = child.len().try_into().unwrap(); + // For each null we also have to update the offsets and types + for _ in 0..*deferred_nulls { + offsets.push(offset); + types.push(type_id); + child.push_null(); + } + + *deferred_nulls = 0; + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_geom| self.push_geometry(maybe_geom)) + .unwrap(); + } + + /// Create this builder from a slice of nullable Geometries. + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: GeometryType, + ) -> GeoArrowResult { + let capacity = GeometryCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, GeometryType)> for GeometryBuilder { + type Error = GeoArrowError; + + fn try_from((value, typ): (GenericWkbArray, GeometryType)) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for GeometryBuilder { + fn len(&self) -> usize { + self.types.len() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::CoordType; +// use wkt::wkt; + +// use super::*; +// use crate::GeoArrowArray; + +// #[test] +// fn all_items_null() { +// // Testing the behavior of deferred nulls when there are no valid geometries. +// let typ = GeometryType::new(Default::default()); +// let mut builder = GeometryBuilder::new(typ); + +// builder.push_null(); +// builder.push_null(); +// builder.push_null(); + +// let array = builder.finish(); +// assert_eq!(array.logical_null_count(), 3); + +// // We expect the nulls to be placed in (canonically) the first child +// assert_eq!(array.points[0].logical_null_count(), 3); +// } + +// #[test] +// fn deferred_nulls() { +// let coord_type = CoordType::Interleaved; +// let typ = GeometryType::new(Default::default()).with_coord_type(coord_type); + +// let mut builder = GeometryBuilder::new(typ); +// builder.push_null(); +// builder.push_null(); + +// let linestring_arr = crate::test::linestring::array(coord_type, Dimension::XYZ); +// let linestring_arr_null_count = linestring_arr.logical_null_count(); + +// // Push the geometries from the linestring arr onto the geometry builder +// for geom in linestring_arr.iter() { +// builder +// .push_geometry(geom.transpose().unwrap().as_ref()) +// .unwrap(); +// } + +// let geom_arr = builder.finish(); + +// // Since there are 2 nulls pushed manually and a third from the LineString arr +// let total_expected_null_count = 2 + linestring_arr_null_count; +// assert_eq!(geom_arr.logical_null_count(), total_expected_null_count); + +// // All nulls should be in the XYZ linestring child +// assert_eq!( +// geom_arr.line_strings[Dimension::XYZ.order()].logical_null_count(), +// total_expected_null_count +// ); +// } + +// #[test] +// fn later_nulls_after_deferred_nulls_pushed_directly() { +// let coord_type = CoordType::Interleaved; +// let typ = GeometryType::new(Default::default()).with_coord_type(coord_type); + +// let mut builder = GeometryBuilder::new(typ); +// builder.push_null(); +// builder.push_null(); + +// let point = wkt! { POINT Z (30. 10. 40.) }; +// builder.push_point(Some(&point)).unwrap(); + +// let ls = wkt! { LINESTRING (30. 10., 10. 30., 40. 40.) }; +// builder.push_line_string(Some(&ls)).unwrap(); + +// builder.push_null(); +// builder.push_null(); + +// let geom_arr = builder.finish(); + +// assert_eq!(geom_arr.logical_null_count(), 4); + +// // The first two nulls get added to the point z child because those are deferred and the +// // point z is the first non-null geometry added. +// assert_eq!( +// geom_arr.points[Dimension::XYZ.order()].logical_null_count(), +// 2 +// ); + +// // The last two nulls get added to the linestring XY child because the current +// // implementation looks through all XY arrays then all XYZ then etc looking for the first +// // non-empty array. Since the linestring XY child is non-empty, the last nulls get pushed +// // here. +// assert_eq!( +// geom_arr.line_strings[Dimension::XY.order()].logical_null_count(), +// 2 +// ); +// } + +// // Test pushing nulls that are added after a valid geometry has been pushed. +// #[test] +// fn nulls_no_deferred() { +// let coord_type = CoordType::Interleaved; +// let typ = GeometryType::new(Default::default()).with_coord_type(coord_type); + +// let mut builder = GeometryBuilder::new(typ); +// let point = wkt! { POINT Z (30. 10. 40.) }; +// builder.push_point(Some(&point)).unwrap(); +// builder.push_null(); +// builder.push_null(); + +// let geom_arr = builder.finish(); +// assert_eq!(geom_arr.logical_null_count(), 2); +// // All nulls should be in point XYZ child. +// assert_eq!( +// geom_arr.points[Dimension::XYZ.order()].logical_null_count(), +// 2 +// ); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/builder/geometrycollection.rs b/src/geoarrow/geoarrow-array/src/builder/geometrycollection.rs new file mode 100644 index 0000000000..e7e2167cd6 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/geometrycollection.rs @@ -0,0 +1,371 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::NullBufferBuilder; +use geo_traits::{ + GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait, + MultiPolygonTrait, PointTrait, PolygonTrait, +}; +use geoarrow_schema::{ + Dimension, GeometryCollectionType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{GenericWkbArray, GeometryCollectionArray}, + builder::{ + MixedGeometryBuilder, OffsetsBuilder, + geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper}, + }, + capacity::GeometryCollectionCapacity, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, +}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of +/// GeometryCollections. +/// +/// Converting an [`GeometryCollectionBuilder`] into a [`GeometryCollectionArray`] is `O(1)`. +#[derive(Debug)] +pub struct GeometryCollectionBuilder { + data_type: GeometryCollectionType, + + pub(crate) geoms: MixedGeometryBuilder, + + pub(crate) geom_offsets: OffsetsBuilder, + + pub(crate) validity: NullBufferBuilder, +} + +impl<'a> GeometryCollectionBuilder { + /// Creates a new empty [`GeometryCollectionBuilder`]. + pub fn new(typ: GeometryCollectionType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new empty [`GeometryCollectionBuilder`] with the provided + /// [capacity][GeometryCollectionCapacity]. + pub fn with_capacity( + typ: GeometryCollectionType, + capacity: GeometryCollectionCapacity, + ) -> Self { + Self { + geoms: MixedGeometryBuilder::with_capacity_and_options( + typ.dimension(), + capacity.mixed_capacity, + typ.coord_type(), + ), + geom_offsets: OffsetsBuilder::with_capacity(capacity.geom_capacity), + validity: NullBufferBuilder::new(capacity.geom_capacity), + data_type: typ, + } + } + + /// Change whether to prefer multi or single arrays for new single-part geometries. + /// + /// If `true`, a new `Point` will be added to the `MultiPointBuilder` child array, a new + /// `LineString` will be added to the `MultiLineStringBuilder` child array, and a new `Polygon` + /// will be added to the `MultiPolygonBuilder` child array. + /// + /// This can be desired when the user wants to downcast the array to a single geometry array + /// later, as casting to a, say, `MultiPointArray` from a `GeometryCollectionArray` could be + /// done zero-copy. + /// + /// Note that only geometries added _after_ this method is called will be affected. + pub fn with_prefer_multi(self, prefer_multi: bool) -> Self { + Self { + geoms: self.geoms.with_prefer_multi(prefer_multi), + ..self + } + } + + /// Reserves capacity for at least `additional` more GeometryCollections. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: GeometryCollectionCapacity) { + self.geoms.reserve(additional.mixed_capacity); + self.geom_offsets.reserve(additional.geom_capacity); + } + + /// Reserves the minimum capacity for at least `additional` more GeometryCollections. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: GeometryCollectionCapacity) { + self.geoms.reserve_exact(additional.mixed_capacity); + self.geom_offsets.reserve_exact(additional.geom_capacity); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.geoms.shrink_to_fit(); + self.geom_offsets.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// Consume the builder and convert to an immutable [`GeometryCollectionArray`] + pub fn finish(mut self) -> GeometryCollectionArray { + let validity = self.validity.finish(); + GeometryCollectionArray::new( + self.geoms.finish(), + self.geom_offsets.finish(), + validity, + self.data_type.metadata().clone(), + ) + } + + /// Push a Point onto the end of this builder + #[inline] + fn push_point(&mut self, value: Option<&impl PointTrait>) -> GeoArrowResult<()> { + if let Some(geom) = value { + self.geoms.push_point(geom)?; + self.geom_offsets.try_push_usize(1)?; + self.validity.append(value.is_some()); + } else { + self.push_null(); + } + Ok(()) + } + + /// Push a LineString onto the end of this builder + #[inline] + fn push_line_string( + &mut self, + value: Option<&impl LineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(geom) = value { + self.geoms.push_line_string(geom)?; + self.geom_offsets.try_push_usize(1)?; + self.validity.append(value.is_some()); + } else { + self.push_null(); + } + Ok(()) + } + + /// Push a Polygon onto the end of this builder + #[inline] + fn push_polygon(&mut self, value: Option<&impl PolygonTrait>) -> GeoArrowResult<()> { + if let Some(geom) = value { + self.geoms.push_polygon(geom)?; + self.geom_offsets.try_push_usize(1)?; + self.validity.append(value.is_some()); + } else { + self.push_null(); + } + Ok(()) + } + + /// Push a MultiPoint onto the end of this builder + #[inline] + fn push_multi_point( + &mut self, + value: Option<&impl MultiPointTrait>, + ) -> GeoArrowResult<()> { + if let Some(geom) = value { + self.geoms.push_multi_point(geom)?; + self.geom_offsets.try_push_usize(1)?; + self.validity.append(value.is_some()); + } else { + self.push_null(); + } + Ok(()) + } + + /// Push a MultiLineString onto the end of this builder + #[inline] + fn push_multi_line_string( + &mut self, + value: Option<&impl MultiLineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(geom) = value { + self.geoms.push_multi_line_string(geom)?; + self.geom_offsets.try_push_usize(1)?; + self.validity.append(value.is_some()); + } else { + self.push_null(); + } + Ok(()) + } + + /// Push a MultiPolygon onto the end of this builder + #[inline] + fn push_multi_polygon( + &mut self, + value: Option<&impl MultiPolygonTrait>, + ) -> GeoArrowResult<()> { + if let Some(geom) = value { + self.geoms.push_multi_polygon(geom)?; + self.geom_offsets.try_push_usize(1)?; + self.validity.append(value.is_some()); + } else { + self.push_null(); + } + Ok(()) + } + + /// Push a Geometry onto the end of this builder + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + use geo_traits::GeometryType::*; + + if let Some(g) = value { + match g.as_type() { + Point(p) => self.push_point(Some(p))?, + LineString(p) => { + self.push_line_string(Some(p))?; + } + Polygon(p) => self.push_polygon(Some(p))?, + MultiPoint(p) => self.push_multi_point(Some(p))?, + MultiLineString(p) => self.push_multi_line_string(Some(p))?, + MultiPolygon(p) => self.push_multi_polygon(Some(p))?, + GeometryCollection(p) => self.push_geometry_collection(Some(p))?, + Rect(r) => self.push_polygon(Some(&RectWrapper::try_new(r)?))?, + Triangle(tri) => self.push_polygon(Some(&TriangleWrapper(tri)))?, + Line(l) => self.push_line_string(Some(&LineWrapper(l)))?, + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Push a GeometryCollection onto the end of this builder + #[inline] + pub fn push_geometry_collection( + &mut self, + value: Option<&impl GeometryCollectionTrait>, + ) -> GeoArrowResult<()> { + if let Some(gc) = value { + let num_geoms = gc.num_geometries(); + for g in gc.geometries() { + self.geoms.push_geometry(&g)?; + } + self.try_push_length(num_geoms)?; + } else { + self.push_null(); + } + Ok(()) + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_gc| self.push_geometry_collection(maybe_gc)) + .unwrap(); + } + + #[inline] + pub(crate) fn try_push_length(&mut self, geom_offsets_length: usize) -> GeoArrowResult<()> { + self.geom_offsets.try_push_usize(geom_offsets_length)?; + self.validity.append(true); + Ok(()) + } + + #[inline] + pub(crate) fn push_null(&mut self) { + self.geom_offsets.extend_constant(1); + self.validity.append(false); + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_geometry_collections( + geoms: &[impl GeometryCollectionTrait], + typ: GeometryCollectionType, + ) -> GeoArrowResult { + let capacity = + GeometryCollectionCapacity::from_geometry_collections(geoms.iter().map(Some))?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(Some)); + Ok(array) + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometry_collections( + geoms: &[Option>], + typ: GeometryCollectionType, + ) -> GeoArrowResult { + let capacity = GeometryCollectionCapacity::from_geometry_collections( + geoms.iter().map(|x| x.as_ref()), + )?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + Ok(array) + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: GeometryCollectionType, + ) -> GeoArrowResult { + let capacity = + GeometryCollectionCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?; + let mut array = Self::with_capacity(typ, capacity); + for geom in geoms { + array.push_geometry(geom.as_ref())?; + } + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, GeometryCollectionType)> + for GeometryCollectionBuilder +{ + type Error = GeoArrowError; + + fn try_from( + (value, typ): (GenericWkbArray, GeometryCollectionType), + ) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for GeometryCollectionBuilder { + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +impl GeometryTypeId for GeometryCollectionBuilder { + const GEOMETRY_TYPE_OFFSET: i8 = 7; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/linestring.rs b/src/geoarrow/geoarrow-array/src/builder/linestring.rs new file mode 100644 index 0000000000..03e53e9ecf --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/linestring.rs @@ -0,0 +1,286 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::NullBufferBuilder; +use geo_traits::{CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait}; +use geoarrow_schema::{ + Dimension, LineStringType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{GenericWkbArray, LineStringArray}, + builder::{CoordBufferBuilder, OffsetsBuilder, geo_trait_wrappers::LineWrapper}, + capacity::LineStringCapacity, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, + util::GeometryTypeName, +}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of LineStrings. +/// +/// Converting an [`LineStringBuilder`] into a [`LineStringArray`] is `O(1)`. +#[derive(Debug)] +pub struct LineStringBuilder { + data_type: LineStringType, + + pub(crate) coords: CoordBufferBuilder, + + /// Offsets into the coordinate array where each geometry starts + pub(crate) geom_offsets: OffsetsBuilder, + + /// Validity is only defined at the geometry level + pub(crate) validity: NullBufferBuilder, +} + +impl LineStringBuilder { + /// Creates a new empty [`LineStringBuilder`]. + pub fn new(typ: LineStringType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`LineStringBuilder`] with a capacity. + pub fn with_capacity(typ: LineStringType, capacity: LineStringCapacity) -> Self { + let coords = CoordBufferBuilder::with_capacity( + capacity.coord_capacity, + typ.coord_type(), + typ.dimension(), + ); + Self { + coords, + geom_offsets: OffsetsBuilder::with_capacity(capacity.geom_capacity()), + validity: NullBufferBuilder::new(capacity.geom_capacity()), + data_type: typ, + } + } + + /// Reserves capacity for at least `additional` more LineStrings. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: LineStringCapacity) { + self.coords.reserve(additional.coord_capacity()); + self.geom_offsets.reserve(additional.geom_capacity()); + } + + /// Reserves the minimum capacity for at least `additional` more LineStrings. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: LineStringCapacity) { + self.coords.reserve_exact(additional.coord_capacity()); + self.geom_offsets.reserve_exact(additional.geom_capacity()); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.coords.shrink_to_fit(); + self.geom_offsets.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// Needs to be called when a valid value was extended to this array. + /// This is a relatively low level function, prefer `try_push` when you can. + #[inline] + pub(crate) fn try_push_length(&mut self, geom_offsets_length: usize) -> GeoArrowResult<()> { + self.geom_offsets.try_push_usize(geom_offsets_length)?; + self.validity.append(true); + Ok(()) + } + + /// Add a valid but empty LineString to the end of this array. + #[inline] + pub fn push_empty(&mut self) { + self.geom_offsets.extend_constant(1); + self.validity.append(true); + } + + /// Add a new null value to the end of this array. + #[inline] + pub(crate) fn push_null(&mut self) { + self.geom_offsets.extend_constant(1); + self.validity.append(false); + } + + /// Consume the builder and convert to an immutable [`LineStringArray`] + pub fn finish(mut self) -> LineStringArray { + let validity = self.validity.finish(); + LineStringArray::new( + self.coords.finish(), + self.geom_offsets.finish(), + validity, + self.data_type.metadata().clone(), + ) + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_line_strings(geoms: &[impl LineStringTrait], typ: LineStringType) -> Self { + let capacity = LineStringCapacity::from_line_strings(geoms.iter().map(Some)); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_line_strings( + geoms: &[Option>], + typ: LineStringType, + ) -> Self { + let capacity = LineStringCapacity::from_line_strings(geoms.iter().map(|x| x.as_ref())); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } + + /// Add a new LineString to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_line_string( + &mut self, + value: Option<&impl LineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(line_string) = value { + let num_coords = line_string.num_coords(); + for coord in line_string.coords() { + self.coords.try_push_coord(&coord)?; + } + self.try_push_length(num_coords)?; + } else { + self.push_null(); + } + Ok(()) + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_point| self.push_line_string(maybe_multi_point)) + .unwrap(); + } + + /// Extend this builder with the given geometries + pub fn extend_from_geometry_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult<()> { + geoms.into_iter().try_for_each(|g| self.push_geometry(g))?; + Ok(()) + } + + /// Push a raw coordinate to the underlying coordinate array. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw coordinates to the array upholds the + /// necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + self.coords.try_push_coord(coord) + } + + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not LineString or a MultiLineString with length 1. + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + match value.as_type() { + GeometryType::LineString(g) => self.push_line_string(Some(g))?, + GeometryType::MultiLineString(ml) => { + let num_line_strings = ml.num_line_strings(); + if num_line_strings == 0 { + self.push_empty(); + } else if num_line_strings == 1 { + self.push_line_string(Some(&ml.line_string(0).unwrap()))? + } else { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected MultiLineString with only one LineString in LineStringBuilder, got {num_line_strings} line strings", + ))); + } + } + GeometryType::Line(l) => self.push_line_string(Some(&LineWrapper(l)))?, + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected LineString, got {}", + gt.name() + ))); + } + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: LineStringType, + ) -> GeoArrowResult { + let capacity = LineStringCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_geometry_iter(geoms.iter().map(|x| x.as_ref()))?; + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, LineStringType)> for LineStringBuilder { + type Error = GeoArrowError; + + fn try_from((value, typ): (GenericWkbArray, LineStringType)) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for LineStringBuilder { + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +impl GeometryTypeId for LineStringBuilder { + const GEOMETRY_TYPE_OFFSET: i8 = 2; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/mixed.rs b/src/geoarrow/geoarrow-array/src/builder/mixed.rs new file mode 100644 index 0000000000..bf0ba93d11 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/mixed.rs @@ -0,0 +1,366 @@ +use geo_traits::*; +use geoarrow_schema::{ + CoordType, Dimension, LineStringType, MultiLineStringType, MultiPointType, MultiPolygonType, + PointType, PolygonType, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{ + array::MixedGeometryArray, + builder::{ + LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder, MultiPolygonBuilder, + PointBuilder, PolygonBuilder, + geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper}, + }, + capacity::MixedCapacity, + trait_::GeoArrowArrayBuilder, +}; + +pub(crate) const DEFAULT_PREFER_MULTI: bool = false; + +/// The GeoArrow equivalent to a `Vec>`: a mutable collection of Geometries, all +/// of which have the same dimension. +/// +/// This currently has the caveat that these geometries must be a _primitive_ geometry type. This +/// does not currently support nested GeometryCollection objects. +/// +/// Converting an [`MixedGeometryBuilder`] into a [`MixedGeometryArray`] is `O(1)`. +/// +/// # Invariants +/// +/// - All arrays must have the same dimension +/// - All arrays must have the same coordinate layout (interleaved or separated) +#[derive(Debug)] +pub(crate) struct MixedGeometryBuilder { + /// The dimension of this builder. + /// + /// All underlying arrays must contain a coordinate buffer of this same dimension. + dim: Dimension, + + // Invariant: every item in `types` is `> 0 && < fields.len()` + types: Vec, + + pub(crate) points: PointBuilder, + pub(crate) line_strings: LineStringBuilder, + pub(crate) polygons: PolygonBuilder, + pub(crate) multi_points: MultiPointBuilder, + pub(crate) multi_line_strings: MultiLineStringBuilder, + pub(crate) multi_polygons: MultiPolygonBuilder, + + // Invariant: `offsets.len() == types.len()` + offsets: Vec, + + /// Whether to prefer multi or single arrays for new geometries. + /// + /// E.g. if this is `true` and a Point geometry is added, it will be added to the + /// MultiPointBuilder. If this is `false`, the Point geometry will be added to the + /// PointBuilder. + /// + /// The idea is that always adding multi-geometries will make it easier to downcast later. + pub(crate) prefer_multi: bool, +} + +impl MixedGeometryBuilder { + pub(crate) fn with_capacity_and_options( + dim: Dimension, + capacity: MixedCapacity, + coord_type: CoordType, + ) -> Self { + // Don't store array metadata on child arrays + Self { + dim, + types: vec![], + points: PointBuilder::with_capacity( + PointType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.point, + ), + line_strings: LineStringBuilder::with_capacity( + LineStringType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.line_string, + ), + polygons: PolygonBuilder::with_capacity( + PolygonType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.polygon, + ), + multi_points: MultiPointBuilder::with_capacity( + MultiPointType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.multi_point, + ), + multi_line_strings: MultiLineStringBuilder::with_capacity( + MultiLineStringType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.multi_line_string, + ), + multi_polygons: MultiPolygonBuilder::with_capacity( + MultiPolygonType::new(dim, Default::default()).with_coord_type(coord_type), + capacity.multi_polygon, + ), + offsets: vec![], + prefer_multi: DEFAULT_PREFER_MULTI, + } + } + + pub(crate) fn with_prefer_multi(self, prefer_multi: bool) -> Self { + Self { + prefer_multi, + ..self + } + } + + pub(crate) fn reserve(&mut self, capacity: MixedCapacity) { + let total_num_geoms = capacity.total_num_geoms(); + self.types.reserve(total_num_geoms); + self.offsets.reserve(total_num_geoms); + self.points.reserve(capacity.point); + self.line_strings.reserve(capacity.line_string); + self.polygons.reserve(capacity.polygon); + self.multi_points.reserve(capacity.multi_point); + self.multi_line_strings.reserve(capacity.multi_line_string); + self.multi_polygons.reserve(capacity.multi_polygon); + } + + pub(crate) fn reserve_exact(&mut self, capacity: MixedCapacity) { + let total_num_geoms = capacity.total_num_geoms(); + self.types.reserve_exact(total_num_geoms); + self.offsets.reserve_exact(total_num_geoms); + self.points.reserve_exact(capacity.point); + self.line_strings.reserve_exact(capacity.line_string); + self.polygons.reserve_exact(capacity.polygon); + self.multi_points.reserve_exact(capacity.multi_point); + self.multi_line_strings + .reserve_exact(capacity.multi_line_string); + self.multi_polygons.reserve_exact(capacity.multi_polygon); + } + + /// Shrinks the capacity of self to fit. + pub(crate) fn shrink_to_fit(&mut self) { + self.types.shrink_to_fit(); + self.offsets.shrink_to_fit(); + self.points.shrink_to_fit(); + self.line_strings.shrink_to_fit(); + self.polygons.shrink_to_fit(); + self.multi_points.shrink_to_fit(); + self.multi_line_strings.shrink_to_fit(); + self.multi_polygons.shrink_to_fit(); + } + + pub(crate) fn finish(self) -> MixedGeometryArray { + MixedGeometryArray::new( + self.types.into(), + self.offsets.into(), + Some(self.points.finish()), + Some(self.line_strings.finish()), + Some(self.polygons.finish()), + Some(self.multi_points.finish()), + Some(self.multi_line_strings.finish()), + Some(self.multi_polygons.finish()), + ) + } + + /// Add a new Point to the end of this array. + /// + /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPointBuilder` child + /// array. Otherwise, it will be stored in the `PointBuilder` child array. + #[inline] + pub(crate) fn push_point(&mut self, value: &impl PointTrait) -> GeoArrowResult<()> { + if self.prefer_multi { + self.add_multi_point_type(); + self.multi_points.push_point(Some(value)) + } else { + self.add_point_type(); + self.points.push_point(Some(value)); + Ok(()) + } + } + + #[inline] + fn add_point_type(&mut self) { + self.offsets.push(self.points.len().try_into().unwrap()); + match self.dim { + Dimension::XY => self.types.push(1), + Dimension::XYZ => self.types.push(11), + Dimension::XYM => self.types.push(21), + Dimension::XYZM => self.types.push(31), + } + } + + /// Add a new LineString to the end of this array. + /// + /// If `self.prefer_multi` is `true`, it will be stored in the `MultiLineStringBuilder` child + /// array. Otherwise, it will be stored in the `LineStringBuilder` child array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub(crate) fn push_line_string( + &mut self, + value: &impl LineStringTrait, + ) -> GeoArrowResult<()> { + if self.prefer_multi { + self.add_multi_line_string_type(); + self.multi_line_strings.push_line_string(Some(value)) + } else { + self.add_line_string_type(); + self.line_strings.push_line_string(Some(value)) + } + } + + #[inline] + fn add_line_string_type(&mut self) { + self.offsets + .push(self.line_strings.len().try_into().unwrap()); + match self.dim { + Dimension::XY => self.types.push(2), + Dimension::XYZ => self.types.push(12), + Dimension::XYM => self.types.push(22), + Dimension::XYZM => self.types.push(32), + } + } + + /// Add a new Polygon to the end of this array. + /// + /// If `self.prefer_multi` is `true`, it will be stored in the `MultiPolygonBuilder` child + /// array. Otherwise, it will be stored in the `PolygonBuilder` child array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub(crate) fn push_polygon( + &mut self, + value: &impl PolygonTrait, + ) -> GeoArrowResult<()> { + if self.prefer_multi { + self.add_multi_polygon_type(); + self.multi_polygons.push_polygon(Some(value)) + } else { + self.add_polygon_type(); + self.polygons.push_polygon(Some(value)) + } + } + + #[inline] + fn add_polygon_type(&mut self) { + self.offsets.push(self.polygons.len().try_into().unwrap()); + match self.dim { + Dimension::XY => self.types.push(3), + Dimension::XYZ => self.types.push(13), + Dimension::XYM => self.types.push(23), + Dimension::XYZM => self.types.push(33), + } + } + + /// Add a new MultiPoint to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub(crate) fn push_multi_point( + &mut self, + value: &impl MultiPointTrait, + ) -> GeoArrowResult<()> { + self.add_multi_point_type(); + self.multi_points.push_multi_point(Some(value)) + } + + #[inline] + fn add_multi_point_type(&mut self) { + self.offsets + .push(self.multi_points.len().try_into().unwrap()); + match self.dim { + Dimension::XY => self.types.push(4), + Dimension::XYZ => self.types.push(14), + Dimension::XYM => self.types.push(24), + Dimension::XYZM => self.types.push(34), + } + } + + /// Add a new MultiLineString to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub(crate) fn push_multi_line_string( + &mut self, + value: &impl MultiLineStringTrait, + ) -> GeoArrowResult<()> { + self.add_multi_line_string_type(); + self.multi_line_strings.push_multi_line_string(Some(value)) + } + + #[inline] + fn add_multi_line_string_type(&mut self) { + self.offsets + .push(self.multi_line_strings.len().try_into().unwrap()); + match self.dim { + Dimension::XY => self.types.push(5), + Dimension::XYZ => self.types.push(15), + Dimension::XYM => self.types.push(25), + Dimension::XYZM => self.types.push(35), + } + } + + /// Add a new MultiPolygon to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub(crate) fn push_multi_polygon( + &mut self, + value: &impl MultiPolygonTrait, + ) -> GeoArrowResult<()> { + self.add_multi_polygon_type(); + self.multi_polygons.push_multi_polygon(Some(value)) + } + + #[inline] + fn add_multi_polygon_type(&mut self) { + self.offsets + .push(self.multi_polygons.len().try_into().unwrap()); + match self.dim { + Dimension::XY => self.types.push(6), + Dimension::XYZ => self.types.push(16), + Dimension::XYM => self.types.push(26), + Dimension::XYZM => self.types.push(36), + } + } + + #[inline] + pub(crate) fn push_geometry( + &mut self, + geom: &'_ impl GeometryTrait, + ) -> GeoArrowResult<()> { + use geo_traits::GeometryType::*; + + match geom.as_type() { + Point(g) => { + self.push_point(g)?; + } + LineString(g) => { + self.push_line_string(g)?; + } + Polygon(g) => { + self.push_polygon(g)?; + } + MultiPoint(p) => self.push_multi_point(p)?, + MultiLineString(p) => self.push_multi_line_string(p)?, + MultiPolygon(p) => self.push_multi_polygon(p)?, + GeometryCollection(gc) => { + if gc.num_geometries() == 1 { + self.push_geometry(&gc.geometry(0).unwrap())? + } else { + return Err(GeoArrowError::InvalidGeoArrow( + "nested geometry collections not supported in GeoArrow".to_string(), + )); + } + } + Rect(r) => self.push_polygon(&RectWrapper::try_new(r)?)?, + Triangle(tri) => self.push_polygon(&TriangleWrapper(tri))?, + Line(l) => self.push_line_string(&LineWrapper(l))?, + }; + Ok(()) + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/mod.rs b/src/geoarrow/geoarrow-array/src/builder/mod.rs new file mode 100644 index 0000000000..6e332d3c3d --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/mod.rs @@ -0,0 +1,30 @@ +//! Push-based APIs for constructing arrays. + +mod coord; +pub(crate) mod geo_trait_wrappers; +mod geometry; +mod geometrycollection; +mod linestring; +mod mixed; +mod multilinestring; +mod multipoint; +mod multipolygon; +mod offsets; +mod point; +mod polygon; +mod rect; +mod wkb; + +pub use coord::{CoordBufferBuilder, InterleavedCoordBufferBuilder, SeparatedCoordBufferBuilder}; +pub use geometry::GeometryBuilder; +pub use geometrycollection::GeometryCollectionBuilder; +pub use linestring::LineStringBuilder; +pub(crate) use mixed::MixedGeometryBuilder; +pub use multilinestring::MultiLineStringBuilder; +pub use multipoint::MultiPointBuilder; +pub use multipolygon::MultiPolygonBuilder; +pub(crate) use offsets::OffsetsBuilder; +pub use point::PointBuilder; +pub use polygon::PolygonBuilder; +pub use rect::RectBuilder; +pub use wkb::WkbBuilder; diff --git a/src/geoarrow/geoarrow-array/src/builder/multilinestring.rs b/src/geoarrow/geoarrow-array/src/builder/multilinestring.rs new file mode 100644 index 0000000000..868c767c0c --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/multilinestring.rs @@ -0,0 +1,383 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::NullBufferBuilder; +use geo_traits::{CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait}; +use geoarrow_schema::{ + Dimension, MultiLineStringType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{GenericWkbArray, MultiLineStringArray}, + builder::{CoordBufferBuilder, OffsetsBuilder}, + capacity::MultiLineStringCapacity, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, + util::GeometryTypeName, +}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of +/// MultiLineStrings. +/// +/// Converting an [`MultiLineStringBuilder`] into a [`MultiLineStringArray`] is `O(1)`. +#[derive(Debug)] +pub struct MultiLineStringBuilder { + data_type: MultiLineStringType, + + pub(crate) coords: CoordBufferBuilder, + + /// OffsetsBuilder into the ring array where each geometry starts + pub(crate) geom_offsets: OffsetsBuilder, + + /// OffsetsBuilder into the coordinate array where each ring starts + pub(crate) ring_offsets: OffsetsBuilder, + + /// Validity is only defined at the geometry level + pub(crate) validity: NullBufferBuilder, +} + +impl MultiLineStringBuilder { + /// Creates a new empty [`MultiLineStringBuilder`]. + pub fn new(typ: MultiLineStringType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`MultiLineStringBuilder`] with a capacity. + pub fn with_capacity(typ: MultiLineStringType, capacity: MultiLineStringCapacity) -> Self { + let coords = CoordBufferBuilder::with_capacity( + capacity.coord_capacity, + typ.coord_type(), + typ.dimension(), + ); + Self { + coords, + geom_offsets: OffsetsBuilder::with_capacity(capacity.geom_capacity), + ring_offsets: OffsetsBuilder::with_capacity(capacity.ring_capacity), + validity: NullBufferBuilder::new(capacity.geom_capacity), + data_type: typ, + } + } + + /// Reserves capacity for at least `additional` more MultiLineStrings. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: MultiLineStringCapacity) { + self.coords.reserve(additional.coord_capacity); + self.ring_offsets.reserve(additional.ring_capacity); + self.geom_offsets.reserve(additional.geom_capacity); + } + + /// Reserves the minimum capacity for at least `additional` more MultiLineStrings. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: MultiLineStringCapacity) { + self.coords.reserve_exact(additional.coord_capacity); + self.ring_offsets.reserve_exact(additional.ring_capacity); + self.geom_offsets.reserve_exact(additional.geom_capacity); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.coords.shrink_to_fit(); + self.ring_offsets.shrink_to_fit(); + self.geom_offsets.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// The canonical method to create a [`MultiLineStringBuilder`] out of its internal + /// components. + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// - if the validity is not `None` and its length is different from the number of geometries + /// - if the largest ring offset does not match the number of coordinates + /// - if the largest geometry offset does not match the size of ring offsets + pub fn try_new( + coords: CoordBufferBuilder, + geom_offsets: OffsetsBuilder, + ring_offsets: OffsetsBuilder, + validity: NullBufferBuilder, + data_type: MultiLineStringType, + ) -> GeoArrowResult { + // check( + // &coords.clone().into(), + // &geom_offsets.clone().into(), + // &ring_offsets.clone().into(), + // validity.as_ref().map(|x| x.len()), + // )?; + Ok(Self { + coords, + geom_offsets, + ring_offsets, + validity, + data_type, + }) + } + + /// Push a raw offset to the underlying geometry offsets buffer. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw offsets + /// upholds the necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn try_push_geom_offset(&mut self, offsets_length: usize) -> GeoArrowResult<()> { + self.geom_offsets.try_push_usize(offsets_length)?; + self.validity.append(true); + Ok(()) + } + + /// Push a raw offset to the underlying ring offsets buffer. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw offsets + /// upholds the necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn try_push_ring_offset(&mut self, offsets_length: usize) -> GeoArrowResult<()> { + self.ring_offsets.try_push_usize(offsets_length)?; + Ok(()) + } + + /// Consume the builder and convert to an immutable [`MultiLineStringArray`] + pub fn finish(mut self) -> MultiLineStringArray { + let validity = self.validity.finish(); + + MultiLineStringArray::new( + self.coords.finish(), + self.geom_offsets.finish(), + self.ring_offsets.finish(), + validity, + self.data_type.metadata().clone(), + ) + } + + /// Add a new LineString to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_line_string( + &mut self, + value: Option<&impl LineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(line_string) = value { + // Total number of linestrings in this multilinestring + let num_line_strings = 1; + self.geom_offsets.try_push_usize(num_line_strings)?; + + // For each ring: + // - Get ring + // - Add ring's # of coords to self.ring_offsets + // - Push ring's coords to self.coords + + self.ring_offsets.try_push_usize(line_string.num_coords())?; + + for coord in line_string.coords() { + self.coords.push_coord(&coord); + } + + self.validity.append(true); + } else { + self.push_null(); + } + Ok(()) + } + + /// Add a new MultiLineString to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_multi_line_string( + &mut self, + value: Option<&impl MultiLineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_line_string) = value { + // Total number of linestrings in this multilinestring + let num_line_strings = multi_line_string.num_line_strings(); + self.geom_offsets.try_push_usize(num_line_strings)?; + + // For each ring: + // - Get ring + // - Add ring's # of coords to self.ring_offsets + // - Push ring's coords to self.coords + + // Number of coords for each ring + for line_string in multi_line_string.line_strings() { + self.ring_offsets.try_push_usize(line_string.num_coords())?; + + for coord in line_string.coords() { + self.coords.push_coord(&coord); + } + } + + self.validity.append(true); + } else { + self.push_null(); + } + Ok(()) + } + + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not LineString or MultiLineString. + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + match value.as_type() { + GeometryType::LineString(g) => self.push_line_string(Some(g))?, + GeometryType::MultiLineString(g) => self.push_multi_line_string(Some(g))?, + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected MultiLineString compatible geometry, got {}", + gt.name() + ))); + } + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_point| self.push_multi_line_string(maybe_multi_point)) + .unwrap(); + } + + /// Extend this builder with the given geometries + pub fn extend_from_geometry_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult<()> { + geoms.into_iter().try_for_each(|g| self.push_geometry(g))?; + Ok(()) + } + + /// Push a raw coordinate to the underlying coordinate array. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw coordinates + /// to the array upholds the necessary invariants of the array. + #[inline] + pub(crate) fn push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + self.coords.push_coord(coord); + Ok(()) + } + + #[inline] + pub(crate) fn push_null(&mut self) { + // NOTE! Only the geom_offsets array needs to get extended, because the next geometry will + // point to the same ring array location + self.geom_offsets.extend_constant(1); + self.validity.append(false); + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_multi_line_strings( + geoms: &[impl MultiLineStringTrait], + typ: MultiLineStringType, + ) -> Self { + let capacity = MultiLineStringCapacity::from_multi_line_strings(geoms.iter().map(Some)); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_multi_line_strings( + geoms: &[Option>], + typ: MultiLineStringType, + ) -> Self { + let capacity = + MultiLineStringCapacity::from_multi_line_strings(geoms.iter().map(|x| x.as_ref())); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: MultiLineStringType, + ) -> GeoArrowResult { + let capacity = MultiLineStringCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_geometry_iter(geoms.iter().map(|x| x.as_ref()))?; + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, MultiLineStringType)> + for MultiLineStringBuilder +{ + type Error = GeoArrowError; + + fn try_from((value, typ): (GenericWkbArray, MultiLineStringType)) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for MultiLineStringBuilder { + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +impl GeometryTypeId for MultiLineStringBuilder { + const GEOMETRY_TYPE_OFFSET: i8 = 5; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/multipoint.rs b/src/geoarrow/geoarrow-array/src/builder/multipoint.rs new file mode 100644 index 0000000000..03d3b7f858 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/multipoint.rs @@ -0,0 +1,282 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::NullBufferBuilder; +use geo_traits::{CoordTrait, GeometryTrait, GeometryType, MultiPointTrait, PointTrait}; +use geoarrow_schema::{ + Dimension, MultiPointType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{GenericWkbArray, MultiPointArray}, + builder::{CoordBufferBuilder, OffsetsBuilder}, + capacity::MultiPointCapacity, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, + util::GeometryTypeName, +}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of MultiPoints. +/// +/// Converting an [`MultiPointBuilder`] into a [`MultiPointArray`] is `O(1)`. +#[derive(Debug)] +pub struct MultiPointBuilder { + data_type: MultiPointType, + + coords: CoordBufferBuilder, + + geom_offsets: OffsetsBuilder, + + /// Validity is only defined at the geometry level + validity: NullBufferBuilder, +} + +impl MultiPointBuilder { + /// Creates a new empty [`MultiPointBuilder`]. + pub fn new(typ: MultiPointType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`MultiPointBuilder`] with a capacity. + pub fn with_capacity(typ: MultiPointType, capacity: MultiPointCapacity) -> Self { + let coords = CoordBufferBuilder::with_capacity( + capacity.coord_capacity, + typ.coord_type(), + typ.dimension(), + ); + Self { + coords, + geom_offsets: OffsetsBuilder::with_capacity(capacity.geom_capacity), + validity: NullBufferBuilder::new(capacity.geom_capacity), + data_type: typ, + } + } + + /// Reserves capacity for at least `additional` more MultiPoints. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, capacity: MultiPointCapacity) { + self.coords.reserve(capacity.coord_capacity); + self.geom_offsets.reserve(capacity.geom_capacity); + } + + /// Reserves the minimum capacity for at least `additional` more MultiPoints. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, capacity: MultiPointCapacity) { + self.coords.reserve_exact(capacity.coord_capacity); + self.geom_offsets.reserve_exact(capacity.geom_capacity); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.coords.shrink_to_fit(); + self.geom_offsets.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// Consume the builder and convert to an immutable [`MultiPointArray`] + pub fn finish(mut self) -> MultiPointArray { + let validity = self.validity.finish(); + MultiPointArray::new( + self.coords.finish(), + self.geom_offsets.finish(), + validity, + self.data_type.metadata().clone(), + ) + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_point| self.push_multi_point(maybe_multi_point)) + .unwrap(); + } + + /// Extend this builder with the given geometries + pub fn extend_from_geometry_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult<()> { + geoms.into_iter().try_for_each(|g| self.push_geometry(g))?; + Ok(()) + } + + /// Add a new Point to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_point(&mut self, value: Option<&impl PointTrait>) -> GeoArrowResult<()> { + if let Some(point) = value { + self.coords.push_point(point); + self.try_push_length(1)?; + } else { + self.push_null(); + } + + Ok(()) + } + + /// Add a new MultiPoint to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_multi_point( + &mut self, + value: Option<&impl MultiPointTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_point) = value { + let num_points = multi_point.num_points(); + for point in multi_point.points() { + self.coords.push_point(&point); + } + self.try_push_length(num_points)?; + } else { + self.push_null(); + } + Ok(()) + } + + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Point or MultiPoint. + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + match value.as_type() { + GeometryType::Point(g) => self.push_point(Some(g))?, + GeometryType::MultiPoint(g) => self.push_multi_point(Some(g))?, + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected MultiPoint compatible geometry, got {}", + gt.name() + ))); + } + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Push a raw coordinate to the underlying coordinate array. + /// + /// # Invariant + /// + /// Care must be taken to ensure that pushing raw coordinates to the array upholds the + /// necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + self.coords.try_push_coord(coord) + } + + /// Needs to be called when a valid value was extended to this array. + /// This is a relatively low level function, prefer `try_push` when you can. + #[inline] + pub(crate) fn try_push_length(&mut self, geom_offsets_length: usize) -> GeoArrowResult<()> { + self.geom_offsets.try_push_usize(geom_offsets_length)?; + self.validity.append(true); + Ok(()) + } + + #[inline] + pub(crate) fn push_null(&mut self) { + self.geom_offsets.extend_constant(1); + self.validity.append(false); + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_multi_points(geoms: &[impl MultiPointTrait], typ: MultiPointType) -> Self { + let capacity = MultiPointCapacity::from_multi_points(geoms.iter().map(Some)); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_multi_points( + geoms: &[Option>], + typ: MultiPointType, + ) -> Self { + let capacity = MultiPointCapacity::from_multi_points(geoms.iter().map(|x| x.as_ref())); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: MultiPointType, + ) -> GeoArrowResult { + let capacity = MultiPointCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_geometry_iter(geoms.iter().map(|x| x.as_ref()))?; + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, MultiPointType)> for MultiPointBuilder { + type Error = GeoArrowError; + + fn try_from((value, typ): (GenericWkbArray, MultiPointType)) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for MultiPointBuilder { + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +impl GeometryTypeId for MultiPointBuilder { + const GEOMETRY_TYPE_OFFSET: i8 = 4; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/multipolygon.rs b/src/geoarrow/geoarrow-array/src/builder/multipolygon.rs new file mode 100644 index 0000000000..713cced377 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/multipolygon.rs @@ -0,0 +1,403 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::NullBufferBuilder; +use geo_traits::{ + CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiPolygonTrait, PolygonTrait, +}; +use geoarrow_schema::{ + Dimension, MultiPolygonType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{GenericWkbArray, MultiPolygonArray}, + builder::{CoordBufferBuilder, OffsetsBuilder, geo_trait_wrappers::RectWrapper}, + capacity::MultiPolygonCapacity, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, + util::GeometryTypeName, +}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of MultiPolygons. +/// +/// Converting an [`MultiPolygonBuilder`] into a [`MultiPolygonArray`] is `O(1)`. +#[derive(Debug)] +pub struct MultiPolygonBuilder { + data_type: MultiPolygonType, + + pub(crate) coords: CoordBufferBuilder, + + /// OffsetsBuilder into the polygon array where each geometry starts + pub(crate) geom_offsets: OffsetsBuilder, + + /// OffsetsBuilder into the ring array where each polygon starts + pub(crate) polygon_offsets: OffsetsBuilder, + + /// OffsetsBuilder into the coordinate array where each ring starts + pub(crate) ring_offsets: OffsetsBuilder, + + /// Validity is only defined at the geometry level + pub(crate) validity: NullBufferBuilder, +} + +impl MultiPolygonBuilder { + /// Creates a new empty [`MultiPolygonBuilder`]. + pub fn new(typ: MultiPolygonType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`MultiPolygonBuilder`] with a capacity. + pub fn with_capacity(typ: MultiPolygonType, capacity: MultiPolygonCapacity) -> Self { + let coords = CoordBufferBuilder::with_capacity( + capacity.coord_capacity, + typ.coord_type(), + typ.dimension(), + ); + Self { + coords, + geom_offsets: OffsetsBuilder::with_capacity(capacity.geom_capacity), + polygon_offsets: OffsetsBuilder::with_capacity(capacity.polygon_capacity), + ring_offsets: OffsetsBuilder::with_capacity(capacity.ring_capacity), + validity: NullBufferBuilder::new(capacity.geom_capacity), + data_type: typ, + } + } + + /// Reserves capacity for at least `additional` more MultiPolygons. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: MultiPolygonCapacity) { + self.coords.reserve(additional.coord_capacity); + self.ring_offsets.reserve(additional.ring_capacity); + self.polygon_offsets.reserve(additional.polygon_capacity); + self.geom_offsets.reserve(additional.geom_capacity); + } + + /// Reserves the minimum capacity for at least `additional` more MultiPolygons. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: MultiPolygonCapacity) { + self.coords.reserve_exact(additional.coord_capacity); + self.ring_offsets.reserve_exact(additional.ring_capacity); + self.polygon_offsets + .reserve_exact(additional.polygon_capacity); + self.geom_offsets.reserve_exact(additional.geom_capacity); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.coords.shrink_to_fit(); + self.ring_offsets.shrink_to_fit(); + self.polygon_offsets.shrink_to_fit(); + self.geom_offsets.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// Consume the builder and convert to an immutable [`MultiPolygonArray`] + pub fn finish(mut self) -> MultiPolygonArray { + let validity = self.validity.finish(); + + MultiPolygonArray::new( + self.coords.finish(), + self.geom_offsets.finish(), + self.polygon_offsets.finish(), + self.ring_offsets.finish(), + validity, + self.data_type.metadata().clone(), + ) + } + + /// Add a new Polygon to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_polygon( + &mut self, + value: Option<&impl PolygonTrait>, + ) -> GeoArrowResult<()> { + if let Some(polygon) = value { + let exterior_ring = polygon.exterior(); + if exterior_ring.is_none() { + self.push_empty()?; + return Ok(()); + } + + if let Some(ext_ring) = polygon.exterior() { + // Total number of polygons in this MultiPolygon + let num_polygons = 1; + self.geom_offsets.try_push_usize(num_polygons)?; + + for coord in ext_ring.coords() { + self.coords.push_coord(&coord); + } + + // Total number of rings in this Multipolygon + self.polygon_offsets + .try_push_usize(polygon.num_interiors() + 1)?; + + // Number of coords for each ring + self.ring_offsets.try_push_usize(ext_ring.num_coords())?; + + for int_ring in polygon.interiors() { + self.ring_offsets.try_push_usize(int_ring.num_coords())?; + + for coord in int_ring.coords() { + self.coords.push_coord(&coord); + } + } + } else { + let num_polygons = 0; + self.geom_offsets.try_push_usize(num_polygons)?; + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Add a new MultiPolygon to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_multi_polygon( + &mut self, + value: Option<&impl MultiPolygonTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_polygon) = value { + // Total number of polygons in this MultiPolygon + let num_polygons = multi_polygon.num_polygons(); + self.try_push_geom_offset(num_polygons)?; + + // Iterate over polygons + for polygon in multi_polygon.polygons() { + // Here we unwrap the exterior ring because a polygon inside a multi polygon should + // never be empty. + let ext_ring = polygon.exterior().unwrap(); + for coord in ext_ring.coords() { + self.coords.push_coord(&coord); + } + + // Total number of rings in this Multipolygon + self.polygon_offsets + .try_push_usize(polygon.num_interiors() + 1)?; + + // Number of coords for each ring + self.ring_offsets.try_push_usize(ext_ring.num_coords())?; + + for int_ring in polygon.interiors() { + self.ring_offsets.try_push_usize(int_ring.num_coords())?; + + for coord in int_ring.coords() { + self.coords.push_coord(&coord); + } + } + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Polygon or MultiPolygon. + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + match value.as_type() { + GeometryType::Polygon(g) => self.push_polygon(Some(g))?, + GeometryType::MultiPolygon(g) => self.push_multi_polygon(Some(g))?, + GeometryType::Rect(g) => self.push_polygon(Some(&RectWrapper::try_new(g)?))?, + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected MultiPolygon compatible geometry, got {}", + gt.name() + ))); + } + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_polygon| self.push_multi_polygon(maybe_multi_polygon)) + .unwrap(); + } + + /// Extend this builder with the given geometries + pub fn extend_from_geometry_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult<()> { + geoms.into_iter().try_for_each(|g| self.push_geometry(g))?; + Ok(()) + } + + /// Push a raw offset to the underlying geometry offsets buffer. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw offsets + /// upholds the necessary invariants of the array. + #[inline] + pub(crate) fn try_push_geom_offset(&mut self, offsets_length: usize) -> GeoArrowResult<()> { + self.geom_offsets.try_push_usize(offsets_length)?; + self.validity.append(true); + Ok(()) + } + + /// Push a raw offset to the underlying polygon offsets buffer. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw offsets + /// upholds the necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn try_push_polygon_offset(&mut self, offsets_length: usize) -> GeoArrowResult<()> { + self.polygon_offsets.try_push_usize(offsets_length)?; + Ok(()) + } + + /// Push a raw offset to the underlying ring offsets buffer. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw offsets + /// upholds the necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn try_push_ring_offset(&mut self, offsets_length: usize) -> GeoArrowResult<()> { + self.ring_offsets.try_push_usize(offsets_length)?; + Ok(()) + } + + /// Push a raw coordinate to the underlying coordinate array. + /// + /// # Safety + /// + /// This is marked as unsafe because care must be taken to ensure that pushing raw coordinates + /// to the array upholds the necessary invariants of the array. + #[inline] + pub unsafe fn push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + self.coords.push_coord(coord); + Ok(()) + } + + #[inline] + pub(crate) fn push_empty(&mut self) -> GeoArrowResult<()> { + self.geom_offsets.try_push_usize(0)?; + self.validity.append(true); + Ok(()) + } + + #[inline] + pub(crate) fn push_null(&mut self) { + // NOTE! Only the geom_offsets array needs to get extended, because the next geometry will + // point to the same polygon array location + // Note that we don't use self.try_push_geom_offset because that sets validity to true + self.geom_offsets.extend_constant(1); + self.validity.append(false); + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_multi_polygons( + geoms: &[impl MultiPolygonTrait], + typ: MultiPolygonType, + ) -> Self { + let capacity = MultiPolygonCapacity::from_multi_polygons(geoms.iter().map(Some)); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_multi_polygons( + geoms: &[Option>], + typ: MultiPolygonType, + ) -> Self { + let capacity = MultiPolygonCapacity::from_multi_polygons(geoms.iter().map(|x| x.as_ref())); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: MultiPolygonType, + ) -> GeoArrowResult { + let capacity = MultiPolygonCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_geometry_iter(geoms.iter().map(|x| x.as_ref()))?; + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, MultiPolygonType)> for MultiPolygonBuilder { + type Error = GeoArrowError; + + fn try_from((value, typ): (GenericWkbArray, MultiPolygonType)) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for MultiPolygonBuilder { + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +impl GeometryTypeId for MultiPolygonBuilder { + const GEOMETRY_TYPE_OFFSET: i8 = 6; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/offsets.rs b/src/geoarrow/geoarrow-array/src/builder/offsets.rs new file mode 100644 index 0000000000..d776fee99d --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/offsets.rs @@ -0,0 +1,142 @@ +//! This was originally copied from arrow2. + +use std::hint::unreachable_unchecked; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::OffsetBuffer; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +/// A wrapper type of [`Vec`] representing the invariants of Arrow's offsets. +/// It is guaranteed to (sound to assume that): +/// * every element is `>= 0` +/// * element at position `i` is >= than element at position `i-1`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct OffsetsBuilder(Vec); + +impl Default for OffsetsBuilder { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl OffsetsBuilder { + /// Returns an empty [`OffsetsBuilder`] (i.e. with a single element, the zero) + #[inline] + pub(crate) fn new() -> Self { + Self(vec![O::zero()]) + } + + /// Returns a new [`OffsetsBuilder`] with a capacity, allocating at least `capacity + 1` + /// entries. + pub(crate) fn with_capacity(capacity: usize) -> Self { + let mut offsets = Vec::with_capacity(capacity + 1); + offsets.push(O::zero()); + Self(offsets) + } + + /// Reserves `additional` entries. + pub(crate) fn reserve(&mut self, additional: usize) { + self.0.reserve(additional); + } + + /// Reserves exactly `additional` entries. + pub(crate) fn reserve_exact(&mut self, additional: usize) { + self.0.reserve_exact(additional); + } + + /// Shrinks the capacity of self to fit. + pub(crate) fn shrink_to_fit(&mut self) { + self.0.shrink_to_fit(); + } + + /// Pushes a new element with a given length. + /// # Error + /// This function errors iff the new last item is larger than what `O` supports. + /// # Implementation + /// This function: + /// * checks that this length does not overflow + #[inline] + pub(crate) fn try_push_usize(&mut self, length: usize) -> GeoArrowResult<()> { + let length = O::usize_as(length); + + let old_length = self.last(); + // let new_length = old_length.checked_add(&length).ok_or(Error::Overflow)?; + let new_length = *old_length + length; + self.0.push(new_length); + Ok(()) + } + + /// Returns the last offset of this container. + #[inline] + pub(crate) fn last(&self) -> &O { + match self.0.last() { + Some(element) => element, + None => unsafe { unreachable_unchecked() }, + } + } + + /// Returns the length an array with these offsets would be. + #[inline] + pub(crate) fn len_proxy(&self) -> usize { + self.0.len() - 1 + } + + #[inline] + /// Returns the number of offsets in this container. + pub(crate) fn len(&self) -> usize { + self.0.len() + } + + /// Returns the byte slice stored in this buffer + #[inline] + pub(crate) fn as_slice(&self) -> &[O] { + self.0.as_slice() + } + + /// Extends itself with `additional` elements equal to the last offset. + /// This is useful to extend offsets with empty values, e.g. for null slots. + #[inline] + pub(crate) fn extend_constant(&mut self, additional: usize) { + let offset = *self.last(); + if additional == 1 { + self.0.push(offset) + } else { + self.0.resize(self.len() + additional, offset) + } + } + + pub(crate) fn finish(self) -> OffsetBuffer { + OffsetBuffer::new(self.0.into()) + } +} + +impl From> for OffsetsBuilder { + fn from(offsets: OffsetsBuilder) -> Self { + // this conversion is lossless and uphelds all invariants + Self( + offsets + .as_slice() + .iter() + .map(|x| *x as i64) + .collect::>(), + ) + } +} + +impl TryFrom> for OffsetsBuilder { + type Error = GeoArrowError; + + fn try_from(offsets: OffsetsBuilder) -> GeoArrowResult { + i32::try_from(*offsets.last()).map_err(|_| GeoArrowError::Overflow)?; + + // this conversion is lossless and uphelds all invariants + Ok(Self( + offsets + .as_slice() + .iter() + .map(|x| *x as i32) + .collect::>(), + )) + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/point.rs b/src/geoarrow/geoarrow-array/src/builder/point.rs new file mode 100644 index 0000000000..68ab3c58dd --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/point.rs @@ -0,0 +1,288 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::NullBufferBuilder; +use geo_traits::{CoordTrait, GeometryTrait, GeometryType, MultiPointTrait, PointTrait}; +use geoarrow_schema::{ + Dimension, PointType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{GenericWkbArray, PointArray}, + builder::CoordBufferBuilder, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, + util::GeometryTypeName, +}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of Points. +/// +/// Converting an [`PointBuilder`] into a [`PointArray`] is `O(1)`. +#[derive(Debug)] +pub struct PointBuilder { + data_type: PointType, + pub(crate) coords: CoordBufferBuilder, + pub(crate) validity: NullBufferBuilder, +} + +impl PointBuilder { + /// Creates a new empty [`PointBuilder`]. + pub fn new(typ: PointType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`PointBuilder`] with a capacity. + pub fn with_capacity(typ: PointType, capacity: usize) -> Self { + let coords = CoordBufferBuilder::with_capacity(capacity, typ.coord_type(), typ.dimension()); + Self { + coords, + validity: NullBufferBuilder::new(capacity), + data_type: typ, + } + } + + /// Reserves capacity for at least `additional` more points to be inserted + /// in the given `Vec`. The collection may reserve more space to + /// speculatively avoid frequent reallocations. After calling `reserve`, + /// capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: usize) { + self.coords.reserve(additional); + } + + /// Reserves the minimum capacity for at least `additional` more points. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: usize) { + self.coords.reserve_exact(additional); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.coords.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// Consume the builder and convert to an immutable [`PointArray`] + pub fn finish(mut self) -> PointArray { + let validity = self.validity.finish(); + PointArray::new( + self.coords.finish(), + validity, + self.data_type.metadata().clone(), + ) + } + + /// Add a new coord to the end of this array, interpreting the coord as a non-empty point. + /// + /// ## Panics + /// + /// - If the added coordinate does not have the same dimension as the point array. + #[inline] + pub fn push_coord(&mut self, value: Option<&impl CoordTrait>) { + self.try_push_coord(value).unwrap() + } + + /// Add a new point to the end of this array. + /// + /// ## Panics + /// + /// - If the added point does not have the same dimension as the point array. + #[inline] + pub fn push_point(&mut self, value: Option<&impl PointTrait>) { + self.try_push_point(value).unwrap() + } + + /// Add a new coord to the end of this array, where the coord is a non-empty point + /// + /// ## Errors + /// + /// - If the added coordinate does not have the same dimension as the point array. + #[inline] + pub fn try_push_coord( + &mut self, + value: Option<&impl CoordTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + self.coords.try_push_coord(value)?; + self.validity.append(true); + } else { + self.push_null() + }; + Ok(()) + } + + /// Add a new point to the end of this array. + /// + /// ## Errors + /// + /// - If the added point does not have the same dimension as the point array. + #[inline] + pub fn try_push_point( + &mut self, + value: Option<&impl PointTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + self.coords.try_push_point(value)?; + self.validity.append(true); + } else { + self.push_null() + }; + Ok(()) + } + + /// Add a valid but empty point to the end of this array. + #[inline] + pub fn push_empty(&mut self) { + self.coords.push_constant(f64::NAN); + self.validity.append_non_null(); + } + + /// Add a new null value to the end of this array. + #[inline] + pub fn push_null(&mut self) { + self.coords.push_constant(f64::NAN); + self.validity.append_null(); + } + + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Point or a MultiPoint with length 1. + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + match value.as_type() { + GeometryType::Point(p) => self.push_point(Some(p)), + GeometryType::MultiPoint(mp) => { + let num_points = mp.num_points(); + if num_points == 0 { + self.push_empty(); + } else if num_points == 1 { + self.push_point(Some(&mp.point(0).unwrap())) + } else { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected MultiPoint with only one point in PointBuilder, got {num_points} points", + ))); + } + } + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected point, got {}", + gt.name() + ))); + } + } + } else { + self.push_null() + }; + Ok(()) + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .for_each(|maybe_polygon| self.push_point(maybe_polygon)); + } + + /// Extend this builder with the given geometries + pub fn extend_from_geometry_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult<()> { + geoms.into_iter().try_for_each(|g| self.push_geometry(g))?; + Ok(()) + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_points<'a>( + geoms: impl ExactSizeIterator + 'a)>, + typ: PointType, + ) -> Self { + let mut mutable_array = Self::with_capacity(typ, geoms.len()); + geoms + .into_iter() + .for_each(|maybe_point| mutable_array.push_point(Some(maybe_point))); + mutable_array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_points<'a>( + geoms: impl ExactSizeIterator + 'a)>>, + typ: PointType, + ) -> Self { + let mut mutable_array = Self::with_capacity(typ, geoms.len()); + geoms + .into_iter() + .for_each(|maybe_point| mutable_array.push_point(maybe_point)); + mutable_array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: PointType, + ) -> GeoArrowResult { + let capacity = geoms.len(); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_geometry_iter(geoms.iter().map(|x| x.as_ref()))?; + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, PointType)> for PointBuilder { + type Error = GeoArrowError; + + fn try_from((value, typ): (GenericWkbArray, PointType)) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for PointBuilder { + fn len(&self) -> usize { + self.coords.len() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +impl GeometryTypeId for PointBuilder { + const GEOMETRY_TYPE_OFFSET: i8 = 1; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/polygon.rs b/src/geoarrow/geoarrow-array/src/builder/polygon.rs new file mode 100644 index 0000000000..dc4a0d7474 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/polygon.rs @@ -0,0 +1,381 @@ +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::NullBufferBuilder; +use geo_traits::{ + CoordTrait, GeometryTrait, GeometryType, LineStringTrait, MultiPolygonTrait, PolygonTrait, + RectTrait, +}; +use geoarrow_schema::{ + Dimension, PolygonType, + error::{GeoArrowError, GeoArrowResult}, + type_id::GeometryTypeId, +}; + +use crate::{ + GeoArrowArray, + array::{GenericWkbArray, PolygonArray}, + builder::{ + CoordBufferBuilder, OffsetsBuilder, + geo_trait_wrappers::{RectWrapper, TriangleWrapper}, + }, + capacity::PolygonCapacity, + trait_::{GeoArrowArrayAccessor, GeoArrowArrayBuilder}, + util::GeometryTypeName, +}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of Polygons. +/// +/// Converting an [`PolygonBuilder`] into a [`PolygonArray`] is `O(1)`. +#[derive(Debug)] +pub struct PolygonBuilder { + data_type: PolygonType, + + pub(crate) coords: CoordBufferBuilder, + + /// OffsetsBuilder into the ring array where each geometry starts + pub(crate) geom_offsets: OffsetsBuilder, + + /// OffsetsBuilder into the coordinate array where each ring starts + pub(crate) ring_offsets: OffsetsBuilder, + + /// Validity is only defined at the geometry level + pub(crate) validity: NullBufferBuilder, +} + +impl PolygonBuilder { + /// Creates a new empty [`PolygonBuilder`]. + pub fn new(typ: PolygonType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`PolygonBuilder`] with given capacity and no validity. + pub fn with_capacity(typ: PolygonType, capacity: PolygonCapacity) -> Self { + let coords = CoordBufferBuilder::with_capacity( + capacity.coord_capacity, + typ.coord_type(), + typ.dimension(), + ); + Self { + coords, + geom_offsets: OffsetsBuilder::with_capacity(capacity.geom_capacity), + ring_offsets: OffsetsBuilder::with_capacity(capacity.ring_capacity), + validity: NullBufferBuilder::new(capacity.geom_capacity), + data_type: typ, + } + } + + /// Reserves capacity for at least `additional` more Polygons. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, capacity: PolygonCapacity) { + self.coords.reserve(capacity.coord_capacity); + self.ring_offsets.reserve(capacity.ring_capacity); + self.geom_offsets.reserve(capacity.geom_capacity); + } + + /// Reserves the minimum capacity for at least `additional` more Polygons. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, capacity: PolygonCapacity) { + self.coords.reserve_exact(capacity.coord_capacity); + self.ring_offsets.reserve_exact(capacity.ring_capacity); + self.geom_offsets.reserve_exact(capacity.geom_capacity); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.coords.shrink_to_fit(); + self.ring_offsets.shrink_to_fit(); + self.geom_offsets.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// Push a raw offset to the underlying geometry offsets buffer. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw offsets + /// upholds the necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn try_push_geom_offset(&mut self, offsets_length: usize) -> GeoArrowResult<()> { + self.geom_offsets.try_push_usize(offsets_length)?; + self.validity.append(true); + Ok(()) + } + + /// Push a raw offset to the underlying ring offsets buffer. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw offsets + /// upholds the necessary invariants of the array. + #[inline] + #[allow(dead_code)] + pub(crate) fn try_push_ring_offset(&mut self, offsets_length: usize) -> GeoArrowResult<()> { + self.ring_offsets.try_push_usize(offsets_length)?; + Ok(()) + } + + /// Consume the builder and convert to an immutable [`PolygonArray`] + pub fn finish(mut self) -> PolygonArray { + let validity = self.validity.finish(); + + PolygonArray::new( + self.coords.finish(), + self.geom_offsets.finish(), + self.ring_offsets.finish(), + validity, + self.data_type.metadata().clone(), + ) + } + + /// Add a new Polygon to the end of this array. + /// + /// # Errors + /// + /// This function errors iff the new last item is larger than what O supports. + #[inline] + pub fn push_polygon( + &mut self, + value: Option<&impl PolygonTrait>, + ) -> GeoArrowResult<()> { + if let Some(polygon) = value { + let exterior_ring = polygon.exterior(); + if exterior_ring.is_none() { + self.push_empty(); + return Ok(()); + } + + // - Get exterior ring + // - Add exterior ring's # of coords self.ring_offsets + // - Push ring's coords to self.coords + let ext_ring = polygon.exterior().unwrap(); + self.ring_offsets.try_push_usize(ext_ring.num_coords())?; + for coord in ext_ring.coords() { + self.coords.push_coord(&coord); + } + + // Total number of rings in this polygon + let num_interiors = polygon.num_interiors(); + self.geom_offsets.try_push_usize(num_interiors + 1)?; + + // For each interior ring: + // - Get ring + // - Add ring's # of coords to self.ring_offsets + // - Push ring's coords to self.coords + for int_ring in polygon.interiors() { + self.ring_offsets.try_push_usize(int_ring.num_coords())?; + for coord in int_ring.coords() { + self.coords.push_coord(&coord); + } + } + + self.validity.append(true); + } else { + self.push_null(); + } + Ok(()) + } + + /// Add a new Rect to this builder + #[inline] + pub fn push_rect(&mut self, value: Option<&impl RectTrait>) -> GeoArrowResult<()> { + if let Some(rect) = value { + let rect_wrapper = RectWrapper::try_new(rect)?; + self.push_polygon(Some(&rect_wrapper))?; + } else { + self.push_null(); + } + Ok(()) + } + + /// Add a new geometry to this builder + /// + /// This will error if the geometry type is not Polygon, a MultiPolygon of length 1, or Rect. + #[inline] + pub fn push_geometry( + &mut self, + value: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + if let Some(value) = value { + match value.as_type() { + GeometryType::Polygon(g) => self.push_polygon(Some(g))?, + GeometryType::MultiPolygon(mp) => { + let num_polygons = mp.num_polygons(); + if num_polygons == 0 { + self.push_empty(); + } else if num_polygons == 1 { + self.push_polygon(Some(&mp.polygon(0).unwrap()))? + } else { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected MultiPolygon with only one polygon in PolygonBuilder, got {num_polygons} polygons", + ))); + } + } + GeometryType::Rect(g) => self.push_rect(Some(g))?, + GeometryType::Triangle(tri) => self.push_polygon(Some(&TriangleWrapper(tri)))?, + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected Polygon compatible geometry, got {}", + gt.name() + ))); + } + } + } else { + self.push_null(); + }; + Ok(()) + } + + /// Extend this builder with the given geometries + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_polygon| self.push_polygon(maybe_polygon)) + .unwrap(); + } + + /// Extend this builder with the given geometries + pub fn extend_from_geometry_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult<()> { + geoms.into_iter().try_for_each(|g| self.push_geometry(g))?; + Ok(()) + } + + /// Push a raw coordinate to the underlying coordinate array. + /// + /// # Invariants + /// + /// Care must be taken to ensure that pushing raw coordinates to the array upholds the + /// necessary invariants of the array. + #[inline] + pub(crate) fn push_coord(&mut self, coord: &impl CoordTrait) -> GeoArrowResult<()> { + self.coords.push_coord(coord); + Ok(()) + } + + #[inline] + pub(crate) fn push_empty(&mut self) { + self.geom_offsets.extend_constant(1); + self.validity.append(true); + } + + #[inline] + pub(crate) fn push_null(&mut self) { + // NOTE! Only the geom_offsets array needs to get extended, because the next geometry will + // point to the same ring array location + self.geom_offsets.extend_constant(1); + self.validity.append(false); + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_polygons(geoms: &[impl PolygonTrait], typ: PolygonType) -> Self { + let capacity = PolygonCapacity::from_polygons(geoms.iter().map(Some)); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_polygons( + geoms: &[Option>], + typ: PolygonType, + ) -> Self { + let capacity = PolygonCapacity::from_polygons(geoms.iter().map(|x| x.as_ref())); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } + + /// Construct a new builder, pre-filling it with the provided geometries + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: PolygonType, + ) -> GeoArrowResult { + let capacity = PolygonCapacity::from_geometries(geoms.iter().map(|x| x.as_ref()))?; + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_geometry_iter(geoms.iter().map(|x| x.as_ref()))?; + Ok(array) + } +} + +impl TryFrom<(GenericWkbArray, PolygonType)> for PolygonBuilder { + type Error = GeoArrowError; + + fn try_from((value, typ): (GenericWkbArray, PolygonType)) -> GeoArrowResult { + let wkb_objects = value + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + Self::from_nullable_geometries(&wkb_objects, typ) + } +} + +impl GeoArrowArrayBuilder for PolygonBuilder { + fn len(&self) -> usize { + self.geom_offsets.len_proxy() + } + + fn push_null(&mut self) { + self.push_null(); + } + + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + self.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.finish()) + } +} + +impl GeometryTypeId for PolygonBuilder { + const GEOMETRY_TYPE_OFFSET: i8 = 3; + + fn dimension(&self) -> Dimension { + self.data_type.dimension() + } +} + +// #[cfg(test)] +// mod test { +// use geo::BoundingRect; +// use geo_traits::to_geo::ToGeoPolygon; +// use geo_types::{Rect, coord}; +// use geoarrow_schema::{Dimension, PolygonType}; + +// use crate::{GeoArrowArrayAccessor, builder::PolygonBuilder}; + +// #[test] +// fn test_push_rect() { +// let mut builder = PolygonBuilder::new(PolygonType::new(Dimension::XY, Default::default())); + +// let rect = Rect::new(coord! { x: 10., y: 20. }, coord! { x: 30., y: 10. }); +// builder.push_rect(Some(&rect)).unwrap(); +// let array = builder.finish(); + +// let polygon = array.value(0).unwrap().to_polygon(); +// let bounding_rect = polygon.bounding_rect().unwrap(); +// assert_eq!(rect, bounding_rect); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/builder/rect.rs b/src/geoarrow/geoarrow-array/src/builder/rect.rs new file mode 100644 index 0000000000..bc61797d0a --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/rect.rs @@ -0,0 +1,165 @@ +use arrow_buffer::NullBufferBuilder; +use geo_traits::{CoordTrait, RectTrait}; +use geoarrow_schema::{ + BoxType, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{array::RectArray, builder::SeparatedCoordBufferBuilder, scalar::Rect}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of Rects. +/// +/// Converting an [`RectBuilder`] into a [`RectArray`] is `O(1)`. +#[derive(Debug)] +pub struct RectBuilder { + pub(crate) data_type: BoxType, + pub(crate) lower: SeparatedCoordBufferBuilder, + pub(crate) upper: SeparatedCoordBufferBuilder, + pub(crate) validity: NullBufferBuilder, +} + +impl RectBuilder { + /// Creates a new empty [`RectBuilder`]. + pub fn new(typ: BoxType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Creates a new [`RectBuilder`] with a capacity. + pub fn with_capacity(typ: BoxType, capacity: usize) -> Self { + Self { + lower: SeparatedCoordBufferBuilder::with_capacity(capacity, typ.dimension()), + upper: SeparatedCoordBufferBuilder::with_capacity(capacity, typ.dimension()), + validity: NullBufferBuilder::new(capacity), + data_type: typ, + } + } + + /// Reserves capacity for at least `additional` more Rects. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + pub fn reserve(&mut self, additional: usize) { + self.lower.reserve(additional); + self.upper.reserve(additional); + } + + /// Reserves the minimum capacity for at least `additional` more Rects. + /// + /// Unlike [`reserve`], this will not deliberately over-allocate to speculatively avoid + /// frequent allocations. After calling `reserve_exact`, capacity will be greater than or equal + /// to `self.len() + additional`. Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Self::reserve + pub fn reserve_exact(&mut self, additional: usize) { + self.lower.reserve_exact(additional); + self.upper.reserve_exact(additional); + } + + /// Shrinks the capacity of self to fit. + pub fn shrink_to_fit(&mut self) { + self.lower.shrink_to_fit(); + self.upper.shrink_to_fit(); + // self.validity.shrink_to_fit(); + } + + /// The canonical method to create a [`RectBuilder`] out of its internal components. + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Errors + /// + /// This function errors iff: + /// + /// - The validity is not `None` and its length is different from the number of geometries + pub fn try_new( + lower: SeparatedCoordBufferBuilder, + upper: SeparatedCoordBufferBuilder, + validity: NullBufferBuilder, + data_type: BoxType, + ) -> GeoArrowResult { + if lower.len() != upper.len() { + return Err(GeoArrowError::InvalidGeoArrow( + "Lower and upper lengths must match".to_string(), + )); + } + Ok(Self { + lower, + upper, + validity, + data_type, + }) + } + + /// Consume the builder and convert to an immutable [`RectArray`] + pub fn finish(mut self) -> RectArray { + RectArray::new( + self.lower.finish(), + self.upper.finish(), + self.validity.finish(), + self.data_type.metadata().clone(), + ) + } + + /// Add a new Rect to the end of this builder. + #[inline] + pub fn push_rect(&mut self, value: Option<&impl RectTrait>) { + if let Some(value) = value { + let min_coord = value.min(); + let max_coord = value.max(); + + self.lower.push_coord(&min_coord); + self.upper.push_coord(&max_coord); + self.validity.append_non_null() + } else { + // Since it's a struct, we still need to push coords when null + self.lower.push_constant(f64::NAN); + self.upper.push_constant(f64::NAN); + self.validity.append_null(); + } + } + + /// Add a new null value to the end of this builder. + #[inline] + pub fn push_null(&mut self) { + self.push_rect(None::<&Rect>); + } + + /// Push min and max coordinates of a rect to the builder. + #[inline] + pub fn push_min_max(&mut self, min: &impl CoordTrait, max: &impl CoordTrait) { + self.lower.push_coord(min); + self.upper.push_coord(max); + self.validity.append_non_null() + } + + /// Create this builder from a iterator of Rects. + pub fn from_rects<'a>( + geoms: impl ExactSizeIterator + 'a)>, + typ: BoxType, + ) -> Self { + let mut mutable_array = Self::with_capacity(typ, geoms.len()); + geoms + .into_iter() + .for_each(|rect| mutable_array.push_rect(Some(rect))); + mutable_array + } + + /// Create this builder from a iterator of nullable Rects. + pub fn from_nullable_rects<'a>( + geoms: impl ExactSizeIterator + 'a)>>, + typ: BoxType, + ) -> Self { + let mut mutable_array = Self::with_capacity(typ, geoms.len()); + geoms + .into_iter() + .for_each(|maybe_rect| mutable_array.push_rect(maybe_rect)); + mutable_array + } +} diff --git a/src/geoarrow/geoarrow-array/src/builder/wkb.rs b/src/geoarrow/geoarrow-array/src/builder/wkb.rs new file mode 100644 index 0000000000..a6b8e9b1d6 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/builder/wkb.rs @@ -0,0 +1,338 @@ +use arrow_array::{OffsetSizeTrait, builder::GenericBinaryBuilder}; +use geo_traits::GeometryTrait; +use geoarrow_schema::{ + WkbType, + error::{GeoArrowError, GeoArrowResult}, +}; +use wkb::{ + Endianness, + reader::Wkb, + writer::{WriteOptions, write_geometry}, +}; + +use crate::{array::GenericWkbArray, capacity::WkbCapacity}; + +/// The GeoArrow equivalent to `Vec>`: a mutable collection of Wkb buffers. +/// +/// Converting a [`WkbBuilder`] into a [`GenericWkbArray`] is `O(1)`. +#[derive(Debug)] +pub struct WkbBuilder(GenericBinaryBuilder, WkbType); + +impl WkbBuilder { + /// Creates a new empty [`WkbBuilder`]. + pub fn new(typ: WkbType) -> Self { + Self::with_capacity(typ, Default::default()) + } + + /// Initializes a new [`WkbBuilder`] with a pre-allocated capacity of slots and values. + pub fn with_capacity(typ: WkbType, capacity: WkbCapacity) -> Self { + Self( + GenericBinaryBuilder::with_capacity( + capacity.offsets_capacity, + capacity.buffer_capacity, + ), + typ, + ) + } + + // Upstream APIs don't exist for this yet. To implement this without upstream changes, we could + // change to using manual `Vec`'s ourselves + // pub fn reserve(&mut self, capacity: WkbCapacity) { + // } + + /// Push a Geometry onto the end of this builder + #[inline] + pub fn push_geometry( + &mut self, + geom: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + if let Some(geom) = geom { + let wkb_options = WriteOptions { + endianness: Endianness::LittleEndian, + }; + write_geometry(&mut self.0, geom, &wkb_options) + .map_err(|err| GeoArrowError::Wkb(err.to_string()))?; + self.0.append_value("") + } else { + self.0.append_null() + }; + Ok(()) + } + + /// Extend this builder from an iterator of Geometries. + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult<()> { + geoms + .into_iter() + .try_for_each(|maybe_geom| self.push_geometry(maybe_geom))?; + Ok(()) + } + + /// Create this builder from a slice of nullable Geometries. + pub fn from_nullable_geometries( + geoms: &[Option>], + typ: WkbType, + ) -> GeoArrowResult { + let capacity = WkbCapacity::from_geometries(geoms.iter().map(|x| x.as_ref())); + let mut array = Self::with_capacity(typ, capacity); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref()))?; + Ok(array) + } + + /// Push raw WKB bytes onto the end of this builder. + /// + /// This method validates that the input bytes represent valid WKB before appending. + /// If the bytes are `None`, a null value is appended. + /// + /// # Errors + /// + /// Returns an error if the input bytes are not valid WKB format. + /// + /// # Example + /// + /// ``` + /// use geoarrow_array::builder::WkbBuilder; + /// use geoarrow_array::GeoArrowArray; + /// use geoarrow_schema::WkbType; + /// + /// let mut builder = WkbBuilder::::new(WkbType::default()); + /// + /// // Valid WKB for a Point(1.0, 2.0) in little-endian + /// let wkb_bytes = vec![ + /// 0x01, // Little-endian + /// 0x01, 0x00, 0x00, 0x00, // Point type + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0 + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0 + /// ]; + /// + /// builder.push_wkb(Some(&wkb_bytes)).unwrap(); + /// builder.push_wkb(None).unwrap(); // Append null + /// + /// let array = builder.finish(); + /// assert_eq!(array.len(), 2); + /// ``` + #[inline] + pub fn push_wkb(&mut self, wkb: Option<&[u8]>) -> GeoArrowResult<()> { + if let Some(bytes) = wkb { + // Validate that the bytes are valid WKB + Wkb::try_new(bytes).map_err(|err| GeoArrowError::Wkb(err.to_string()))?; + self.0.append_value(bytes); + } else { + self.0.append_null(); + } + Ok(()) + } + + /// Push raw WKB bytes onto the end of this builder without validation. + /// + /// This method directly appends the input bytes to the underlying buffer without + /// validating that they represent valid WKB. If the bytes are `None`, a null value + /// is appended. + /// + /// # Safety + /// + /// This function is unsafe because it does not validate that the input bytes are + /// valid WKB format. Calling this with invalid WKB data may result in undefined + /// behavior when the resulting array is used with operations that assume valid WKB. + /// + /// The caller must ensure that: + /// - The bytes represent valid WKB according to the OGC WKB specification + /// - The byte order (endianness) is correctly specified in the WKB header + /// - The geometry type and coordinates are properly encoded + /// + /// # Example + /// + /// ``` + /// use geoarrow_array::builder::WkbBuilder; + /// use geoarrow_array::GeoArrowArray; + /// use geoarrow_schema::WkbType; + /// + /// let mut builder = WkbBuilder::::new(WkbType::default()); + /// + /// // Valid WKB for a Point(1.0, 2.0) in little-endian + /// let wkb_bytes = vec![ + /// 0x01, // Little-endian + /// 0x01, 0x00, 0x00, 0x00, // Point type + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0 + /// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0 + /// ]; + /// + /// unsafe { + /// builder.push_wkb_unchecked(Some(&wkb_bytes)); + /// builder.push_wkb_unchecked(None); // Append null + /// } + /// + /// let array = builder.finish(); + /// assert_eq!(array.len(), 2); + /// ``` + #[inline] + pub unsafe fn push_wkb_unchecked(&mut self, wkb: Option<&[u8]>) { + if let Some(bytes) = wkb { + self.0.append_value(bytes); + } else { + self.0.append_null(); + } + } + + /// Consume this builder and convert to a [GenericWkbArray]. + /// + /// This is `O(1)`. + pub fn finish(mut self) -> GenericWkbArray { + GenericWkbArray::new(self.0.finish(), self.1.metadata().clone()) + } +} + +// #[cfg(test)] +// mod tests { +// use super::*; +// use crate::trait_::GeoArrowArray; + +// /// Valid WKB for Point(1.0, 2.0) in little-endian format +// fn point_wkb() -> Vec { +// let point = geo::Point::new(1.0, 2.0); +// let mut buf = Vec::new(); +// wkb::writer::write_point(&mut buf, &point, &Default::default()).unwrap(); +// buf +// } + +// /// Valid WKB for Point(3.0, 4.0) in little-endian format +// fn point_wkb_2() -> Vec { +// let point = geo::Point::new(3.0, 4.0); +// let mut buf = Vec::new(); +// wkb::writer::write_point(&mut buf, &point, &Default::default()).unwrap(); +// buf +// } + +// /// Invalid WKB (too short) +// fn invalid_wkb() -> Vec { +// vec![0x01, 0x01] +// } + +// #[test] +// fn test_push_raw_valid() { +// let mut builder = WkbBuilder::::new(WkbType::default()); +// let wkb = point_wkb(); + +// // Should succeed with valid WKB +// builder.push_wkb(Some(&wkb)).unwrap(); + +// let array = builder.finish(); +// assert_eq!(array.len(), 1); +// assert!(!array.is_null(0)); +// } + +// #[test] +// fn test_push_raw_multiple() { +// let mut builder = WkbBuilder::::new(WkbType::default()); +// let wkb1 = point_wkb(); +// let wkb2 = point_wkb_2(); + +// builder.push_wkb(Some(&wkb1)).unwrap(); +// builder.push_wkb(Some(&wkb2)).unwrap(); + +// let array = builder.finish(); +// assert_eq!(array.len(), 2); +// assert!(!array.is_null(0)); +// assert!(!array.is_null(1)); +// } + +// #[test] +// fn test_push_raw_null() { +// let mut builder = WkbBuilder::::new(WkbType::default()); + +// // Push null value +// builder.push_wkb(None).unwrap(); + +// let array = builder.finish(); +// assert_eq!(array.len(), 1); +// assert!(array.is_null(0)); +// } + +// #[test] +// fn test_push_raw_mixed_with_nulls() { +// let mut builder = WkbBuilder::::new(WkbType::default()); +// let wkb = point_wkb(); + +// builder.push_wkb(Some(&wkb)).unwrap(); +// builder.push_wkb(None).unwrap(); +// builder.push_wkb(Some(&wkb)).unwrap(); + +// let array = builder.finish(); +// assert_eq!(array.len(), 3); +// assert!(!array.is_null(0)); +// assert!(array.is_null(1)); +// assert!(!array.is_null(2)); +// } + +// #[test] +// fn test_push_raw_invalid() { +// let mut builder = WkbBuilder::::new(WkbType::default()); +// let invalid = invalid_wkb(); + +// // Should fail with invalid WKB +// let result = builder.push_wkb(Some(&invalid)); +// assert!(result.is_err()); +// } + +// #[test] +// fn test_push_raw_unchecked_valid() { +// let mut builder = WkbBuilder::::new(WkbType::default()); +// let wkb = point_wkb(); + +// unsafe { +// builder.push_wkb_unchecked(Some(&wkb)); +// } + +// let array = builder.finish(); +// assert_eq!(array.len(), 1); +// assert!(!array.is_null(0)); +// } + +// #[test] +// fn test_push_raw_unchecked_null() { +// let mut builder = WkbBuilder::::new(WkbType::default()); + +// unsafe { +// builder.push_wkb_unchecked(None); +// } + +// let array = builder.finish(); +// assert_eq!(array.len(), 1); +// assert!(array.is_null(0)); +// } + +// #[test] +// fn test_push_raw_unchecked_multiple() { +// let mut builder = WkbBuilder::::new(WkbType::default()); +// let wkb1 = point_wkb(); +// let wkb2 = point_wkb_2(); + +// unsafe { +// builder.push_wkb_unchecked(Some(&wkb1)); +// builder.push_wkb_unchecked(None); +// builder.push_wkb_unchecked(Some(&wkb2)); +// } + +// let array = builder.finish(); +// assert_eq!(array.len(), 3); +// assert!(!array.is_null(0)); +// assert!(array.is_null(1)); +// assert!(!array.is_null(2)); +// } + +// #[test] +// fn test_push_raw_with_i64_offset() { +// let mut builder = WkbBuilder::::new(WkbType::default()); +// let wkb = point_wkb(); + +// builder.push_wkb(Some(&wkb)).unwrap(); +// builder.push_wkb(None).unwrap(); + +// let array = builder.finish(); +// assert_eq!(array.len(), 2); +// assert!(!array.is_null(0)); +// assert!(array.is_null(1)); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/capacity/geometry.rs b/src/geoarrow/geoarrow-array/src/capacity/geometry.rs new file mode 100644 index 0000000000..5f6aece5bc --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/geometry.rs @@ -0,0 +1,373 @@ +use std::ops::AddAssign; + +use geo_traits::*; +use geoarrow_schema::{Dimension, error::GeoArrowResult}; +use wkt::WktNum; + +use crate::{ + array::DimensionIndex, + builder::geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper}, + capacity::{ + GeometryCollectionCapacity, LineStringCapacity, MultiLineStringCapacity, + MultiPointCapacity, MultiPolygonCapacity, PolygonCapacity, + }, +}; + +/// A counter for the buffer sizes of a [`GeometryArray`][crate::array::GeometryArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Default, Debug, Clone, Copy)] +pub struct GeometryCapacity { + /// The number of null geometries. Ideally the builder will assign these to any array that has + /// already been allocated. Otherwise we don't know where to assign them. + nulls: usize, + + /// Simple: just the total number of points, nulls included + pub(crate) points: [usize; 4], + /// An array of [LineStringCapacity], ordered XY, XYZ, XYM, XYZM + pub(crate) line_strings: [LineStringCapacity; 4], + pub(crate) polygons: [PolygonCapacity; 4], + pub(crate) mpoints: [MultiPointCapacity; 4], + pub(crate) mline_strings: [MultiLineStringCapacity; 4], + pub(crate) mpolygons: [MultiPolygonCapacity; 4], + pub(crate) gcs: [GeometryCollectionCapacity; 4], + + /// Whether to prefer multi or single arrays for new geometries. + prefer_multi: bool, +} + +impl GeometryCapacity { + /// Create a new capacity with known sizes. + /// + /// Note that the ordering within each array must be XY, XYZ, XYM, XYZM. + #[allow(clippy::too_many_arguments)] + pub fn new( + nulls: usize, + points: [usize; 4], + line_strings: [LineStringCapacity; 4], + polygons: [PolygonCapacity; 4], + mpoints: [MultiPointCapacity; 4], + mline_strings: [MultiLineStringCapacity; 4], + mpolygons: [MultiPolygonCapacity; 4], + gcs: [GeometryCollectionCapacity; 4], + ) -> Self { + Self { + nulls, + points, + line_strings, + polygons, + mpoints, + mline_strings, + mpolygons, + gcs, + prefer_multi: false, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Default::default() + } + + /// Set whether this capacity counter should prefer allocating "single-type" geometries like + /// Point/LineString/Polygon in the arrays of their "Multi" counterparts. + pub fn with_prefer_multi(mut self, prefer_multi: bool) -> Self { + self.prefer_multi = prefer_multi; + self + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + if self.points.iter().any(|c| *c > 0) { + return false; + } + + if self.line_strings.iter().any(|c| !c.is_empty()) { + return false; + } + + if self.polygons.iter().any(|c| !c.is_empty()) { + return false; + } + + if self.mpoints.iter().any(|c| !c.is_empty()) { + return false; + } + + if self.mline_strings.iter().any(|c| !c.is_empty()) { + return false; + } + + if self.mpolygons.iter().any(|c| !c.is_empty()) { + return false; + } + + if self.gcs.iter().any(|c| !c.is_empty()) { + return false; + } + + true + } + + /// The total number of geometries across all geometry types. + pub fn total_num_geoms(&self) -> usize { + let mut total = 0; + + self.points.iter().for_each(|c| { + total += c; + }); + self.line_strings.iter().for_each(|c| { + total += c.geom_capacity(); + }); + self.polygons.iter().for_each(|c| { + total += c.geom_capacity(); + }); + self.mpoints.iter().for_each(|c| { + total += c.geom_capacity(); + }); + self.mline_strings.iter().for_each(|c| { + total += c.geom_capacity(); + }); + self.mpolygons.iter().for_each(|c| { + total += c.geom_capacity(); + }); + self.gcs.iter().for_each(|c| { + total += c.geom_capacity(); + }); + + total + } + + /// Access point capacity + pub fn point(&self, dim: Dimension) -> usize { + self.points[dim.order()] + } + + /// Access LineString capacity + pub fn line_string(&self, dim: Dimension) -> LineStringCapacity { + self.line_strings[dim.order()] + } + + /// Access Polygon capacity + pub fn polygon(&self, dim: Dimension) -> PolygonCapacity { + self.polygons[dim.order()] + } + + /// Access MultiPoint capacity + pub fn multi_point(&self, dim: Dimension) -> MultiPointCapacity { + self.mpoints[dim.order()] + } + + /// Access point capacity + pub fn multi_line_string(&self, dim: Dimension) -> MultiLineStringCapacity { + self.mline_strings[dim.order()] + } + + /// Access point capacity + pub fn multi_polygon(&self, dim: Dimension) -> MultiPolygonCapacity { + self.mpolygons[dim.order()] + } + + /// Access GeometryCollection capacity + pub fn geometry_collection(&self, dim: Dimension) -> GeometryCollectionCapacity { + self.gcs[dim.order()] + } + + /// Add the capacity of the given Point + #[inline] + fn add_point(&mut self, point: Option<&impl PointTrait>) -> GeoArrowResult<()> { + if let Some(point) = point { + let dim = Dimension::try_from(point.dim())?; + if self.prefer_multi { + self.mpoints[dim.order()].add_point(Some(point)); + } else { + self.points[dim.order()] += 1; + } + } else { + self.nulls += 1; + } + Ok(()) + } + + /// Add the capacity of the given LineString + #[inline] + fn add_line_string( + &mut self, + line_string: Option<&impl LineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(line_string) = line_string { + let dim = Dimension::try_from(line_string.dim())?; + if self.prefer_multi { + self.mline_strings[dim.order()].add_line_string(Some(line_string)); + } else { + self.line_strings[dim.order()].add_line_string(Some(line_string)); + } + } else { + self.nulls += 1; + } + Ok(()) + } + + /// Add the capacity of the given Polygon + #[inline] + fn add_polygon(&mut self, polygon: Option<&impl PolygonTrait>) -> GeoArrowResult<()> { + if let Some(polygon) = polygon { + let dim = Dimension::try_from(polygon.dim())?; + if self.prefer_multi { + self.mpolygons[dim.order()].add_polygon(Some(polygon)); + } else { + self.polygons[dim.order()].add_polygon(Some(polygon)); + } + } else { + self.nulls += 1; + } + Ok(()) + } + + /// Add the capacity of the given MultiPoint + #[inline] + fn add_multi_point( + &mut self, + multi_point: Option<&impl MultiPointTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_point) = multi_point { + self.multi_point(multi_point.dim().try_into()?) + .add_multi_point(Some(multi_point)); + } else { + self.nulls += 1; + } + Ok(()) + } + + /// Add the capacity of the given MultiLineString + #[inline] + fn add_multi_line_string( + &mut self, + multi_line_string: Option<&impl MultiLineStringTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_line_string) = multi_line_string { + self.multi_line_string(multi_line_string.dim().try_into()?) + .add_multi_line_string(Some(multi_line_string)); + } else { + self.nulls += 1; + } + Ok(()) + } + + /// Add the capacity of the given MultiPolygon + #[inline] + fn add_multi_polygon( + &mut self, + multi_polygon: Option<&impl MultiPolygonTrait>, + ) -> GeoArrowResult<()> { + if let Some(multi_polygon) = multi_polygon { + self.multi_polygon(multi_polygon.dim().try_into()?) + .add_multi_polygon(Some(multi_polygon)); + } else { + self.nulls += 1; + } + Ok(()) + } + + /// Add the capacity of the given Geometry + #[inline] + pub fn add_geometry( + &mut self, + geom: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + use geo_traits::GeometryType; + + if let Some(geom) = geom { + match geom.as_type() { + GeometryType::Point(g) => self.add_point(Some(g)), + GeometryType::LineString(g) => self.add_line_string(Some(g)), + GeometryType::Polygon(g) => self.add_polygon(Some(g)), + GeometryType::MultiPoint(p) => self.add_multi_point(Some(p)), + GeometryType::MultiLineString(p) => self.add_multi_line_string(Some(p)), + GeometryType::MultiPolygon(p) => self.add_multi_polygon(Some(p)), + GeometryType::GeometryCollection(p) => self.add_geometry_collection(Some(p)), + GeometryType::Rect(r) => self.add_polygon(Some(&RectWrapper::try_new(r)?)), + GeometryType::Triangle(tri) => self.add_polygon(Some(&TriangleWrapper(tri))), + GeometryType::Line(l) => self.add_line_string(Some(&LineWrapper(l))), + }?; + } else { + self.nulls += 1; + } + Ok(()) + } + + /// Add the capacity of the given GeometryCollection + #[inline] + fn add_geometry_collection( + &mut self, + gc: Option<&impl GeometryCollectionTrait>, + ) -> GeoArrowResult<()> { + if let Some(gc) = gc { + self.gcs[Dimension::try_from(gc.dim())?.order()].add_geometry_collection(Some(gc))?; + } else { + self.nulls += 1; + }; + Ok(()) + } + + /// Construct a new counter pre-filled with the given geometries + pub fn from_geometries<'a, T: WktNum>( + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for maybe_geom in geoms.into_iter() { + counter.add_geometry(maybe_geom)?; + } + Ok(counter) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self) -> usize { + let mut count = 0; + + self.points + .iter() + .enumerate() + .for_each(|(idx, c)| count += c * Dimension::from_order(idx).unwrap().size() * 8); + self.line_strings + .iter() + .enumerate() + .for_each(|(idx, c)| count += c.num_bytes(Dimension::from_order(idx).unwrap())); + self.polygons + .iter() + .enumerate() + .for_each(|(idx, c)| count += c.num_bytes(Dimension::from_order(idx).unwrap())); + self.mpoints + .iter() + .enumerate() + .for_each(|(idx, c)| count += c.num_bytes(Dimension::from_order(idx).unwrap())); + self.mline_strings + .iter() + .enumerate() + .for_each(|(idx, c)| count += c.num_bytes(Dimension::from_order(idx).unwrap())); + self.mpolygons + .iter() + .enumerate() + .for_each(|(idx, c)| count += c.num_bytes(Dimension::from_order(idx).unwrap())); + self.gcs + .iter() + .enumerate() + .for_each(|(idx, c)| count += c.num_bytes(Dimension::from_order(idx).unwrap())); + + count + } +} + +impl AddAssign for GeometryCapacity { + fn add_assign(&mut self, rhs: Self) { + self.nulls += rhs.nulls; + + self.points = core::array::from_fn(|i| self.points[i] + rhs.points[i]); + self.line_strings = core::array::from_fn(|i| self.line_strings[i] + rhs.line_strings[i]); + self.polygons = core::array::from_fn(|i| self.polygons[i] + rhs.polygons[i]); + self.mpoints = core::array::from_fn(|i| self.mpoints[i] + rhs.mpoints[i]); + self.mline_strings = core::array::from_fn(|i| self.mline_strings[i] + rhs.mline_strings[i]); + self.mpolygons = core::array::from_fn(|i| self.mpolygons[i] + rhs.mpolygons[i]); + self.gcs = core::array::from_fn(|i| self.gcs[i] + rhs.gcs[i]); + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/geometrycollection.rs b/src/geoarrow/geoarrow-array/src/capacity/geometrycollection.rs new file mode 100644 index 0000000000..88c8bb76f3 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/geometrycollection.rs @@ -0,0 +1,172 @@ +use std::ops::Add; + +use geo_traits::{ + GeometryCollectionTrait, GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait, + MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait, +}; +use geoarrow_schema::{Dimension, error::GeoArrowResult}; +use wkt::WktNum; + +use crate::{ + builder::geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper}, + capacity::MixedCapacity, +}; + +/// A counter for the buffer sizes of a +/// [`GeometryCollectionArray`][crate::array::GeometryCollectionArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct GeometryCollectionCapacity { + pub(crate) mixed_capacity: MixedCapacity, + pub(crate) geom_capacity: usize, +} + +impl GeometryCollectionCapacity { + /// Create a new capacity with known sizes. + pub fn new(mixed_capacity: MixedCapacity, geom_capacity: usize) -> Self { + Self { + mixed_capacity, + geom_capacity, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(MixedCapacity::new_empty(), 0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.mixed_capacity.is_empty() && self.geom_capacity == 0 + } + + /// The geometry offset buffer capacity + pub fn geom_capacity(&self) -> usize { + self.geom_capacity + } + + #[inline] + fn add_valid_point(&mut self, _geom: &impl PointTrait) { + self.mixed_capacity.add_point(); + } + + #[inline] + fn add_valid_line_string(&mut self, geom: &impl LineStringTrait) { + self.mixed_capacity.add_line_string(geom); + } + + #[inline] + fn add_valid_polygon(&mut self, geom: &impl PolygonTrait) { + self.mixed_capacity.add_polygon(geom); + } + + #[inline] + fn add_valid_multi_point(&mut self, geom: &impl MultiPointTrait) { + self.mixed_capacity.add_multi_point(geom); + } + + #[inline] + fn add_valid_multi_line_string(&mut self, geom: &impl MultiLineStringTrait) { + self.mixed_capacity.add_multi_line_string(geom); + } + + #[inline] + fn add_valid_multi_polygon(&mut self, geom: &impl MultiPolygonTrait) { + self.mixed_capacity.add_multi_polygon(geom); + } + + #[inline] + fn add_valid_geometry_collection( + &mut self, + geom: &impl GeometryCollectionTrait, + ) -> GeoArrowResult<()> { + for g in geom.geometries() { + self.mixed_capacity.add_geometry(&g)? + } + Ok(()) + } + + /// Add a Geometry to this capacity counter. + #[inline] + pub fn add_geometry( + &mut self, + geom: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()> { + use GeometryType::*; + if let Some(geom) = geom { + match geom.as_type() { + Point(p) => self.add_valid_point(p), + LineString(p) => self.add_valid_line_string(p), + Polygon(p) => self.add_valid_polygon(p), + MultiPoint(p) => self.add_valid_multi_point(p), + MultiLineString(p) => self.add_valid_multi_line_string(p), + MultiPolygon(p) => self.add_valid_multi_polygon(p), + GeometryCollection(p) => self.add_valid_geometry_collection(p)?, + Rect(r) => self.add_valid_polygon(&RectWrapper::try_new(r)?), + Triangle(tri) => self.add_valid_polygon(&TriangleWrapper(tri)), + Line(l) => self.add_valid_line_string(&LineWrapper(l)), + } + }; + Ok(()) + } + + /// Add a GeometryCollection to this capacity counter. + #[inline] + pub fn add_geometry_collection<'a, T: WktNum>( + &mut self, + geom: Option<&'a (impl GeometryCollectionTrait + 'a)>, + ) -> GeoArrowResult<()> { + if let Some(geom) = geom { + self.add_valid_geometry_collection(geom)?; + } + self.geom_capacity += 1; + Ok(()) + } + + /// Create a capacity counter from an iterator of GeometryCollections. + pub fn from_geometry_collections<'a, T: WktNum>( + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for maybe_geom in geoms.into_iter() { + counter.add_geometry_collection(maybe_geom)?; + } + Ok(counter) + } + + /// Create a capacity counter from an iterator of Geometries. + pub fn from_geometries<'a, T: WktNum>( + geoms: impl Iterator + 'a)>>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for maybe_geom in geoms.into_iter() { + counter.add_geometry(maybe_geom)?; + } + Ok(counter) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self, dim: Dimension) -> usize { + let offsets_byte_width = 4; + let num_offsets = self.geom_capacity; + (offsets_byte_width * num_offsets) + self.mixed_capacity.num_bytes(dim) + } +} + +impl Default for GeometryCollectionCapacity { + fn default() -> Self { + Self::new_empty() + } +} + +impl Add for GeometryCollectionCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let mixed_capacity = self.mixed_capacity + rhs.mixed_capacity; + let geom_capacity = self.geom_capacity + rhs.geom_capacity; + + Self::new(mixed_capacity, geom_capacity) + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/linestring.rs b/src/geoarrow/geoarrow-array/src/capacity/linestring.rs new file mode 100644 index 0000000000..ae1cf9e4b6 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/linestring.rs @@ -0,0 +1,130 @@ +use std::ops::Add; + +use geo_traits::{GeometryTrait, GeometryType, LineStringTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::util::GeometryTypeName; + +/// A counter for the buffer sizes of a [`LineStringArray`][crate::array::LineStringArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct LineStringCapacity { + pub(crate) coord_capacity: usize, + pub(crate) geom_capacity: usize, +} + +impl LineStringCapacity { + /// Create a new capacity with known sizes. + pub fn new(coord_capacity: usize, geom_capacity: usize) -> Self { + Self { + coord_capacity, + geom_capacity, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(0, 0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.coord_capacity == 0 && self.geom_capacity == 0 + } + + /// Add a LineString to this capacity counter. + #[inline] + pub fn add_line_string(&mut self, maybe_line_string: Option<&impl LineStringTrait>) { + self.geom_capacity += 1; + if let Some(line_string) = maybe_line_string { + self.add_valid_line_string(line_string); + } + } + + #[inline] + fn add_valid_line_string(&mut self, line_string: &impl LineStringTrait) { + self.coord_capacity += line_string.num_coords(); + } + + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be LineString + #[inline] + pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> GeoArrowResult<()> { + self.geom_capacity += 1; + + if let Some(g) = value { + match g.as_type() { + GeometryType::LineString(p) => self.add_valid_line_string(p), + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected LineString, got {}", + gt.name() + ))); + } + } + }; + Ok(()) + } + + /// The coordinate buffer capacity + pub fn coord_capacity(&self) -> usize { + self.coord_capacity + } + + /// The geometry offset buffer capacity + pub fn geom_capacity(&self) -> usize { + self.geom_capacity + } + + /// Create a capacity counter from an iterator of LineStrings. + pub fn from_line_strings<'a>( + geoms: impl Iterator>, + ) -> Self { + let mut counter = Self::new_empty(); + + for maybe_line_string in geoms.into_iter() { + counter.add_line_string(maybe_line_string); + } + + counter + } + + /// Construct a new counter pre-filled with the given geometries + pub fn from_geometries<'a>( + geoms: impl Iterator>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for g in geoms.into_iter() { + counter.add_geometry(g)?; + } + Ok(counter) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self, dim: Dimension) -> usize { + let offsets_byte_width = 4; + let num_offsets = self.geom_capacity; + (offsets_byte_width * num_offsets) + (self.coord_capacity * dim.size() * 8) + } +} + +impl Default for LineStringCapacity { + fn default() -> Self { + Self::new_empty() + } +} + +impl Add for LineStringCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let coord_capacity = self.coord_capacity + rhs.coord_capacity; + let geom_capacity = self.geom_capacity + rhs.geom_capacity; + Self::new(coord_capacity, geom_capacity) + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/mixed.rs b/src/geoarrow/geoarrow-array/src/capacity/mixed.rs new file mode 100644 index 0000000000..3b01c47e3a --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/mixed.rs @@ -0,0 +1,172 @@ +use std::ops::Add; + +use geo_traits::*; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; +use wkt::WktNum; + +use crate::{ + builder::geo_trait_wrappers::{LineWrapper, RectWrapper, TriangleWrapper}, + capacity::{ + LineStringCapacity, MultiLineStringCapacity, MultiPointCapacity, MultiPolygonCapacity, + PolygonCapacity, + }, +}; + +/// A counter for the buffer sizes of a [`MixedGeometryArray`][crate::array::MixedGeometryArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Default, Debug, Clone, Copy)] +pub struct MixedCapacity { + /// Simple: just the total number of points, nulls included + pub(crate) point: usize, + pub(crate) line_string: LineStringCapacity, + pub(crate) polygon: PolygonCapacity, + pub(crate) multi_point: MultiPointCapacity, + pub(crate) multi_line_string: MultiLineStringCapacity, + pub(crate) multi_polygon: MultiPolygonCapacity, +} + +impl MixedCapacity { + /// Create a new capacity with known sizes. + pub(crate) fn new( + point: usize, + line_string: LineStringCapacity, + polygon: PolygonCapacity, + multi_point: MultiPointCapacity, + multi_line_string: MultiLineStringCapacity, + multi_polygon: MultiPolygonCapacity, + ) -> Self { + Self { + point, + line_string, + polygon, + multi_point, + multi_line_string, + multi_polygon, + } + } + + /// Create a new empty capacity. + pub(crate) fn new_empty() -> Self { + Self { + point: 0, + line_string: LineStringCapacity::new_empty(), + polygon: PolygonCapacity::new_empty(), + multi_point: MultiPointCapacity::new_empty(), + multi_line_string: MultiLineStringCapacity::new_empty(), + multi_polygon: MultiPolygonCapacity::new_empty(), + } + } + + /// Return `true` if the capacity is empty. + pub(crate) fn is_empty(&self) -> bool { + self.point == 0 + && self.line_string.is_empty() + && self.polygon.is_empty() + && self.multi_point.is_empty() + && self.multi_line_string.is_empty() + && self.multi_polygon.is_empty() + } + + pub(crate) fn total_num_geoms(&self) -> usize { + let mut total = 0; + total += self.point; + total += self.line_string.geom_capacity(); + total += self.polygon.geom_capacity(); + total += self.multi_point.geom_capacity(); + total += self.multi_line_string.geom_capacity(); + total += self.multi_polygon.geom_capacity(); + total + } + + #[inline] + pub(crate) fn add_point(&mut self) { + self.point += 1; + } + + #[inline] + pub(crate) fn add_line_string(&mut self, line_string: &impl LineStringTrait) { + self.line_string.add_line_string(Some(line_string)); + } + + #[inline] + pub(crate) fn add_polygon(&mut self, polygon: &impl PolygonTrait) { + self.polygon.add_polygon(Some(polygon)); + } + + #[inline] + pub(crate) fn add_multi_point(&mut self, multi_point: &impl MultiPointTrait) { + self.multi_point.add_multi_point(Some(multi_point)); + } + + #[inline] + pub(crate) fn add_multi_line_string(&mut self, multi_line_string: &impl MultiLineStringTrait) { + self.multi_line_string + .add_multi_line_string(Some(multi_line_string)); + } + + #[inline] + pub(crate) fn add_multi_polygon(&mut self, multi_polygon: &impl MultiPolygonTrait) { + self.multi_polygon.add_multi_polygon(Some(multi_polygon)); + } + + #[inline] + pub(crate) fn add_geometry( + &mut self, + geom: &impl GeometryTrait, + ) -> GeoArrowResult<()> { + match geom.as_type() { + geo_traits::GeometryType::Point(_) => self.add_point(), + geo_traits::GeometryType::LineString(g) => self.add_line_string(g), + geo_traits::GeometryType::Polygon(g) => self.add_polygon(g), + geo_traits::GeometryType::MultiPoint(p) => self.add_multi_point(p), + geo_traits::GeometryType::MultiLineString(p) => self.add_multi_line_string(p), + geo_traits::GeometryType::MultiPolygon(p) => self.add_multi_polygon(p), + geo_traits::GeometryType::GeometryCollection(_) => { + return Err(GeoArrowError::InvalidGeoArrow( + "nested geometry collections not supported in GeoArrow".to_string(), + )); + } + geo_traits::GeometryType::Rect(r) => self.add_polygon(&RectWrapper::try_new(r)?), + geo_traits::GeometryType::Triangle(tri) => self.add_polygon(&TriangleWrapper(tri)), + geo_traits::GeometryType::Line(l) => self.add_line_string(&LineWrapper(l)), + }; + Ok(()) + } + + /// The number of bytes an array with this capacity would occupy. + pub(crate) fn num_bytes(&self, dim: Dimension) -> usize { + let mut count = self.point * dim.size() * 8; + count += self.line_string.num_bytes(dim); + count += self.polygon.num_bytes(dim); + count += self.multi_point.num_bytes(dim); + count += self.multi_line_string.num_bytes(dim); + count += self.multi_polygon.num_bytes(dim); + count + } +} + +impl Add for MixedCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let point = self.point + rhs.point; + let line_string = self.line_string + rhs.line_string; + let polygon = self.polygon + rhs.polygon; + let multi_point = self.multi_point + rhs.multi_point; + let multi_line_string = self.multi_line_string + rhs.multi_line_string; + let multi_polygon = self.multi_polygon + rhs.multi_polygon; + + Self::new( + point, + line_string, + polygon, + multi_point, + multi_line_string, + multi_polygon, + ) + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/mod.rs b/src/geoarrow/geoarrow-array/src/capacity/mod.rs new file mode 100644 index 0000000000..b1ff62bd41 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/mod.rs @@ -0,0 +1,27 @@ +//! Counters for managing buffer lengths for each geometry array type. +//! +//! The most memory-efficient way to construct an array from a set of geometries is to make a +//! first pass over these geometries to count exactly how big each underlying buffer of the Arrow +//! array must be, allocate _once_ for exactly what you need, and then fill those buffers in a +//! second pass. Capacity counters help with this process. + +mod geometry; +mod geometrycollection; +mod linestring; +mod mixed; +mod multilinestring; +mod multipoint; +mod multipolygon; +mod point; +mod polygon; +mod wkb; + +pub use geometry::GeometryCapacity; +pub use geometrycollection::GeometryCollectionCapacity; +pub use linestring::LineStringCapacity; +pub(crate) use mixed::MixedCapacity; +pub use multilinestring::MultiLineStringCapacity; +pub use multipoint::MultiPointCapacity; +pub use multipolygon::MultiPolygonCapacity; +pub use polygon::PolygonCapacity; +pub use wkb::WkbCapacity; diff --git a/src/geoarrow/geoarrow-array/src/capacity/multilinestring.rs b/src/geoarrow/geoarrow-array/src/capacity/multilinestring.rs new file mode 100644 index 0000000000..c6eafa1d33 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/multilinestring.rs @@ -0,0 +1,166 @@ +use std::ops::{Add, AddAssign}; + +use geo_traits::{GeometryTrait, GeometryType, LineStringTrait, MultiLineStringTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{capacity::LineStringCapacity, util::GeometryTypeName}; + +/// A counter for the buffer sizes of a +/// [`MultiLineStringArray`][crate::array::MultiLineStringArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct MultiLineStringCapacity { + pub(crate) coord_capacity: usize, + pub(crate) ring_capacity: usize, + pub(crate) geom_capacity: usize, +} + +impl MultiLineStringCapacity { + /// Create a new capacity with known sizes. + pub fn new(coord_capacity: usize, ring_capacity: usize, geom_capacity: usize) -> Self { + Self { + coord_capacity, + ring_capacity, + geom_capacity, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(0, 0, 0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.coord_capacity == 0 && self.ring_capacity == 0 && self.geom_capacity == 0 + } + + /// The coordinate buffer capacity + pub fn coord_capacity(&self) -> usize { + self.coord_capacity + } + + /// The ring offset buffer capacity + pub fn ring_capacity(&self) -> usize { + self.ring_capacity + } + + /// The geometry offset buffer capacity + pub fn geom_capacity(&self) -> usize { + self.geom_capacity + } + + /// Add the capacity of the given LineString + #[inline] + pub fn add_line_string(&mut self, maybe_line_string: Option<&impl LineStringTrait>) { + self.geom_capacity += 1; + if let Some(line_string) = maybe_line_string { + // A single line string + self.ring_capacity += 1; + self.coord_capacity += line_string.num_coords(); + } + } + + /// Add the capacity of the given MultiLineString + #[inline] + pub fn add_multi_line_string(&mut self, multi_line_string: Option<&impl MultiLineStringTrait>) { + self.geom_capacity += 1; + if let Some(multi_line_string) = multi_line_string { + // Total number of rings in this polygon + let num_line_strings = multi_line_string.num_line_strings(); + self.ring_capacity += num_line_strings; + + for line_string in multi_line_string.line_strings() { + self.coord_capacity += line_string.num_coords(); + } + } + } + + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either LineString or MultiLineString + #[inline] + pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> GeoArrowResult<()> { + if let Some(geom) = value { + match geom.as_type() { + GeometryType::LineString(g) => self.add_line_string(Some(g)), + GeometryType::MultiLineString(g) => self.add_multi_line_string(Some(g)), + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected LineString or MultiLineString, got {}", + gt.name() + ))); + } + } + } else { + self.geom_capacity += 1; + }; + Ok(()) + } + + /// Construct a new counter pre-filled with the given MultiLineStrings + pub fn from_multi_line_strings<'a>( + geoms: impl Iterator>, + ) -> Self { + let mut counter = Self::new_empty(); + for maybe_multi_line_string in geoms.into_iter() { + counter.add_multi_line_string(maybe_multi_line_string); + } + counter + } + + /// Construct a new counter pre-filled with the given geometries + pub fn from_geometries<'a>( + geoms: impl Iterator>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for g in geoms.into_iter() { + counter.add_geometry(g)?; + } + Ok(counter) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self, dim: Dimension) -> usize { + let offsets_byte_width = 4; + let num_offsets = self.geom_capacity + self.ring_capacity; + (offsets_byte_width * num_offsets) + (self.coord_capacity * dim.size() * 8) + } +} + +impl Default for MultiLineStringCapacity { + fn default() -> Self { + Self::new_empty() + } +} + +impl Add for MultiLineStringCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let coord_capacity = self.coord_capacity + rhs.coord_capacity; + let ring_capacity = self.ring_capacity + rhs.ring_capacity; + let geom_capacity = self.geom_capacity + rhs.geom_capacity; + Self::new(coord_capacity, ring_capacity, geom_capacity) + } +} + +impl AddAssign for MultiLineStringCapacity { + fn add_assign(&mut self, rhs: Self) { + self.coord_capacity += rhs.coord_capacity; + self.ring_capacity += rhs.ring_capacity; + self.geom_capacity += rhs.geom_capacity; + } +} + +impl AddAssign for MultiLineStringCapacity { + fn add_assign(&mut self, rhs: LineStringCapacity) { + self.coord_capacity += rhs.coord_capacity(); + self.ring_capacity += rhs.geom_capacity(); + self.geom_capacity += rhs.geom_capacity(); + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/multipoint.rs b/src/geoarrow/geoarrow-array/src/capacity/multipoint.rs new file mode 100644 index 0000000000..c13f3db52e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/multipoint.rs @@ -0,0 +1,153 @@ +use std::ops::{Add, AddAssign}; + +use geo_traits::{GeometryTrait, GeometryType, MultiPointTrait, PointTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::util::GeometryTypeName; + +/// A counter for the buffer sizes of a [`MultiPointArray`][crate::array::MultiPointArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct MultiPointCapacity { + pub(crate) coord_capacity: usize, + pub(crate) geom_capacity: usize, +} + +impl MultiPointCapacity { + /// Create a new capacity with known sizes. + pub fn new(coord_capacity: usize, geom_capacity: usize) -> Self { + Self { + coord_capacity, + geom_capacity, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(0, 0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.coord_capacity == 0 && self.geom_capacity == 0 + } + + /// Add the capacity of a point + #[inline] + pub fn add_point(&mut self, point: Option<&impl PointTrait>) { + self.geom_capacity += 1; + if let Some(point) = point { + self.add_valid_point(point) + } + } + + #[inline] + fn add_valid_point(&mut self, _point: &impl PointTrait) { + self.coord_capacity += 1; + } + + /// Add the capacity of the given MultiPoint + #[inline] + pub fn add_multi_point(&mut self, maybe_multi_point: Option<&impl MultiPointTrait>) { + self.geom_capacity += 1; + + if let Some(multi_point) = maybe_multi_point { + self.add_valid_multi_point(multi_point); + } + } + + #[inline] + fn add_valid_multi_point(&mut self, multi_point: &impl MultiPointTrait) { + self.coord_capacity += multi_point.num_points(); + } + + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either Point or MultiPoint + #[inline] + pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> GeoArrowResult<()> { + self.geom_capacity += 1; + + if let Some(g) = value { + match g.as_type() { + GeometryType::Point(p) => self.add_valid_point(p), + GeometryType::MultiPoint(p) => self.add_valid_multi_point(p), + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected Point or MultiPoint, got {}", + gt.name() + ))); + } + } + }; + Ok(()) + } + + /// The coordinate buffer capacity + pub fn coord_capacity(&self) -> usize { + self.coord_capacity + } + + /// The geometry offsets buffer capacity + pub fn geom_capacity(&self) -> usize { + self.geom_capacity + } + + /// Construct a new counter pre-filled with the given MultiPoints + pub fn from_multi_points<'a>( + geoms: impl Iterator>, + ) -> Self { + let mut counter = Self::new_empty(); + + for maybe_line_string in geoms.into_iter() { + counter.add_multi_point(maybe_line_string); + } + + counter + } + + /// Construct a new counter pre-filled with the given geometries + pub fn from_geometries<'a>( + geoms: impl Iterator>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for g in geoms.into_iter() { + counter.add_geometry(g)?; + } + Ok(counter) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self, dim: Dimension) -> usize { + let offsets_byte_width = 4; + let num_offsets = self.geom_capacity; + (offsets_byte_width * num_offsets) + (self.coord_capacity * dim.size() * 8) + } +} + +impl Default for MultiPointCapacity { + fn default() -> Self { + Self::new_empty() + } +} + +impl Add for MultiPointCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let coord_capacity = self.coord_capacity + rhs.coord_capacity; + let geom_capacity = self.geom_capacity + rhs.geom_capacity; + Self::new(coord_capacity, geom_capacity) + } +} + +impl AddAssign for MultiPointCapacity { + fn add_assign(&mut self, rhs: Self) { + self.coord_capacity += rhs.coord_capacity; + self.geom_capacity += rhs.geom_capacity; + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/multipolygon.rs b/src/geoarrow/geoarrow-array/src/capacity/multipolygon.rs new file mode 100644 index 0000000000..6b87badcc0 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/multipolygon.rs @@ -0,0 +1,214 @@ +use std::ops::{Add, AddAssign}; + +use geo_traits::{GeometryTrait, GeometryType, LineStringTrait, MultiPolygonTrait, PolygonTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::{capacity::PolygonCapacity, util::GeometryTypeName}; + +/// A counter for the buffer sizes of a [`MultiPolygonArray`][crate::array::MultiPolygonArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct MultiPolygonCapacity { + pub(crate) coord_capacity: usize, + pub(crate) ring_capacity: usize, + pub(crate) polygon_capacity: usize, + pub(crate) geom_capacity: usize, +} + +impl MultiPolygonCapacity { + /// Create a new capacity with known sizes. + pub fn new( + coord_capacity: usize, + ring_capacity: usize, + polygon_capacity: usize, + geom_capacity: usize, + ) -> Self { + Self { + coord_capacity, + ring_capacity, + polygon_capacity, + geom_capacity, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(0, 0, 0, 0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.coord_capacity == 0 + && self.ring_capacity == 0 + && self.polygon_capacity == 0 + && self.geom_capacity == 0 + } + + /// The coordinate buffer capacity + pub fn coord_capacity(&self) -> usize { + self.coord_capacity + } + + /// The ring offset buffer capacity + pub fn ring_capacity(&self) -> usize { + self.ring_capacity + } + + /// The polygon offset buffer capacity + pub fn polygon_capacity(&self) -> usize { + self.polygon_capacity + } + + /// The geometry offset buffer capacity + pub fn geom_capacity(&self) -> usize { + self.geom_capacity + } + + /// Add the capacity of the given Polygon + #[inline] + pub fn add_polygon<'a>(&mut self, polygon: Option<&'a (impl PolygonTrait + 'a)>) { + self.geom_capacity += 1; + if let Some(polygon) = polygon { + // A single polygon + self.polygon_capacity += 1; + + // Total number of rings in this polygon + let num_interiors = polygon.num_interiors(); + self.ring_capacity += num_interiors + 1; + + // Number of coords for each ring + if let Some(exterior) = polygon.exterior() { + self.coord_capacity += exterior.num_coords(); + } + + for int_ring in polygon.interiors() { + self.coord_capacity += int_ring.num_coords(); + } + } + } + + /// Add the capacity of the given MultiPolygon + #[inline] + pub fn add_multi_polygon<'a>( + &mut self, + multi_polygon: Option<&'a (impl MultiPolygonTrait + 'a)>, + ) { + self.geom_capacity += 1; + + if let Some(multi_polygon) = multi_polygon { + // Total number of polygons in this MultiPolygon + let num_polygons = multi_polygon.num_polygons(); + self.polygon_capacity += num_polygons; + + for polygon in multi_polygon.polygons() { + // Total number of rings in this MultiPolygon + self.ring_capacity += polygon.num_interiors() + 1; + + // Number of coords for each ring + if let Some(exterior) = polygon.exterior() { + self.coord_capacity += exterior.num_coords(); + } + + for int_ring in polygon.interiors() { + self.coord_capacity += int_ring.num_coords(); + } + } + } + } + + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either Polygon or MultiPolygon + #[inline] + pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> GeoArrowResult<()> { + if let Some(geom) = value { + match geom.as_type() { + GeometryType::Polygon(g) => self.add_polygon(Some(g)), + GeometryType::MultiPolygon(g) => self.add_multi_polygon(Some(g)), + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected MultiPolygon, got {}", + gt.name() + ))); + } + } + } else { + self.geom_capacity += 1; + }; + Ok(()) + } + + /// Construct a new counter pre-filled with the given MultiPolygons + pub fn from_multi_polygons<'a>( + geoms: impl Iterator>, + ) -> Self { + let mut counter = Self::new_empty(); + for maybe_multi_polygon in geoms.into_iter() { + counter.add_multi_polygon(maybe_multi_polygon); + } + counter + } + + /// Construct a new counter pre-filled with the given geometries + pub fn from_geometries<'a>( + geoms: impl Iterator>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for g in geoms.into_iter() { + counter.add_geometry(g)?; + } + Ok(counter) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self, dim: Dimension) -> usize { + let offsets_byte_width = 4; + let num_offsets = self.geom_capacity + self.polygon_capacity + self.ring_capacity; + (offsets_byte_width * num_offsets) + (self.coord_capacity * dim.size() * 8) + } +} + +impl Default for MultiPolygonCapacity { + fn default() -> Self { + Self::new_empty() + } +} + +impl Add for MultiPolygonCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let coord_capacity = self.coord_capacity + rhs.coord_capacity; + let ring_capacity = self.ring_capacity + rhs.ring_capacity; + let polygon_capacity = self.polygon_capacity + rhs.polygon_capacity; + let geom_capacity = self.geom_capacity + rhs.geom_capacity; + Self::new( + coord_capacity, + ring_capacity, + polygon_capacity, + geom_capacity, + ) + } +} + +impl AddAssign for MultiPolygonCapacity { + fn add_assign(&mut self, rhs: Self) { + self.coord_capacity += rhs.coord_capacity; + self.ring_capacity += rhs.ring_capacity; + self.polygon_capacity += rhs.polygon_capacity; + self.geom_capacity += rhs.geom_capacity; + } +} + +impl AddAssign for MultiPolygonCapacity { + fn add_assign(&mut self, rhs: PolygonCapacity) { + self.coord_capacity += rhs.coord_capacity(); + self.ring_capacity += rhs.ring_capacity(); + self.polygon_capacity += rhs.geom_capacity(); + self.geom_capacity += rhs.geom_capacity(); + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/point.rs b/src/geoarrow/geoarrow-array/src/capacity/point.rs new file mode 100644 index 0000000000..628e38d354 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/point.rs @@ -0,0 +1,72 @@ +#![allow(dead_code)] + +use std::ops::Add; + +use geo_traits::{GeometryTrait, GeometryType, PointTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +/// A counter for the buffer sizes of a [`PointArray`][crate::array::PointArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct PointCapacity { + pub(crate) geom_capacity: usize, +} + +impl PointCapacity { + /// Create a new capacity with known size. + pub fn new(geom_capacity: usize) -> Self { + Self { geom_capacity } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.geom_capacity == 0 + } + + /// Add the capacity of the given Point + #[inline] + pub fn add_point(&mut self, _point: Option<&impl PointTrait>) { + self.geom_capacity += 1; + } + + /// Add the capacity of the given Geometry + #[inline] + pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> GeoArrowResult<()> { + if let Some(g) = value { + match g.as_type() { + GeometryType::Point(p) => self.add_point(Some(p)), + + _ => { + return Err(GeoArrowError::IncorrectGeometryType( + "Expected point in PointCapacity".to_string(), + )); + } + } + } else { + self.geom_capacity += 1; + }; + Ok(()) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self, dim: Dimension) -> usize { + self.geom_capacity * dim.size() * 8 + } +} + +impl Add for PointCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self::new(self.geom_capacity + rhs.geom_capacity) + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/polygon.rs b/src/geoarrow/geoarrow-array/src/capacity/polygon.rs new file mode 100644 index 0000000000..c3e3c0ae94 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/polygon.rs @@ -0,0 +1,164 @@ +use std::ops::Add; + +use geo_traits::{GeometryTrait, GeometryType, LineStringTrait, PolygonTrait, RectTrait}; +use geoarrow_schema::{ + Dimension, + error::{GeoArrowError, GeoArrowResult}, +}; + +use crate::util::GeometryTypeName; + +/// A counter for the buffer sizes of a [`PolygonArray`][crate::array::PolygonArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct PolygonCapacity { + pub(crate) coord_capacity: usize, + pub(crate) ring_capacity: usize, + pub(crate) geom_capacity: usize, +} + +impl PolygonCapacity { + /// Create a new capacity with known sizes. + pub fn new(coord_capacity: usize, ring_capacity: usize, geom_capacity: usize) -> Self { + Self { + coord_capacity, + ring_capacity, + geom_capacity, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(0, 0, 0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.coord_capacity == 0 && self.ring_capacity == 0 && self.geom_capacity == 0 + } + + /// The coordinate buffer capacity + pub fn coord_capacity(&self) -> usize { + self.coord_capacity + } + + /// The ring offset buffer capacity + pub fn ring_capacity(&self) -> usize { + self.ring_capacity + } + + /// The geometry offset buffer capacity + pub fn geom_capacity(&self) -> usize { + self.geom_capacity + } + + /// Add the capacity of the given Polygon + #[inline] + pub fn add_polygon<'a>(&mut self, polygon: Option<&'a (impl PolygonTrait + 'a)>) { + self.geom_capacity += 1; + if let Some(polygon) = polygon { + // Total number of rings in this polygon + let num_interiors = polygon.num_interiors(); + self.ring_capacity += num_interiors + 1; + + // Number of coords for each ring + if let Some(exterior) = polygon.exterior() { + self.coord_capacity += exterior.num_coords(); + } + + for int_ring in polygon.interiors() { + self.coord_capacity += int_ring.num_coords(); + } + } + } + + /// Add the capacity of the given Rect + #[inline] + pub fn add_rect<'a>(&mut self, rect: Option<&'a (impl RectTrait + 'a)>) { + self.geom_capacity += 1; + if rect.is_some() { + // A rect is a simple polygon with only one ring + self.ring_capacity += 1; + // A rect is a closed polygon with 5 coordinates + self.coord_capacity += 5; + } + } + + /// Add the capacity of the given Geometry + /// + /// The type of the geometry must be either Polygon or Rect + #[inline] + pub fn add_geometry(&mut self, value: Option<&impl GeometryTrait>) -> GeoArrowResult<()> { + if let Some(geom) = value { + match geom.as_type() { + GeometryType::Polygon(g) => self.add_polygon(Some(g)), + GeometryType::Rect(g) => self.add_rect(Some(g)), + gt => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Expected polygon, got {}", + gt.name() + ))); + } + } + } else { + self.geom_capacity += 1; + }; + Ok(()) + } + + /// Construct a new counter pre-filled with the given Polygons + pub fn from_polygons<'a>( + geoms: impl Iterator>, + ) -> Self { + let mut counter = Self::new_empty(); + for maybe_polygon in geoms.into_iter() { + counter.add_polygon(maybe_polygon); + } + counter + } + + /// Construct a new counter pre-filled with the given Rects + pub fn from_rects<'a>(geoms: impl Iterator>) -> Self { + let mut counter = Self::new_empty(); + for maybe_rect in geoms.into_iter() { + counter.add_rect(maybe_rect); + } + counter + } + + /// Construct a new counter pre-filled with the given geometries + pub fn from_geometries<'a>( + geoms: impl Iterator>, + ) -> GeoArrowResult { + let mut counter = Self::new_empty(); + for g in geoms.into_iter() { + counter.add_geometry(g)?; + } + Ok(counter) + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self, dim: Dimension) -> usize { + let offsets_byte_width = 4; + let num_offsets = self.geom_capacity + self.ring_capacity; + (offsets_byte_width * num_offsets) + (self.coord_capacity * dim.size() * 8) + } +} + +impl Default for PolygonCapacity { + fn default() -> Self { + Self::new_empty() + } +} + +impl Add for PolygonCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let coord_capacity = self.coord_capacity + rhs.coord_capacity; + let ring_capacity = self.ring_capacity + rhs.ring_capacity; + let geom_capacity = self.geom_capacity + rhs.geom_capacity; + Self::new(coord_capacity, ring_capacity, geom_capacity) + } +} diff --git a/src/geoarrow/geoarrow-array/src/capacity/wkb.rs b/src/geoarrow/geoarrow-array/src/capacity/wkb.rs new file mode 100644 index 0000000000..077e95f938 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/capacity/wkb.rs @@ -0,0 +1,88 @@ +use std::ops::Add; + +use arrow_array::OffsetSizeTrait; +use geo_traits::GeometryTrait; +use wkb::writer::geometry_wkb_size; + +/// A counter for the buffer sizes of a [`GenericWkbArray`][crate::array::GenericWkbArray]. +/// +/// This can be used to reduce allocations by allocating once for exactly the array size you need. +#[derive(Debug, Clone, Copy)] +pub struct WkbCapacity { + pub(crate) buffer_capacity: usize, + pub(crate) offsets_capacity: usize, +} + +impl WkbCapacity { + /// Create a new capacity with known sizes. + pub fn new(buffer_capacity: usize, offsets_capacity: usize) -> Self { + Self { + buffer_capacity, + offsets_capacity, + } + } + + /// Create a new empty capacity. + pub fn new_empty() -> Self { + Self::new(0, 0) + } + + /// Return `true` if the capacity is empty. + pub fn is_empty(&self) -> bool { + self.buffer_capacity == 0 && self.offsets_capacity == 0 + } + + /// The capacity of the underlying data buffer + pub fn buffer_capacity(&self) -> usize { + self.buffer_capacity + } + + /// The capacity of the underlying offsets buffer + pub fn offsets_capacity(&self) -> usize { + self.offsets_capacity + } + + /// Add a Geometry to this capacity counter. + #[inline] + pub fn add_geometry<'a>(&mut self, geom: Option<&'a (impl GeometryTrait + 'a)>) { + if let Some(geom) = geom { + self.buffer_capacity += geometry_wkb_size(geom); + } + self.offsets_capacity += 1; + } + + /// Create a capacity counter from an iterator of Geometries. + pub fn from_geometries<'a>( + geoms: impl Iterator + 'a)>>, + ) -> Self { + let mut counter = Self::new_empty(); + for maybe_geom in geoms.into_iter() { + counter.add_geometry(maybe_geom); + } + counter + } + + /// The number of bytes an array with this capacity would occupy. + pub fn num_bytes(&self) -> usize { + let offsets_byte_width = if O::IS_LARGE { 8 } else { 4 }; + let num_offsets = self.offsets_capacity; + (offsets_byte_width * num_offsets) + self.buffer_capacity + } +} + +impl Default for WkbCapacity { + fn default() -> Self { + Self::new_empty() + } +} + +impl Add for WkbCapacity { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let buffer_capacity = self.buffer_capacity + rhs.buffer_capacity; + let offsets_capacity = self.offsets_capacity + rhs.offsets_capacity; + + Self::new(buffer_capacity, offsets_capacity) + } +} diff --git a/src/geoarrow/geoarrow-array/src/cast.rs b/src/geoarrow/geoarrow-array/src/cast.rs new file mode 100644 index 0000000000..31aa6b7c10 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/cast.rs @@ -0,0 +1,1295 @@ +//! Helper functions for downcasting [`dyn GeoArrowArray`][GeoArrowArray] to concrete types and for +//! converting between GeoArrow array representations. + +use std::sync::Arc; + +use arrow_array::{ + OffsetSizeTrait, + builder::{BinaryViewBuilder, GenericByteBuilder, GenericStringBuilder, StringViewBuilder}, + cast::AsArray, +}; +use geoarrow_schema::{ + GeoArrowType, WkbType, + error::{GeoArrowError, GeoArrowResult}, +}; +use wkb::{Endianness, writer::WriteOptions}; + +use crate::{ + GeoArrowArrayAccessor, IntoArrow, + array::*, + builder::{ + GeometryBuilder, GeometryCollectionBuilder, LineStringBuilder, MultiLineStringBuilder, + MultiPointBuilder, MultiPolygonBuilder, PointBuilder, PolygonBuilder, WkbBuilder, + }, + trait_::GeoArrowArray, +}; + +/// Helpers for downcasting a [`GeoArrowArray`] to a concrete implementation. +/// +/// ``` +/// use std::sync::Arc; +/// use arrow_array::{Int32Array, RecordBatch}; +/// use arrow_schema::{Schema, Field, DataType, ArrowError}; +/// use geo_types::point; +/// +/// use geoarrow_array::array::PointArray; +/// use geoarrow_array::builder::PointBuilder; +/// use geoarrow_array::cast::AsGeoArrowArray; +/// use geoarrow_array::GeoArrowArray; +/// use geo_traits::CoordTrait; +/// use geoarrow_schema::{Dimension, PointType}; +/// +/// let point1 = point!(x: 1., y: 2.); +/// let point2 = point!(x: 3., y: 4.); +/// let point3 = point!(x: 5., y: 6.); +/// let geoms = [point1, point2, point3]; +/// +/// let geom_type = PointType::new(Dimension::XY, Default::default()); +/// let point_array = PointBuilder::from_points(geoms.iter(), geom_type).finish(); +/// +/// let generic_array: Arc = Arc::new(point_array.clone()); +/// +/// let point_array2 = generic_array.as_point(); +/// assert_eq!(&point_array, point_array2); +/// ``` +pub trait AsGeoArrowArray { + /// Downcast this to a [`PointArray`] returning `None` if not possible + fn as_point_opt(&self) -> Option<&PointArray>; + + /// Downcast this to a [`PointArray`] panicking if not possible + #[inline] + fn as_point(&self) -> &PointArray { + self.as_point_opt().unwrap() + } + + /// Downcast this to a [`LineStringArray`] with `i32` offsets returning `None` if not possible + fn as_line_string_opt(&self) -> Option<&LineStringArray>; + + /// Downcast this to a [`LineStringArray`] with `i32` offsets panicking if not possible + #[inline] + fn as_line_string(&self) -> &LineStringArray { + self.as_line_string_opt().unwrap() + } + + /// Downcast this to a [`PolygonArray`] with `i32` offsets returning `None` if not possible + fn as_polygon_opt(&self) -> Option<&PolygonArray>; + + /// Downcast this to a [`PolygonArray`] with `i32` offsets panicking if not possible + #[inline] + fn as_polygon(&self) -> &PolygonArray { + self.as_polygon_opt().unwrap() + } + + /// Downcast this to a [`MultiPointArray`] with `i32` offsets returning `None` if not possible + fn as_multi_point_opt(&self) -> Option<&MultiPointArray>; + + /// Downcast this to a [`MultiPointArray`] with `i32` offsets panicking if not possible + #[inline] + fn as_multi_point(&self) -> &MultiPointArray { + self.as_multi_point_opt().unwrap() + } + + /// Downcast this to a [`MultiLineStringArray`] with `i32` offsets returning `None` if not + /// possible + fn as_multi_line_string_opt(&self) -> Option<&MultiLineStringArray>; + + /// Downcast this to a [`MultiLineStringArray`] with `i32` offsets panicking if not possible + #[inline] + fn as_multi_line_string(&self) -> &MultiLineStringArray { + self.as_multi_line_string_opt().unwrap() + } + + /// Downcast this to a [`MultiPolygonArray`] with `i32` offsets returning `None` if not + /// possible + fn as_multi_polygon_opt(&self) -> Option<&MultiPolygonArray>; + + /// Downcast this to a [`MultiPolygonArray`] with `i32` offsets panicking if not possible + #[inline] + fn as_multi_polygon(&self) -> &MultiPolygonArray { + self.as_multi_polygon_opt().unwrap() + } + + /// Downcast this to a [`GeometryCollectionArray`] with `i32` offsets returning `None` if not + /// possible + fn as_geometry_collection_opt(&self) -> Option<&GeometryCollectionArray>; + + /// Downcast this to a [`GeometryCollectionArray`] with `i32` offsets panicking if not possible + #[inline] + fn as_geometry_collection(&self) -> &GeometryCollectionArray { + self.as_geometry_collection_opt().unwrap() + } + + /// Downcast this to a [`RectArray`] returning `None` if not possible + fn as_rect_opt(&self) -> Option<&RectArray>; + + /// Downcast this to a [`RectArray`] panicking if not possible + #[inline] + fn as_rect(&self) -> &RectArray { + self.as_rect_opt().unwrap() + } + + /// Downcast this to a [`GeometryArray`] returning `None` if not possible + fn as_geometry_opt(&self) -> Option<&GeometryArray>; + + /// Downcast this to a [`GeometryArray`] panicking if not possible + #[inline] + fn as_geometry(&self) -> &GeometryArray { + self.as_geometry_opt().unwrap() + } + + /// Downcast this to a [`GenericWkbArray`] with `O` offsets returning `None` if not possible + fn as_wkb_opt(&self) -> Option<&GenericWkbArray>; + + /// Downcast this to a [`GenericWkbArray`] with `O` offsets panicking if not possible + #[inline] + fn as_wkb(&self) -> &GenericWkbArray { + self.as_wkb_opt::().unwrap() + } + + /// Downcast this to a [`WkbViewArray`] returning `None` if not possible + fn as_wkb_view_opt(&self) -> Option<&WkbViewArray>; + + /// Downcast this to a [`WkbViewArray`] panicking if not possible + #[inline] + fn as_wkb_view(&self) -> &WkbViewArray { + self.as_wkb_view_opt().unwrap() + } + + /// Downcast this to a [`GenericWktArray`] with `O` offsets returning `None` if not possible + fn as_wkt_opt(&self) -> Option<&GenericWktArray>; + + /// Downcast this to a [`GenericWktArray`] with `O` offsets panicking if not possible + #[inline] + fn as_wkt(&self) -> &GenericWktArray { + self.as_wkt_opt::().unwrap() + } + + /// Downcast this to a [`WktViewArray`] returning `None` if not possible + fn as_wkt_view_opt(&self) -> Option<&WktViewArray>; + + /// Downcast this to a [`WktViewArray`] panicking if not possible + #[inline] + fn as_wkt_view(&self) -> &WktViewArray { + self.as_wkt_view_opt().unwrap() + } +} + +// `dyn GeoArrowArray + '_` is the same as upstream Arrow +impl AsGeoArrowArray for dyn GeoArrowArray + '_ { + #[inline] + fn as_point_opt(&self) -> Option<&PointArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_line_string_opt(&self) -> Option<&LineStringArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_polygon_opt(&self) -> Option<&PolygonArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_multi_point_opt(&self) -> Option<&MultiPointArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_multi_line_string_opt(&self) -> Option<&MultiLineStringArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_multi_polygon_opt(&self) -> Option<&MultiPolygonArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_geometry_collection_opt(&self) -> Option<&GeometryCollectionArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_rect_opt(&self) -> Option<&RectArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_geometry_opt(&self) -> Option<&GeometryArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_wkb_opt(&self) -> Option<&GenericWkbArray> { + self.as_any().downcast_ref::>() + } + + #[inline] + fn as_wkb_view_opt(&self) -> Option<&WkbViewArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_wkt_opt(&self) -> Option<&GenericWktArray> { + self.as_any().downcast_ref::>() + } + + #[inline] + fn as_wkt_view_opt(&self) -> Option<&WktViewArray> { + self.as_any().downcast_ref::() + } +} + +impl AsGeoArrowArray for Arc { + #[inline] + fn as_point_opt(&self) -> Option<&PointArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_line_string_opt(&self) -> Option<&LineStringArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_polygon_opt(&self) -> Option<&PolygonArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_multi_point_opt(&self) -> Option<&MultiPointArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_multi_line_string_opt(&self) -> Option<&MultiLineStringArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_multi_polygon_opt(&self) -> Option<&MultiPolygonArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_geometry_collection_opt(&self) -> Option<&GeometryCollectionArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_rect_opt(&self) -> Option<&RectArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_geometry_opt(&self) -> Option<&GeometryArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_wkb_opt(&self) -> Option<&GenericWkbArray> { + self.as_any().downcast_ref::>() + } + + #[inline] + fn as_wkb_view_opt(&self) -> Option<&WkbViewArray> { + self.as_any().downcast_ref::() + } + + #[inline] + fn as_wkt_opt(&self) -> Option<&GenericWktArray> { + self.as_any().downcast_ref::>() + } + + #[inline] + fn as_wkt_view_opt(&self) -> Option<&WktViewArray> { + self.as_any().downcast_ref::() + } +} + +/// Convert a [GeoArrowArray] to a [`GenericWkbArray`]. +pub fn to_wkb(arr: &dyn GeoArrowArray) -> GeoArrowResult> { + use GeoArrowType::*; + match arr.data_type() { + Point(_) => impl_to_wkb(arr.as_point()), + LineString(_) => impl_to_wkb(arr.as_line_string()), + Polygon(_) => impl_to_wkb(arr.as_polygon()), + MultiPoint(_) => impl_to_wkb(arr.as_multi_point()), + MultiLineString(_) => impl_to_wkb(arr.as_multi_line_string()), + MultiPolygon(_) => impl_to_wkb(arr.as_multi_polygon()), + Geometry(_) => impl_to_wkb(arr.as_geometry()), + GeometryCollection(_) => impl_to_wkb(arr.as_geometry_collection()), + Rect(_) => impl_to_wkb(arr.as_rect()), + Wkb(typ) => { + // Note that here O is the _target_ offset type + if O::IS_LARGE { + // We need to convert from i32 to i64 + let large_arr: GenericWkbArray = arr.as_wkb::().clone().into(); + let array = large_arr.to_array_ref().as_binary::().clone(); + Ok(GenericWkbArray::new(array, typ.metadata().clone())) + } else { + // Since O is already i32, we can just go via ArrayRef, and use .as_binary to cast + // to O + let array = arr.as_wkb::().to_array_ref(); + let array = array.as_binary::().clone(); + Ok(GenericWkbArray::new(array, typ.metadata().clone())) + } + } + LargeWkb(typ) => { + if O::IS_LARGE { + // Since O is already i64, we can just go via ArrayRef, and use .as_binary to cast + // to O + let array = arr.as_wkb::().to_array_ref(); + let array = array.as_binary::().clone(); + Ok(GenericWkbArray::new(array, typ.metadata().clone())) + } else { + // We need to convert from i64 to i32 + let small_arr: GenericWkbArray = arr.as_wkb::().clone().try_into()?; + let array = small_arr.to_array_ref().as_binary::().clone(); + Ok(GenericWkbArray::new(array, typ.metadata().clone())) + } + } + WkbView(_) => { + let wkb_view_arr = arr.as_wkb_view(); + let metadata = wkb_view_arr.data_type().metadata().clone(); + let array = wkb_view_arr.clone().into_arrow(); + + let mut builder = GenericByteBuilder::with_capacity(arr.len(), 0); + array.iter().for_each(|value| builder.append_option(value)); + Ok(GenericWkbArray::new(builder.finish(), metadata)) + } + Wkt(_) => impl_to_wkb(arr.as_wkt::()), + LargeWkt(_) => impl_to_wkb(arr.as_wkt::()), + WktView(_) => impl_to_wkb(arr.as_wkt_view()), + } +} + +fn impl_to_wkb<'a, O: OffsetSizeTrait>( + geo_arr: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult> { + let geoms = geo_arr + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + let wkb_type = WkbType::new(geo_arr.data_type().metadata().clone()); + Ok(WkbBuilder::from_nullable_geometries(geoms.as_slice(), wkb_type)?.finish()) +} + +/// Convert a [GeoArrowArray] to a [`WkbViewArray`]. +pub fn to_wkb_view(arr: &dyn GeoArrowArray) -> GeoArrowResult { + use GeoArrowType::*; + match arr.data_type() { + Point(_) => impl_to_wkb_view(arr.as_point()), + LineString(_) => impl_to_wkb_view(arr.as_line_string()), + Polygon(_) => impl_to_wkb_view(arr.as_polygon()), + MultiPoint(_) => impl_to_wkb_view(arr.as_multi_point()), + MultiLineString(_) => impl_to_wkb_view(arr.as_multi_line_string()), + MultiPolygon(_) => impl_to_wkb_view(arr.as_multi_polygon()), + Geometry(_) => impl_to_wkb_view(arr.as_geometry()), + GeometryCollection(_) => impl_to_wkb_view(arr.as_geometry_collection()), + Rect(_) => impl_to_wkb_view(arr.as_rect()), + Wkb(_) => wkb_array_to_wkb_view(arr.as_wkb::()), + LargeWkb(_) => wkb_array_to_wkb_view(arr.as_wkb::()), + WkbView(_) => Ok(arr.as_wkb_view().clone()), + Wkt(_) => impl_to_wkb_view(arr.as_wkt::()), + LargeWkt(_) => impl_to_wkb_view(arr.as_wkt::()), + WktView(_) => impl_to_wkb_view(arr.as_wkt_view()), + } +} + +/// Convert an arbitrary GeoArrowArray to a WkbViewArray. +/// +/// This function will parse each geometry and re-encode it as WKB. +fn impl_to_wkb_view<'a>( + geo_arr: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let geoms = geo_arr + .iter() + .map(|x| x.transpose()) + .collect::>>()?; + + let mut builder = BinaryViewBuilder::with_capacity(geo_arr.len()); + let wkb_options = WriteOptions { + endianness: Endianness::LittleEndian, + }; + for maybe_geom in geoms { + if let Some(geom) = maybe_geom { + let mut buf = Vec::new(); + wkb::writer::write_geometry(&mut buf, &geom, &wkb_options).unwrap(); + builder.append_value(buf); + } else { + builder.append_null(); + } + } + + let binary_view_arr = builder.finish(); + Ok(WkbViewArray::new( + binary_view_arr, + geo_arr.data_type().metadata().clone(), + )) +} + +/// A fast path of converting to WkbViewArray that does not parse and re-encode WKB buffers +fn wkb_array_to_wkb_view( + arr: &GenericWkbArray, +) -> GeoArrowResult { + let metadata = arr.data_type().metadata().clone(); + let mut builder = BinaryViewBuilder::with_capacity(arr.len()); + + for value in arr.inner().iter() { + if let Some(bytes) = value { + builder.append_value(bytes); + } else { + builder.append_null(); + } + } + + Ok(WkbViewArray::new(builder.finish(), metadata)) +} + +/// Parse a [`GenericWkbArray`] or [`WkbViewArray`] to a [`GeoArrowArray`] with the designated +/// [`GeoArrowType`]. +/// +/// Note that the GeoArrow metadata on the new array is taken from `to_type` **not** the original +/// array. Ensure you construct the [GeoArrowType] with the correct metadata. +/// +/// Note that this will be slow if converting from a WKB array to another WKB-typed array. If +/// possible, use the `From` impls on WKB-typed arrays. +pub fn from_wkb<'a, A: GenericWkbArrayType<'a>>( + arr: &'a A, + to_type: GeoArrowType, +) -> GeoArrowResult> { + // Make this a callback so that we don't actually generate this vec when converting from WKB to + // WKT or WKB + let geoms_fn = || { + arr.iter() + .map(|g| g.transpose()) + .collect::>>() + }; + + use GeoArrowType::*; + let result: Arc = match to_type { + Point(typ) => Arc::new(PointBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()), + LineString(typ) => { + Arc::new(LineStringBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + Polygon(typ) => { + Arc::new(PolygonBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + MultiPoint(typ) => { + Arc::new(MultiPointBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + MultiLineString(typ) => { + Arc::new(MultiLineStringBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + MultiPolygon(typ) => { + Arc::new(MultiPolygonBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + GeometryCollection(typ) => Arc::new( + GeometryCollectionBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish(), + ), + Rect(_) => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Cannot decode WKB geometries to Rect geometry type in from_wkb {to_type:?}", + ))); + } + Geometry(typ) => { + Arc::new(GeometryBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + Wkb(typ) => { + let mut wkb_arr = to_wkb::(arr)?; + wkb_arr.data_type = typ; + Arc::new(wkb_arr) + } + LargeWkb(typ) => { + let mut wkb_arr = to_wkb::(arr)?; + wkb_arr.data_type = typ; + Arc::new(wkb_arr) + } + WkbView(typ) => { + let mut wkb_view_arr = to_wkb_view(arr)?; + wkb_view_arr.data_type = typ; + Arc::new(wkb_view_arr) + } + Wkt(typ) => { + let mut wkt_arr = to_wkt::(arr)?; + wkt_arr.data_type = typ; + Arc::new(wkt_arr) + } + LargeWkt(typ) => { + let mut wkt_arr = to_wkt::(arr)?; + wkt_arr.data_type = typ; + Arc::new(wkt_arr) + } + WktView(typ) => { + let mut wkt_view_arr = to_wkt_view(arr)?; + wkt_view_arr.data_type = typ; + Arc::new(wkt_view_arr) + } + }; + Ok(result) +} + +/// Convert a [GeoArrowArray] to a [`GenericWktArray`]. +pub fn to_wkt(arr: &dyn GeoArrowArray) -> GeoArrowResult> { + use GeoArrowType::*; + match arr.data_type() { + Point(_) => impl_to_wkt(arr.as_point()), + LineString(_) => impl_to_wkt(arr.as_line_string()), + Polygon(_) => impl_to_wkt(arr.as_polygon()), + MultiPoint(_) => impl_to_wkt(arr.as_multi_point()), + MultiLineString(_) => impl_to_wkt(arr.as_multi_line_string()), + MultiPolygon(_) => impl_to_wkt(arr.as_multi_polygon()), + Geometry(_) => impl_to_wkt(arr.as_geometry()), + GeometryCollection(_) => impl_to_wkt(arr.as_geometry_collection()), + Rect(_) => impl_to_wkt(arr.as_rect()), + Wkb(_) => impl_to_wkt(arr.as_wkb::()), + LargeWkb(_) => impl_to_wkt(arr.as_wkb::()), + WkbView(_) => impl_to_wkt(arr.as_wkb_view()), + Wkt(typ) => { + if O::IS_LARGE { + let large_arr: GenericWktArray = arr.as_wkt::().clone().into(); + let array = large_arr.to_array_ref().as_string::().clone(); + Ok(GenericWktArray::new(array, typ.metadata().clone())) + } else { + // Since O is already i32, we can just go via ArrayRef, and use .as_string to cast + // to O + let array = arr.as_wkt::().to_array_ref(); + let array = array.as_string::().clone(); + Ok(GenericWktArray::new(array, typ.metadata().clone())) + } + } + LargeWkt(typ) => { + if O::IS_LARGE { + // Since O is already i64, we can just go via ArrayRef, and use .as_string to cast + // to O + let array = arr.as_wkt::().to_array_ref(); + let array = array.as_string::().clone(); + Ok(GenericWktArray::new(array, typ.metadata().clone())) + } else { + let small_arr: GenericWktArray = arr.as_wkt::().clone().try_into()?; + let array = small_arr.to_array_ref().as_string::().clone(); + Ok(GenericWktArray::new(array, typ.metadata().clone())) + } + } + WktView(_) => { + let wkt_view_arr = arr.as_wkt_view(); + let metadata = wkt_view_arr.data_type().metadata().clone(); + let array = wkt_view_arr.clone().into_arrow(); + + let mut builder = GenericStringBuilder::with_capacity(arr.len(), 0); + array.iter().for_each(|value| builder.append_option(value)); + Ok(GenericWktArray::new(builder.finish(), metadata)) + } + } +} + +fn impl_to_wkt<'a, O: OffsetSizeTrait>( + geo_arr: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult> { + let metadata = geo_arr.data_type().metadata().clone(); + let mut builder = GenericStringBuilder::with_capacity(geo_arr.len(), 0); + + for maybe_geom in geo_arr.iter() { + if let Some(geom) = maybe_geom { + wkt::to_wkt::write_geometry(&mut builder, &geom?) + .map_err(|err| GeoArrowError::External(Box::new(err)))?; + builder.append_value(""); + } else { + builder.append_null(); + } + } + + Ok(GenericWktArray::new(builder.finish(), metadata)) +} + +/// Convert a [GeoArrowArray] to a [`WktViewArray`]. +pub fn to_wkt_view(arr: &dyn GeoArrowArray) -> GeoArrowResult { + use GeoArrowType::*; + match arr.data_type() { + Point(_) => impl_to_wkt_view(arr.as_point()), + LineString(_) => impl_to_wkt_view(arr.as_line_string()), + Polygon(_) => impl_to_wkt_view(arr.as_polygon()), + MultiPoint(_) => impl_to_wkt_view(arr.as_multi_point()), + MultiLineString(_) => impl_to_wkt_view(arr.as_multi_line_string()), + MultiPolygon(_) => impl_to_wkt_view(arr.as_multi_polygon()), + Geometry(_) => impl_to_wkt_view(arr.as_geometry()), + GeometryCollection(_) => impl_to_wkt_view(arr.as_geometry_collection()), + Rect(_) => impl_to_wkt_view(arr.as_rect()), + Wkb(_) => impl_to_wkt_view(arr.as_wkb::()), + LargeWkb(_) => impl_to_wkt_view(arr.as_wkb::()), + WkbView(_) => impl_to_wkt_view(arr.as_wkb_view()), + Wkt(_) => wkt_array_to_wkt_view(arr.as_wkt::()), + LargeWkt(_) => wkt_array_to_wkt_view(arr.as_wkt::()), + WktView(_) => Ok(arr.as_wkt_view().clone()), + } +} + +/// Convert an arbitrary GeoArrowArray to a WktViewArray. +/// +/// This function will parse each geometry and re-encode it as WKT. +fn impl_to_wkt_view<'a>( + geo_arr: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let metadata = geo_arr.data_type().metadata().clone(); + let mut builder = StringViewBuilder::with_capacity(geo_arr.len()); + + for maybe_geom in geo_arr.iter() { + if let Some(geom) = maybe_geom { + let mut s = String::new(); + wkt::to_wkt::write_geometry(&mut s, &geom?) + .map_err(|err| GeoArrowError::External(Box::new(err)))?; + builder.append_value(s); + } else { + builder.append_null(); + } + } + + Ok(WktViewArray::new(builder.finish(), metadata)) +} + +/// A fast path of converting to WktViewArray that does not parse and re-encode WKT buffers +fn wkt_array_to_wkt_view( + arr: &GenericWktArray, +) -> GeoArrowResult { + let metadata = arr.data_type().metadata().clone(); + let mut builder = StringViewBuilder::with_capacity(arr.len()); + + for value in arr.inner().iter() { + if let Some(s) = value { + builder.append_value(s); + } else { + builder.append_null(); + } + } + + Ok(WktViewArray::new(builder.finish(), metadata)) +} + +/// Parse a [`GenericWktArray`] or [`WktViewArray`] to a [`GeoArrowArray`] with the designated +/// [`GeoArrowType`]. +/// +/// Note that the GeoArrow metadata on the new array is taken from `to_type` **not** the original +/// array. Ensure you construct the [GeoArrowType] with the correct metadata. +pub fn from_wkt( + arr: &A, + to_type: GeoArrowType, +) -> GeoArrowResult> { + // Make this a callback so that we don't actually generate this vec when converting from WKT to + // WKT or WKB + let geoms_fn = || { + arr.iter() + .map(|g| g.transpose()) + .collect::>>() + }; + + use GeoArrowType::*; + let result: Arc = match to_type { + Point(typ) => Arc::new(PointBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()), + LineString(typ) => { + Arc::new(LineStringBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + Polygon(typ) => { + Arc::new(PolygonBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + MultiPoint(typ) => { + Arc::new(MultiPointBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + MultiLineString(typ) => { + Arc::new(MultiLineStringBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + MultiPolygon(typ) => { + Arc::new(MultiPolygonBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + GeometryCollection(typ) => Arc::new( + GeometryCollectionBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish(), + ), + Rect(_) => { + return Err(GeoArrowError::IncorrectGeometryType(format!( + "Cannot decode WKT geometries to Rect geometry type in from_wkt {to_type:?}", + ))); + } + Geometry(typ) => { + Arc::new(GeometryBuilder::from_nullable_geometries(&geoms_fn()?, typ)?.finish()) + } + Wkb(typ) => { + let mut wkb_arr = to_wkb::(arr)?; + wkb_arr.data_type = typ; + Arc::new(wkb_arr) + } + LargeWkb(typ) => { + let mut wkb_arr = to_wkb::(arr)?; + wkb_arr.data_type = typ; + Arc::new(wkb_arr) + } + WkbView(typ) => { + let mut wkb_view_arr = to_wkb_view(arr)?; + wkb_view_arr.data_type = typ; + Arc::new(wkb_view_arr) + } + Wkt(typ) => { + let mut wkt_arr = to_wkt::(arr)?; + wkt_arr.data_type = typ; + Arc::new(wkt_arr) + } + LargeWkt(typ) => { + let mut wkt_arr = to_wkt::(arr)?; + wkt_arr.data_type = typ; + Arc::new(wkt_arr) + } + WktView(typ) => { + let mut wkt_view_arr = to_wkt_view(arr)?; + wkt_view_arr.data_type = typ; + Arc::new(wkt_view_arr) + } + }; + Ok(result) +} + +/// Re-export symbols needed for downcast macros +/// +/// Name follows `serde` convention +#[doc(hidden)] +pub mod __private { + pub use geoarrow_schema::GeoArrowType; +} + +/// Downcast a [GeoArrowArray] to a concrete-typed array based on its [`GeoArrowType`]. +/// +/// For example: computing unsigned area: +/// +/// ``` +/// use arrow_array::Float64Array; +/// use arrow_array::builder::Float64Builder; +/// use geo::Area; +/// use geo_traits::to_geo::ToGeoGeometry; +/// use geoarrow_schema::error::GeoArrowResult; +/// use geoarrow_array::{GeoArrowArrayAccessor, GeoArrowArray, downcast_geoarrow_array}; +/// +/// pub fn unsigned_area(array: &dyn GeoArrowArray) -> GeoArrowResult { +/// downcast_geoarrow_array!(array, impl_unsigned_area) +/// } +/// +/// fn impl_unsigned_area<'a>(array: &'a impl GeoArrowArrayAccessor<'a>) -> GeoArrowResult { +/// let mut builder = Float64Builder::with_capacity(array.len()); +/// +/// for item in array.iter() { +/// if let Some(geom) = item { +/// builder.append_value(geom?.to_geometry().unsigned_area()); +/// } else { +/// builder.append_null(); +/// } +/// } +/// +/// Ok(builder.finish()) +/// } +/// ``` +/// +/// You can also override the behavior of specific data types to specialize or provide a fast path. +/// For example, we know that points and lines will always have an area of 0, and don't need to +/// iterate over the input values to compute that. +/// +/// ``` +/// # use arrow_array::Float64Array; +/// # use arrow_array::builder::Float64Builder; +/// # use geo::Area; +/// # use geo_traits::to_geo::ToGeoGeometry; +/// # use geoarrow_schema::error::GeoArrowResult; +/// # use geoarrow_schema::GeoArrowType; +/// # use geoarrow_array::GeoArrowArrayAccessor; +/// # +/// # fn impl_unsigned_area<'a>(array: &'a impl GeoArrowArrayAccessor<'a>) -> GeoArrowResult { +/// # let mut builder = Float64Builder::with_capacity(array.len()); +/// # +/// # for item in array.iter() { +/// # if let Some(geom) = item { +/// # builder.append_value(geom?.to_geometry().unsigned_area()); +/// # } else { +/// # builder.append_null(); +/// # } +/// # } +/// # +/// # Ok(builder.finish()) +/// # } +/// # +/// fn impl_unsigned_area_specialized<'a>(array: &'a impl GeoArrowArrayAccessor<'a>) -> GeoArrowResult { +/// use GeoArrowType::*; +/// match array.data_type() { +/// Point(_) | LineString(_) | MultiPoint(_) | MultiLineString(_) => { +/// let values = vec![0.0f64; array.len()]; +/// Ok(Float64Array::new(values.into(), array.logical_nulls())) +/// } +/// _ => impl_unsigned_area(array), +/// } +/// } +/// ``` +/// +/// This is a simplified version of the upstream +/// [downcast_primitive_array][arrow_array::downcast_primitive_array]. +/// +/// If you would like to help in updating this `downcast_geoarrow_array` to support the full range +/// of functionality of the upstream `downcast_primitive_array`, please create an issue or submit a +/// PR. +#[macro_export] +macro_rules! downcast_geoarrow_array { + ($array:ident, $fn:expr $(, $args:expr )* $(,)?) => { + match $array.data_type() { + $crate::cast::__private::GeoArrowType::Point(_) => { + $fn($crate::cast::AsGeoArrowArray::as_point($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::LineString(_) => { + $fn($crate::cast::AsGeoArrowArray::as_line_string($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::Polygon(_) => { + $fn($crate::cast::AsGeoArrowArray::as_polygon($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::MultiPoint(_) => { + $fn($crate::cast::AsGeoArrowArray::as_multi_point($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::MultiLineString(_) => { + $fn($crate::cast::AsGeoArrowArray::as_multi_line_string($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::MultiPolygon(_) => { + $fn($crate::cast::AsGeoArrowArray::as_multi_polygon($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::Geometry(_) => { + $fn($crate::cast::AsGeoArrowArray::as_geometry($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::GeometryCollection(_) => $fn( + $crate::cast::AsGeoArrowArray::as_geometry_collection($array) $(, $args )* + ), + $crate::cast::__private::GeoArrowType::Rect(_) => { + $fn($crate::cast::AsGeoArrowArray::as_rect($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::Wkb(_) => { + $fn($crate::cast::AsGeoArrowArray::as_wkb::($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::LargeWkb(_) => { + $fn($crate::cast::AsGeoArrowArray::as_wkb::($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::WkbView(_) => { + $fn($crate::cast::AsGeoArrowArray::as_wkb_view($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::Wkt(_) => { + $fn($crate::cast::AsGeoArrowArray::as_wkt::($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::LargeWkt(_) => { + $fn($crate::cast::AsGeoArrowArray::as_wkt::($array) $(, $args )*) + } + $crate::cast::__private::GeoArrowType::WktView(_) => { + $fn($crate::cast::AsGeoArrowArray::as_wkt_view($array) $(, $args )*) + } + } + }; +} + +// #[cfg(test)] +// mod test { +// use std::sync::Arc; + +// use geoarrow_schema::{CoordType, Dimension, WkbType}; + +// use super::*; +// use crate::test; + +// #[test] +// fn test_cast_wkb_in_to_wkb() { +// let wkb_arr: GenericWkbArray = +// to_wkb(&test::point::array(CoordType::Separated, Dimension::XY)).unwrap(); +// let wkb_arr2: GenericWkbArray = to_wkb(&wkb_arr).unwrap(); +// let wkb_arr3: GenericWkbArray = to_wkb(&wkb_arr2).unwrap(); +// let wkb_arr4: GenericWkbArray = to_wkb(&wkb_arr3).unwrap(); +// let wkb_arr5: GenericWkbArray = to_wkb(&wkb_arr4).unwrap(); +// assert_eq!(wkb_arr, wkb_arr5); +// } + +// #[test] +// fn test_cast_wkt_in_to_wkt() { +// let wkt_arr: GenericWktArray = +// to_wkt(&test::point::array(CoordType::Separated, Dimension::XY)).unwrap(); +// let wkt_arr2: GenericWktArray = to_wkt(&wkt_arr).unwrap(); +// let wkt_arr3: GenericWktArray = to_wkt(&wkt_arr2).unwrap(); +// let wkt_arr4: GenericWktArray = to_wkt(&wkt_arr3).unwrap(); +// let wkt_arr5: GenericWktArray = to_wkt(&wkt_arr4).unwrap(); +// assert_eq!(wkt_arr, wkt_arr5); +// } + +// // Start WKB round trip tests +// #[test] +// fn test_round_trip_wkb_point() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::point::array(coord_type, dim); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_point()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_point()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkb_linestring() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::linestring::array(coord_type, dim); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_line_string()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_line_string()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkb_polygon() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::polygon::array(coord_type, dim); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_polygon()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_polygon()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkb_multipoint() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::multipoint::array(coord_type, dim); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_multi_point()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_multi_point()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkb_multilinestring() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::multilinestring::array(coord_type, dim); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_multi_line_string()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_multi_line_string()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkb_multipolygon() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::multipolygon::array(coord_type, dim); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_multi_polygon()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_multi_polygon()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkb_geometrycollection() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::geometrycollection::array(coord_type, dim, false); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_geometry_collection()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_geometry_collection()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkb_geometry() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let arr = test::geometry::array(coord_type, false); + +// let wkb_arr = to_wkb::(&arr).unwrap(); +// let arr2 = from_wkb(&wkb_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_geometry()); + +// let wkb_arr2 = to_wkb::(&arr).unwrap(); +// let arr3 = from_wkb(&wkb_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_geometry()); +// } +// } + +// // Start WKT round trip tests +// #[test] +// fn test_round_trip_wkt_point() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::point::array(coord_type, dim); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_point()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_point()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkt_linestring() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::linestring::array(coord_type, dim); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_line_string()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_line_string()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkt_polygon() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::polygon::array(coord_type, dim); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_polygon()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_polygon()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkt_multipoint() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::multipoint::array(coord_type, dim); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_multi_point()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_multi_point()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkt_multilinestring() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::multilinestring::array(coord_type, dim); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_multi_line_string()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_multi_line_string()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkt_multipolygon() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::multipolygon::array(coord_type, dim); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_multi_polygon()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_multi_polygon()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkt_geometrycollection() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let arr = test::geometrycollection::array(coord_type, dim, false); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_geometry_collection()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_geometry_collection()); +// } +// } +// } + +// #[test] +// fn test_round_trip_wkt_geometry() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// let arr = test::geometry::array(coord_type, false); + +// let wkt_arr = to_wkt::(&arr).unwrap(); +// let arr2 = from_wkt(&wkt_arr, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr2.as_geometry()); + +// let wkt_arr2 = to_wkt::(&arr).unwrap(); +// let arr3 = from_wkt(&wkt_arr2, arr.data_type().clone()).unwrap(); +// assert_eq!(&arr, arr3.as_geometry()); +// } +// } + +// // Verify that this compiles with the macro +// #[allow(dead_code)] +// fn _to_wkb_test_downcast_macro( +// arr: &dyn GeoArrowArray, +// ) -> GeoArrowResult> { +// downcast_geoarrow_array!(arr, impl_to_wkb) +// } + +// fn impl_to_wkb<'a>( +// geo_arr: &'a impl GeoArrowArrayAccessor<'a>, +// ) -> GeoArrowResult> { +// let geoms = geo_arr +// .iter() +// .map(|x| x.transpose()) +// .collect::, _>>() +// .unwrap(); +// let wkb_type = WkbType::new(geo_arr.data_type().metadata().clone()); +// Ok(WkbBuilder::from_nullable_geometries(geoms.as_slice(), wkb_type)?.finish()) +// } + +// // Verify that this compiles with the macro +// #[test] +// fn test_downcast_macro_with_param() { +// let arr = +// Arc::new(test::geometry::array(Default::default(), false)) as Arc; +// let arr_ref = arr.as_ref(); +// let x = downcast_geoarrow_array!(arr_ref, impl_inner_function_with_param, 1.0).unwrap(); +// assert_eq!(x, 1.0); +// } + +// fn impl_inner_function_with_param<'a>( +// _geo_arr: &'a impl GeoArrowArrayAccessor<'a>, +// param: f64, +// ) -> GeoArrowResult { +// Ok(param) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/eq.rs b/src/geoarrow/geoarrow-array/src/eq.rs new file mode 100644 index 0000000000..b83671ab30 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/eq.rs @@ -0,0 +1,283 @@ +use std::fmt::Debug; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::OffsetBuffer; +use geo_traits::{ + CoordTrait, GeometryCollectionTrait, GeometryTrait, GeometryType, LineStringTrait, + MultiLineStringTrait, MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait, RectTrait, +}; +use num_traits::{Float, Num, NumCast}; + +// The same as geo-types::CoordFloat +pub trait CoordFloat: Num + Copy + NumCast + PartialOrd + Debug + Float {} +impl CoordFloat for T {} + +#[inline] +pub fn coord_eq( + left: &impl CoordTrait, + right: &impl CoordTrait, +) -> bool { + let left_dim = left.dim(); + if left_dim != right.dim() { + return false; + } + + for i in 0..left_dim.size() { + if left.nth_or_panic(i) != right.nth_or_panic(i) { + return false; + } + } + + true +} + +#[inline] +pub fn point_eq( + left: &impl PointTrait, + right: &impl PointTrait, +) -> bool { + match (left.coord(), right.coord()) { + (Some(left), Some(right)) => coord_eq(&left, &right), + (None, None) => true, + _ => false, + } +} + +#[inline] +pub fn line_string_eq( + left: &impl LineStringTrait, + right: &impl LineStringTrait, +) -> bool { + if left.dim() != right.dim() { + return false; + } + + if left.num_coords() != right.num_coords() { + return false; + } + + for (left_coord, right_coord) in left.coords().zip(right.coords()) { + if !coord_eq(&left_coord, &right_coord) { + return false; + } + } + + true +} + +#[inline] +pub fn polygon_eq( + left: &impl PolygonTrait, + right: &impl PolygonTrait, +) -> bool { + if left.dim() != right.dim() { + return false; + } + + if left.num_interiors() != right.num_interiors() { + return false; + } + + match (left.exterior(), right.exterior()) { + (None, None) => (), + (Some(_), None) => { + return false; + } + (None, Some(_)) => { + return false; + } + (Some(left), Some(right)) => { + if !line_string_eq(&left, &right) { + return false; + } + } + }; + + for (left_interior, right_interior) in left.interiors().zip(right.interiors()) { + if !line_string_eq(&left_interior, &right_interior) { + return false; + } + } + + true +} + +#[inline] +pub fn multi_point_eq( + left: &impl MultiPointTrait, + right: &impl MultiPointTrait, +) -> bool { + if left.dim() != right.dim() { + return false; + } + + if left.num_points() != right.num_points() { + return false; + } + + for (left_point, right_point) in left.points().zip(right.points()) { + if !point_eq(&left_point, &right_point) { + return false; + } + } + + true +} + +#[inline] +pub fn multi_line_string_eq( + left: &impl MultiLineStringTrait, + right: &impl MultiLineStringTrait, +) -> bool { + if left.dim() != right.dim() { + return false; + } + + if left.num_line_strings() != right.num_line_strings() { + return false; + } + + for (left_line, right_line) in left.line_strings().zip(right.line_strings()) { + if !line_string_eq(&left_line, &right_line) { + return false; + } + } + + true +} + +#[inline] +pub fn multi_polygon_eq( + left: &impl MultiPolygonTrait, + right: &impl MultiPolygonTrait, +) -> bool { + if left.dim() != right.dim() { + return false; + } + + if left.num_polygons() != right.num_polygons() { + return false; + } + + for (left_polygon, right_polygon) in left.polygons().zip(right.polygons()) { + if !polygon_eq(&left_polygon, &right_polygon) { + return false; + } + } + + true +} + +#[inline] +pub fn rect_eq(left: &impl RectTrait, right: &impl RectTrait) -> bool { + if left.dim() != right.dim() { + return false; + } + + if !coord_eq(&left.min(), &right.min()) { + return false; + } + + if !coord_eq(&left.max(), &right.max()) { + return false; + } + + true +} + +#[inline] +pub fn geometry_eq( + left: &impl GeometryTrait, + right: &impl GeometryTrait, +) -> bool { + if left.dim() != right.dim() { + return false; + } + + match (left.as_type(), right.as_type()) { + (GeometryType::Point(l), GeometryType::Point(r)) => { + if !point_eq(l, r) { + return false; + } + } + (GeometryType::LineString(l), GeometryType::LineString(r)) => { + if !line_string_eq(l, r) { + return false; + } + } + (GeometryType::Polygon(l), GeometryType::Polygon(r)) => { + if !polygon_eq(l, r) { + return false; + } + } + (GeometryType::MultiPoint(l), GeometryType::MultiPoint(r)) => { + if !multi_point_eq(l, r) { + return false; + } + } + (GeometryType::MultiLineString(l), GeometryType::MultiLineString(r)) => { + if !multi_line_string_eq(l, r) { + return false; + } + } + (GeometryType::MultiPolygon(l), GeometryType::MultiPolygon(r)) => { + if !multi_polygon_eq(l, r) { + return false; + } + } + (GeometryType::Rect(l), GeometryType::Rect(r)) => { + if !rect_eq(l, r) { + return false; + } + } + (GeometryType::GeometryCollection(l), GeometryType::GeometryCollection(r)) => { + if !geometry_collection_eq(l, r) { + return false; + } + } + _ => { + return false; + } + } + + true +} + +#[inline] +pub fn geometry_collection_eq( + left: &impl GeometryCollectionTrait, + right: &impl GeometryCollectionTrait, +) -> bool { + if left.dim() != right.dim() { + return false; + } + + if left.num_geometries() != right.num_geometries() { + return false; + } + + for (left_geometry, right_geometry) in left.geometries().zip(right.geometries()) { + if !geometry_eq(&left_geometry, &right_geometry) { + return false; + } + } + + true +} + +pub(crate) fn offset_buffer_eq( + left: &OffsetBuffer, + right: &OffsetBuffer, +) -> bool { + if left.len() != right.len() { + return false; + } + + for (o1, o2) in left.iter().zip(right.iter()) { + if o1 != o2 { + return false; + } + } + + true +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/geometry.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/geometry.rs new file mode 100644 index 0000000000..4e211d2c35 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/geometry.rs @@ -0,0 +1,23 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::GeometryArray, + geozero::export::scalar::process_geometry, +}; + +impl GeozeroGeometry for GeometryArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_geometry(&self.value(geom_idx).unwrap(), geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/geometrycollection.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/geometrycollection.rs new file mode 100644 index 0000000000..dc60236cf9 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/geometrycollection.rs @@ -0,0 +1,23 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::GeometryCollectionArray, + geozero::export::scalar::process_geometry_collection, +}; + +impl GeozeroGeometry for GeometryCollectionArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_geometry_collection(&self.value(geom_idx).unwrap(), geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/linestring.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/linestring.rs new file mode 100644 index 0000000000..02403e5839 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/linestring.rs @@ -0,0 +1,40 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::LineStringArray, + geozero::export::scalar::process_line_string, +}; + +impl GeozeroGeometry for LineStringArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_line_string(&self.value(geom_idx).unwrap(), geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::CoordType; +// use geozero::ToWkt; + +// use crate::test::linestring::ls_array; + +// #[test] +// fn geozero_process_geom() -> geozero::error::Result<()> { +// let arr = ls_array(CoordType::Interleaved); +// let wkt = ToWkt::to_wkt(&arr)?; +// let expected = "GEOMETRYCOLLECTION(LINESTRING(0 1,1 2),LINESTRING EMPTY,LINESTRING(3 4,5 6),LINESTRING EMPTY)"; +// assert_eq!(wkt, expected); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/mod.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/mod.rs new file mode 100644 index 0000000000..e0612e2185 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/mod.rs @@ -0,0 +1,11 @@ +mod geometry; +mod geometrycollection; +mod linestring; +mod multilinestring; +mod multipoint; +mod multipolygon; +mod point; +mod polygon; +mod rect; +mod wkb; +mod wkt; diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/multilinestring.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/multilinestring.rs new file mode 100644 index 0000000000..59e6dbdbb5 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/multilinestring.rs @@ -0,0 +1,45 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::MultiLineStringArray, + geozero::export::scalar::process_multi_line_string, +}; + +impl GeozeroGeometry for MultiLineStringArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_multi_line_string(&self.value(geom_idx).unwrap(), geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::{Dimension, MultiLineStringType}; +// use geozero::ToWkt; + +// use crate::{ +// builder::MultiLineStringBuilder, +// test::multilinestring::{ml0, ml1}, +// }; + +// #[test] +// fn geozero_process_geom() -> geozero::error::Result<()> { +// let typ = MultiLineStringType::new(Dimension::XY, Default::default()); +// let geo_arr = +// MultiLineStringBuilder::from_multi_line_strings(&[&ml0(), &ml1()], typ).finish(); +// let wkt = ToWkt::to_wkt(&geo_arr)?; +// let expected = "GEOMETRYCOLLECTION(MULTILINESTRING((-111 45,-111 41,-104 41,-104 45)),MULTILINESTRING((-111 45,-111 41,-104 41,-104 45),(-110 44,-110 42,-105 42,-105 44)))"; +// assert_eq!(wkt, expected); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/multipoint.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/multipoint.rs new file mode 100644 index 0000000000..50a47d170b --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/multipoint.rs @@ -0,0 +1,44 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::MultiPointArray, + geozero::export::scalar::process_multi_point, +}; + +impl GeozeroGeometry for MultiPointArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_multi_point(&self.value(geom_idx).unwrap(), geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::{Dimension, MultiPointType}; +// use geozero::{ToWkt, error::Result}; + +// use crate::{ +// builder::MultiPointBuilder, +// test::multipoint::{mp0, mp1}, +// }; + +// #[test] +// fn geozero_process_geom() -> Result<()> { +// let typ = MultiPointType::new(Dimension::XY, Default::default()); +// let geo_arr = MultiPointBuilder::from_multi_points(&[&mp0(), &mp1()], typ).finish(); +// let wkt = ToWkt::to_wkt(&geo_arr)?; +// let expected = "GEOMETRYCOLLECTION(MULTIPOINT(0 1,1 2),MULTIPOINT(3 4,5 6))"; +// assert_eq!(wkt, expected); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/multipolygon.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/multipolygon.rs new file mode 100644 index 0000000000..f976c5de91 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/multipolygon.rs @@ -0,0 +1,44 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::MultiPolygonArray, + geozero::export::scalar::process_multi_polygon, +}; + +impl GeozeroGeometry for MultiPolygonArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_multi_polygon(&self.value(geom_idx).unwrap(), geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::{Dimension, MultiPolygonType}; +// use geozero::ToWkt; + +// use crate::{ +// builder::MultiPolygonBuilder, +// test::multipolygon::{mp0, mp1}, +// }; + +// #[test] +// fn geozero_process_geom() -> geozero::error::Result<()> { +// let typ = MultiPolygonType::new(Dimension::XY, Default::default()); +// let geo_arr = MultiPolygonBuilder::from_multi_polygons(&[&mp0(), &mp1()], typ).finish(); +// let wkt = ToWkt::to_wkt(&geo_arr)?; +// let expected = "GEOMETRYCOLLECTION(MULTIPOLYGON(((-111 45,-111 41,-104 41,-104 45,-111 45)),((-111 45,-111 41,-104 41,-104 45,-111 45),(-110 44,-110 42,-105 42,-105 44,-110 44))),MULTIPOLYGON(((-111 45,-111 41,-104 41,-104 45,-111 45)),((-110 44,-110 42,-105 42,-105 44,-110 44))))"; +// assert_eq!(wkt, expected); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/point.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/point.rs new file mode 100644 index 0000000000..666b2c03d7 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/point.rs @@ -0,0 +1,22 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PointArray, geozero::export::scalar::process_point, +}; + +impl GeozeroGeometry for PointArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for idx in 0..num_geometries { + process_point(&self.value(idx).unwrap(), idx, processor)?; + } + + processor.geometrycollection_end(num_geometries)?; + Ok(()) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/polygon.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/polygon.rs new file mode 100644 index 0000000000..57870366f7 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/polygon.rs @@ -0,0 +1,44 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PolygonArray, + geozero::export::scalar::process_polygon, +}; + +impl GeozeroGeometry for PolygonArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_polygon(&self.value(geom_idx).unwrap(), true, geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::{Dimension, PolygonType}; +// use geozero::ToWkt; + +// use crate::{ +// builder::PolygonBuilder, +// test::polygon::{p0, p1}, +// }; + +// #[test] +// fn geozero_process_geom() -> geozero::error::Result<()> { +// let typ = PolygonType::new(Dimension::XY, Default::default()); +// let geo_arr = PolygonBuilder::from_polygons(&[&p0(), &p1()], typ).finish(); +// let wkt = ToWkt::to_wkt(&geo_arr)?; +// let expected = "GEOMETRYCOLLECTION(POLYGON((-111 45,-111 41,-104 41,-104 45,-111 45)),POLYGON((-111 45,-111 41,-104 41,-104 45,-111 45),(-110 44,-110 42,-105 42,-105 44,-110 44)))"; +// assert_eq!(wkt, expected); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/rect.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/rect.rs new file mode 100644 index 0000000000..54a5e2e7fc --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/rect.rs @@ -0,0 +1,43 @@ +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, array::RectArray, geozero::export::scalar::process_rect, +}; + +impl GeozeroGeometry for RectArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + process_rect(&self.value(geom_idx).unwrap(), geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::{BoxType, Dimension}; +// use geozero::ToWkt; + +// use crate::{ +// builder::RectBuilder, +// test::rect::{r0, r1}, +// }; + +// #[test] +// fn geozero_process_geom() -> geozero::error::Result<()> { +// let typ = BoxType::new(Dimension::XY, Default::default()); +// let geo_arr = RectBuilder::from_rects([r0(), r1()].iter(), typ).finish(); +// let wkt = ToWkt::to_wkt(&geo_arr)?; +// let expected = "GEOMETRYCOLLECTION(POLYGON((10 10,10 20,30 20,30 10,10 10)),POLYGON((100 100,100 200,300 200,300 100,100 100)))"; +// assert_eq!(wkt, expected); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/wkb.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/wkb.rs new file mode 100644 index 0000000000..a758bbebba --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/wkb.rs @@ -0,0 +1,48 @@ +use arrow_array::OffsetSizeTrait; +use geozero::{GeomProcessor, GeozeroGeometry, error::GeozeroError}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, + array::{GenericWkbArray, WkbViewArray}, + geozero::export::scalar::process_geometry, +}; + +impl GeozeroGeometry for GenericWkbArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + let geom = &self + .value(geom_idx) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(geom, geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +impl GeozeroGeometry for WkbViewArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + let geom = &self + .value(geom_idx) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(geom, geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/array/wkt.rs b/src/geoarrow/geoarrow-array/src/geozero/export/array/wkt.rs new file mode 100644 index 0000000000..aefbad42b6 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/array/wkt.rs @@ -0,0 +1,48 @@ +use arrow_array::OffsetSizeTrait; +use geozero::{GeomProcessor, GeozeroGeometry, error::GeozeroError}; + +use crate::{ + GeoArrowArray, GeoArrowArrayAccessor, + array::{GenericWktArray, WktViewArray}, + geozero::export::scalar::process_geometry, +}; + +impl GeozeroGeometry for GenericWktArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + let geom = &self + .value(geom_idx) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(geom, geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} + +impl GeozeroGeometry for WktViewArray { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + let num_geometries = self.len(); + processor.geometrycollection_begin(num_geometries, 0)?; + + for geom_idx in 0..num_geometries { + let geom = &self + .value(geom_idx) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(geom, geom_idx, processor)?; + } + + processor.geometrycollection_end(num_geometries - 1)?; + Ok(()) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/data_source/mod.rs b/src/geoarrow/geoarrow-array/src/geozero/export/data_source/mod.rs new file mode 100644 index 0000000000..ac96db146a --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/data_source/mod.rs @@ -0,0 +1,5 @@ +mod record_batch; +mod record_batch_reader; + +pub use record_batch::GeozeroRecordBatchWriter; +pub use record_batch_reader::GeozeroRecordBatchReader; diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/data_source/record_batch.rs b/src/geoarrow/geoarrow-array/src/geozero/export/data_source/record_batch.rs new file mode 100644 index 0000000000..237d3051fd --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/data_source/record_batch.rs @@ -0,0 +1,447 @@ +use std::{str::FromStr, sync::Arc}; + +use arrow_array::{Array, RecordBatch, cast::AsArray, timezone::Tz, types::*}; +use arrow_json::writer::make_encoder; +use arrow_schema::{DataType, Schema, SchemaRef, TimeUnit}; +use geoarrow_schema::GeoArrowType; +use geozero::{ + ColumnValue, FeatureProcessor, GeomProcessor, PropertyProcessor, error::GeozeroError, +}; + +use crate::{ + GeoArrowArray, + array::from_arrow_array, + builder::geo_trait_wrappers::RectWrapper, + cast::AsGeoArrowArray, + geozero::export::scalar::{ + process_geometry, process_geometry_collection, process_line_string, + process_multi_line_string, process_multi_point, process_multi_polygon, process_point, + process_polygon, + }, + trait_::GeoArrowArrayAccessor, +}; + +/// A push-based writer for creating geozero-based outputs. +pub struct GeozeroRecordBatchWriter { + schema: SchemaRef, + overall_row_idx: usize, + geometry_column_index: usize, + processor: P, +} + +impl GeozeroRecordBatchWriter

{ + /// Create a new GeozeroRecordBatchWriter from a schema + pub fn try_new( + schema: SchemaRef, + mut processor: P, + name: Option<&str>, + ) -> Result { + let geom_indices = geometry_columns(&schema); + let geometry_column_index = if geom_indices.len() != 1 { + Err(GeozeroError::Dataset( + "Writing through geozero not supported with multiple geometries".to_string(), + ))? + } else { + geom_indices[0] + }; + + processor.dataset_begin(name)?; + + Ok(Self { + schema, + geometry_column_index, + overall_row_idx: 0, + processor, + }) + } + + /// Write a [`RecordBatch`], processing it with the given [`FeatureProcessor`]. + pub fn write(&mut self, batch: &RecordBatch) -> Result<(), GeozeroError> { + if *batch.schema_ref() != self.schema { + return Err(GeozeroError::Dataset( + "Batch schema does not match writer schema".to_string(), + )); + } + + let num_rows = batch.num_rows(); + process_batch( + batch, + batch.schema_ref(), + self.geometry_column_index, + self.overall_row_idx, + &mut self.processor, + )?; + self.overall_row_idx += num_rows; + + Ok(()) + } + + /// Finish the dataset processing and return the processor. + pub fn finish(mut self) -> Result { + self.processor.dataset_end()?; + Ok(self.processor) + } +} + +pub(super) fn process_batch( + batch: &RecordBatch, + schema: &Schema, + geometry_column_index: usize, + batch_start_idx: usize, + processor: &mut P, +) -> Result<(), GeozeroError> { + let num_rows = batch.num_rows(); + let geometry_field = schema.field(geometry_column_index); + let geometry_column_box = &batch.columns()[geometry_column_index]; + let geometry_column = from_arrow_array(&geometry_column_box, geometry_field) + .map_err(|err| GeozeroError::Dataset(err.to_string()))?; + + for within_batch_row_idx in 0..num_rows { + processor.feature_begin((within_batch_row_idx + batch_start_idx) as u64)?; + + processor.properties_begin()?; + process_properties( + batch, + schema, + within_batch_row_idx, + geometry_column_index, + processor, + )?; + processor.properties_end()?; + + processor.geometry_begin()?; + process_geometry_n(&geometry_column, within_batch_row_idx, processor)?; + processor.geometry_end()?; + + processor.feature_end((within_batch_row_idx + batch_start_idx) as u64)?; + } + + Ok(()) +} + +fn process_properties( + batch: &RecordBatch, + schema: &Schema, + within_batch_row_idx: usize, + geometry_column_index: usize, + processor: &mut P, +) -> Result<(), GeozeroError> { + // Note: the `column_idx` will be off by one if the geometry column is not the last column in + // the table, so we maintain a separate property index counter + let mut property_idx = 0; + for (column_idx, (field, array)) in schema.fields.iter().zip(batch.columns().iter()).enumerate() + { + // Don't include geometry column in properties + if column_idx == geometry_column_index { + continue; + } + let name = field.name(); + + // Don't pass null properties to geozero + if array.is_null(within_batch_row_idx) { + continue; + } + + match field.data_type() { + DataType::Boolean => { + let arr = array.as_boolean(); + processor.property( + property_idx, + name, + &ColumnValue::Bool(arr.value(within_batch_row_idx)), + )?; + } + DataType::UInt8 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::UByte(arr.value(within_batch_row_idx)), + )?; + } + DataType::Int8 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::Byte(arr.value(within_batch_row_idx)), + )?; + } + DataType::UInt16 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::UShort(arr.value(within_batch_row_idx)), + )?; + } + DataType::Int16 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::Short(arr.value(within_batch_row_idx)), + )?; + } + DataType::UInt32 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::UInt(arr.value(within_batch_row_idx)), + )?; + } + DataType::Int32 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::Int(arr.value(within_batch_row_idx)), + )?; + } + DataType::UInt64 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::ULong(arr.value(within_batch_row_idx)), + )?; + } + DataType::Int64 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::Long(arr.value(within_batch_row_idx)), + )?; + } + DataType::Float16 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::Float(arr.value(within_batch_row_idx).to_f32()), + )?; + } + DataType::Float32 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::Float(arr.value(within_batch_row_idx)), + )?; + } + DataType::Float64 => { + let arr = array.as_primitive::(); + processor.property( + property_idx, + name, + &ColumnValue::Double(arr.value(within_batch_row_idx)), + )?; + } + DataType::Utf8 => { + let arr = array.as_string::(); + processor.property( + property_idx, + name, + &ColumnValue::String(arr.value(within_batch_row_idx)), + )?; + } + DataType::LargeUtf8 => { + let arr = array.as_string::(); + processor.property( + property_idx, + name, + &ColumnValue::String(arr.value(within_batch_row_idx)), + )?; + } + DataType::Binary => { + let arr = array.as_binary::(); + processor.property( + property_idx, + name, + &ColumnValue::Binary(arr.value(within_batch_row_idx)), + )?; + } + DataType::LargeBinary => { + let arr = array.as_binary::(); + processor.property( + property_idx, + name, + &ColumnValue::Binary(arr.value(within_batch_row_idx)), + )?; + } + DataType::Struct(_) + | DataType::List(_) + | DataType::LargeList(_) + | DataType::Map(_, _) => { + // TODO(Perf): refactor so that we don't make a new encoder on every row + let options = Default::default(); + let mut enc = make_encoder(field, array, &options) + .map_err(|err| GeozeroError::Property(err.to_string()))?; + let mut out = vec![]; + enc.encode(within_batch_row_idx, &mut out); + let json_string = String::from_utf8(out) + .map_err(|err| GeozeroError::Property(err.to_string()))?; + processor.property(property_idx, name, &ColumnValue::Json(&json_string))?; + } + DataType::Date32 => { + let arr = array.as_primitive::(); + let datetime = arr.value_as_datetime(within_batch_row_idx).unwrap(); + let dt_str = datetime.and_utc().to_rfc3339(); + processor.property(property_idx, name, &ColumnValue::DateTime(&dt_str))?; + } + DataType::Date64 => { + let arr = array.as_primitive::(); + let datetime = arr.value_as_datetime(within_batch_row_idx).unwrap(); + let dt_str = datetime.and_utc().to_rfc3339(); + processor.property(property_idx, name, &ColumnValue::DateTime(&dt_str))?; + } + DataType::Timestamp(unit, tz) => { + let arrow_tz = if let Some(tz) = tz { + Some(Tz::from_str(tz).map_err(|err| GeozeroError::Property(err.to_string()))?) + } else { + None + }; + + macro_rules! impl_timestamp { + ($arrow_type:ty) => {{ + let arr = array.as_primitive::<$arrow_type>(); + let dt_str = if let Some(arrow_tz) = arrow_tz { + arr.value_as_datetime_with_tz(within_batch_row_idx, arrow_tz) + .unwrap() + .to_rfc3339() + } else { + arr.value_as_datetime(within_batch_row_idx) + .unwrap() + .and_utc() + .to_rfc3339() + }; + processor.property(property_idx, name, &ColumnValue::DateTime(&dt_str))?; + }}; + } + + match unit { + TimeUnit::Microsecond => impl_timestamp!(TimestampMicrosecondType), + TimeUnit::Millisecond => impl_timestamp!(TimestampMillisecondType), + TimeUnit::Nanosecond => impl_timestamp!(TimestampNanosecondType), + TimeUnit::Second => impl_timestamp!(TimestampSecondType), + } + } + dt => { + return Err(GeozeroError::Properties(format!( + "unsupported type: {dt:?}", + ))); + } + } + property_idx += 1; + } + + Ok(()) +} + +fn process_geometry_n( + geometry_column: &Arc, + within_batch_row_idx: usize, + processor: &mut P, +) -> Result<(), GeozeroError> { + let arr = geometry_column.as_ref(); + let i = within_batch_row_idx; + + use GeoArrowType::*; + // TODO: should we be passing the geom_idx down into these process* functions? + match arr.data_type() { + Point(_) => { + let geom = arr.as_point().value(i).unwrap(); + process_point(&geom, 0, processor)?; + } + LineString(_) => { + let geom = arr.as_line_string().value(i).unwrap(); + process_line_string(&geom, 0, processor)?; + } + Polygon(_) => { + let geom = arr.as_polygon().value(i).unwrap(); + process_polygon(&geom, true, 0, processor)?; + } + MultiPoint(_) => { + let geom = arr.as_multi_point().value(i).unwrap(); + process_multi_point(&geom, 0, processor)?; + } + MultiLineString(_) => { + let geom = arr.as_multi_line_string().value(i).unwrap(); + process_multi_line_string(&geom, 0, processor)?; + } + MultiPolygon(_) => { + let geom = arr.as_multi_polygon().value(i).unwrap(); + process_multi_polygon(&geom, 0, processor)?; + } + GeometryCollection(_) => { + let geom = arr.as_geometry_collection().value(i).unwrap(); + process_geometry_collection(&geom, 0, processor)?; + } + Wkb(_) => { + let geom = arr + .as_wkb::() + .value(i) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(&geom, 0, processor)?; + } + LargeWkb(_) => { + let geom = arr + .as_wkb::() + .value(i) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(&geom, 0, processor)?; + } + WkbView(_) => { + let geom = arr + .as_wkb_view() + .value(i) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(&geom, 0, processor)?; + } + Wkt(_) => { + let geom = arr + .as_wkt::() + .value(i) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(&geom, 0, processor)?; + } + LargeWkt(_) => { + let geom = arr + .as_wkt::() + .value(i) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(&geom, 0, processor)?; + } + WktView(_) => { + let geom = arr + .as_wkt_view() + .value(i) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + process_geometry(&geom, 0, processor)?; + } + Rect(_) => { + let geom = arr.as_rect().value(i).unwrap(); + let wrapper = RectWrapper::try_new(&geom) + .map_err(|err| geozero::error::GeozeroError::Geometry(err.to_string()))?; + process_polygon(&wrapper, true, 0, processor)? + } + Geometry(_) => { + let geom = arr.as_geometry().value(i).unwrap(); + process_geometry(&geom, 0, processor)?; + } + } + + Ok(()) +} + +pub(super) fn geometry_columns(schema: &Schema) -> Vec { + let mut geom_indices = vec![]; + for (field_idx, field) in schema.fields().iter().enumerate() { + if let Ok(Some(_)) = GeoArrowType::from_extension_field(field.as_ref()) { + geom_indices.push(field_idx); + } + } + geom_indices +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/data_source/record_batch_reader.rs b/src/geoarrow/geoarrow-array/src/geozero/export/data_source/record_batch_reader.rs new file mode 100644 index 0000000000..5516207f52 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/data_source/record_batch_reader.rs @@ -0,0 +1,81 @@ +use arrow_array::RecordBatchReader; +use geozero::{FeatureProcessor, GeozeroDatasource, error::GeozeroError}; + +use crate::geozero::export::data_source::record_batch::{geometry_columns, process_batch}; + +/// A newtype wrapper around an [`arrow_array::RecordBatchReader`] so that we can implement the +/// [`geozero::GeozeroDatasource`] trait on it. +/// +/// This allows for exporting Arrow data to a geozero-based consumer even when not all of the Arrow +/// data is present in memory at once. +pub struct GeozeroRecordBatchReader(Box); + +impl GeozeroRecordBatchReader { + /// Create a new GeozeroRecordBatchReader from a [`RecordBatchReader`]. + pub fn new(reader: Box) -> Self { + Self(reader) + } + + /// Access the underlying [`RecordBatchReader`]. + pub fn into_inner(self) -> Box { + self.0 + } +} + +impl AsRef> for GeozeroRecordBatchReader { + fn as_ref(&self) -> &Box { + &self.0 + } +} + +impl AsMut> for GeozeroRecordBatchReader { + fn as_mut(&mut self) -> &mut Box { + &mut self.0 + } +} + +impl From> for GeozeroRecordBatchReader { + fn from(value: Box) -> Self { + Self(value) + } +} + +impl From> for GeozeroRecordBatchReader { + fn from(value: Box) -> Self { + Self(value) + } +} + +impl GeozeroDatasource for GeozeroRecordBatchReader { + fn process(&mut self, processor: &mut P) -> Result<(), GeozeroError> { + let reader = self.as_mut(); + let schema = reader.schema(); + let geom_indices = geometry_columns(&schema); + let geometry_column_index = if geom_indices.len() != 1 { + Err(GeozeroError::Dataset( + "Writing through geozero not supported with multiple geometries".to_string(), + ))? + } else { + geom_indices[0] + }; + + processor.dataset_begin(None)?; + + let mut overall_row_idx = 0; + for batch in reader.into_iter() { + let batch = batch.map_err(|err| GeozeroError::Dataset(err.to_string()))?; + process_batch( + &batch, + &schema, + geometry_column_index, + overall_row_idx, + processor, + )?; + overall_row_idx += batch.num_rows(); + } + + processor.dataset_end()?; + + Ok(()) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/mod.rs b/src/geoarrow/geoarrow-array/src/geozero/export/mod.rs new file mode 100644 index 0000000000..9c06f073b1 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/mod.rs @@ -0,0 +1,7 @@ +//! Implementation to export GeoArrow arrays through the geozero API. + +mod array; +mod data_source; +pub(crate) mod scalar; + +pub use data_source::{GeozeroRecordBatchReader, GeozeroRecordBatchWriter}; diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/coord.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/coord.rs new file mode 100644 index 0000000000..dbc32ad784 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/coord.rs @@ -0,0 +1,47 @@ +use geo_traits::CoordTrait; +use geozero::GeomProcessor; + +pub(crate) fn process_coord( + coord: &impl CoordTrait, + coord_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + use geo_traits::Dimensions; + + match coord.dim() { + Dimensions::Xy | Dimensions::Unknown(2) => processor.xy(coord.x(), coord.y(), coord_idx)?, + Dimensions::Xyz | Dimensions::Unknown(3) => processor.coordinate( + coord.x(), + coord.y(), + Some(unsafe { coord.nth_unchecked(2) }), + None, + None, + None, + coord_idx, + )?, + Dimensions::Xym => processor.coordinate( + coord.x(), + coord.y(), + None, + Some(unsafe { coord.nth_unchecked(2) }), + None, + None, + coord_idx, + )?, + Dimensions::Xyzm | Dimensions::Unknown(4) => processor.coordinate( + coord.x(), + coord.y(), + Some(unsafe { coord.nth_unchecked(2) }), + Some(unsafe { coord.nth_unchecked(3) }), + None, + None, + coord_idx, + )?, + d => { + return Err(geozero::error::GeozeroError::Geometry(format!( + "Unexpected dimension {d:?}", + ))); + } + }; + Ok(()) +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/geometry.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/geometry.rs new file mode 100644 index 0000000000..dbc8b62ec5 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/geometry.rs @@ -0,0 +1,43 @@ +use geo_traits::{GeometryTrait, GeometryType}; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use super::{ + process_geometry_collection, process_line_string, process_multi_line_string, + process_multi_point, process_multi_polygon, process_point, process_polygon, process_rect, +}; +use crate::{ + builder::geo_trait_wrappers::{LineWrapper, TriangleWrapper}, + scalar::Geometry, +}; + +pub(crate) fn process_geometry( + geom: &impl GeometryTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + use GeometryType::*; + + match geom.as_type() { + Point(g) => process_point(g, geom_idx, processor)?, + LineString(g) => process_line_string(g, geom_idx, processor)?, + Polygon(g) => process_polygon(g, true, geom_idx, processor)?, + MultiPoint(g) => process_multi_point(g, geom_idx, processor)?, + MultiLineString(g) => process_multi_line_string(g, geom_idx, processor)?, + MultiPolygon(g) => process_multi_polygon(g, geom_idx, processor)?, + GeometryCollection(g) => process_geometry_collection(g, geom_idx, processor)?, + Rect(r) => process_rect(r, geom_idx, processor)?, + Triangle(tri) => process_polygon(&TriangleWrapper(tri), true, geom_idx, processor)?, + Line(l) => process_line_string(&LineWrapper(l), geom_idx, processor)?, + }; + + Ok(()) +} + +impl GeozeroGeometry for Geometry<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_geometry(&self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/geometry_collection.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/geometry_collection.rs new file mode 100644 index 0000000000..256e1fe8a4 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/geometry_collection.rs @@ -0,0 +1,29 @@ +use geo_traits::GeometryCollectionTrait; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use super::process_geometry; +use crate::scalar::GeometryCollection; + +pub(crate) fn process_geometry_collection( + geom: &impl GeometryCollectionTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.geometrycollection_begin(geom.num_geometries(), geom_idx)?; + + for (i, geometry) in geom.geometries().enumerate() { + process_geometry(&geometry, i, processor)?; + } + + processor.geometrycollection_end(geom_idx)?; + Ok(()) +} + +impl GeozeroGeometry for GeometryCollection<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_geometry_collection(&self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/linestring.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/linestring.rs new file mode 100644 index 0000000000..544d05912c --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/linestring.rs @@ -0,0 +1,29 @@ +use geo_traits::LineStringTrait; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use super::process_coord; +use crate::scalar::LineString; + +pub(crate) fn process_line_string( + geom: &impl LineStringTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.linestring_begin(true, geom.num_coords(), geom_idx)?; + + for (coord_idx, coord) in geom.coords().enumerate() { + process_coord(&coord, coord_idx, processor)?; + } + + processor.linestring_end(true, geom_idx)?; + Ok(()) +} + +impl GeozeroGeometry for LineString<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_line_string(self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/mod.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/mod.rs new file mode 100644 index 0000000000..4a13a581f9 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/mod.rs @@ -0,0 +1,21 @@ +mod coord; +mod geometry; +mod geometry_collection; +mod linestring; +mod multilinestring; +mod multipoint; +mod multipolygon; +mod point; +mod polygon; +mod rect; + +pub(crate) use coord::process_coord; +pub(crate) use geometry::process_geometry; +pub(crate) use geometry_collection::process_geometry_collection; +pub(crate) use linestring::process_line_string; +pub(crate) use multilinestring::process_multi_line_string; +pub(crate) use multipoint::process_multi_point; +pub(crate) use multipolygon::process_multi_polygon; +pub(crate) use point::{process_point, process_point_as_coord}; +pub(crate) use polygon::process_polygon; +pub(crate) use rect::process_rect; diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multilinestring.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multilinestring.rs new file mode 100644 index 0000000000..145d485cc8 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multilinestring.rs @@ -0,0 +1,35 @@ +use geo_traits::{LineStringTrait, MultiLineStringTrait}; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use super::process_coord; +use crate::scalar::MultiLineString; + +pub(crate) fn process_multi_line_string( + geom: &impl MultiLineStringTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.multilinestring_begin(geom.num_line_strings(), geom_idx)?; + + for (line_idx, line) in geom.line_strings().enumerate() { + processor.linestring_begin(false, line.num_coords(), line_idx)?; + + for (coord_idx, coord) in line.coords().enumerate() { + process_coord(&coord, coord_idx, processor)?; + } + + processor.linestring_end(false, line_idx)?; + } + + processor.multilinestring_end(geom_idx)?; + Ok(()) +} + +impl GeozeroGeometry for MultiLineString<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_multi_line_string(self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multipoint.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multipoint.rs new file mode 100644 index 0000000000..98bb2e8657 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multipoint.rs @@ -0,0 +1,29 @@ +use geo_traits::MultiPointTrait; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use super::process_point_as_coord; +use crate::scalar::MultiPoint; + +pub(crate) fn process_multi_point( + geom: &impl MultiPointTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.multipoint_begin(geom.num_points(), geom_idx)?; + + for (point_idx, point) in geom.points().enumerate() { + process_point_as_coord(&point, point_idx, processor)?; + } + + processor.multipoint_end(geom_idx)?; + Ok(()) +} + +impl GeozeroGeometry for MultiPoint<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_multi_point(self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multipolygon.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multipolygon.rs new file mode 100644 index 0000000000..a6cdacbe9e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/multipolygon.rs @@ -0,0 +1,29 @@ +use geo_traits::MultiPolygonTrait; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use super::process_polygon; +use crate::scalar::MultiPolygon; + +pub(crate) fn process_multi_polygon( + geom: &impl MultiPolygonTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.multipolygon_begin(geom.num_polygons(), geom_idx)?; + + for (polygon_idx, polygon) in geom.polygons().enumerate() { + process_polygon(&polygon, false, polygon_idx, processor)?; + } + + processor.multipolygon_end(geom_idx)?; + Ok(()) +} + +impl GeozeroGeometry for MultiPolygon<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_multi_polygon(self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/point.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/point.rs new file mode 100644 index 0000000000..6743691a7b --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/point.rs @@ -0,0 +1,81 @@ +use geo_traits::{CoordTrait, PointTrait}; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::scalar::Point; + +/// Process a [PointTrait] through a [GeomProcessor]. +pub(crate) fn process_point( + geom: &impl PointTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.point_begin(geom_idx)?; + process_point_as_coord(geom, 0, processor)?; + processor.point_end(geom_idx)?; + Ok(()) +} + +/// Note that this does _not_ call `processor.point_begin` and `processor.point_end` because as of +/// geozero v0.12, `point_begin` and `point_end` are **not** called for each point in a +/// MultiPoint +/// +pub(crate) fn process_point_as_coord( + geom: &impl PointTrait, + coord_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + use geo_traits::Dimensions; + + if let Some(coord) = geom.coord() { + match coord.dim() { + Dimensions::Xy | Dimensions::Unknown(2) => { + processor.xy(coord.x(), coord.y(), coord_idx)? + } + Dimensions::Xyz | Dimensions::Unknown(3) => processor.coordinate( + coord.x(), + coord.y(), + Some(unsafe { coord.nth_unchecked(2) }), + None, + None, + None, + coord_idx, + )?, + Dimensions::Xym => processor.coordinate( + coord.x(), + coord.y(), + None, + Some(unsafe { coord.nth_unchecked(2) }), + None, + None, + coord_idx, + )?, + Dimensions::Xyzm | Dimensions::Unknown(4) => processor.coordinate( + coord.x(), + coord.y(), + Some(unsafe { coord.nth_unchecked(2) }), + Some(unsafe { coord.nth_unchecked(3) }), + None, + None, + coord_idx, + )?, + d => { + return Err(geozero::error::GeozeroError::Geometry(format!( + "Unexpected dimension {d:?}", + ))); + } + }; + } else { + processor.empty_point(coord_idx)?; + } + + Ok(()) +} + +impl GeozeroGeometry for Point<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_point(self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/polygon.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/polygon.rs new file mode 100644 index 0000000000..3b2020b55c --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/polygon.rs @@ -0,0 +1,50 @@ +use geo_traits::{LineStringTrait, PolygonTrait}; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use super::process_coord; +use crate::scalar::Polygon; + +fn process_ring( + ring: impl LineStringTrait, + ring_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.linestring_begin(false, ring.num_coords(), ring_idx)?; + + for (coord_idx, coord) in ring.coords().enumerate() { + process_coord(&coord, coord_idx, processor)?; + } + + processor.linestring_end(false, ring_idx)?; + Ok(()) +} + +pub(crate) fn process_polygon( + geom: &impl PolygonTrait, + tagged: bool, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + processor.polygon_begin(tagged, geom.num_interiors() + 1, geom_idx)?; + + if let Some(exterior) = geom.exterior() { + process_ring(exterior, 0, processor)?; + } + + for (interior_ring_idx, interior_ring) in geom.interiors().enumerate() { + process_ring(interior_ring, interior_ring_idx + 1, processor)?; + } + + processor.polygon_end(tagged, geom_idx)?; + + Ok(()) +} + +impl GeozeroGeometry for Polygon<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_polygon(self, true, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/export/scalar/rect.rs b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/rect.rs new file mode 100644 index 0000000000..10e612b27b --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/export/scalar/rect.rs @@ -0,0 +1,26 @@ +use geo_traits::RectTrait; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + builder::geo_trait_wrappers::RectWrapper, geozero::export::scalar::process_polygon, + scalar::Rect, +}; + +pub(crate) fn process_rect( + geom: &impl RectTrait, + geom_idx: usize, + processor: &mut P, +) -> geozero::error::Result<()> { + let polygon = RectWrapper::try_new(geom) + .map_err(|err| geozero::error::GeozeroError::Geometry(err.to_string()))?; + process_polygon(&polygon, true, geom_idx, processor) +} + +impl GeozeroGeometry for Rect<'_> { + fn process_geom(&self, processor: &mut P) -> geozero::error::Result<()> + where + Self: Sized, + { + process_rect(self, 0, processor) + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/geometry.rs b/src/geoarrow/geoarrow-array/src/geozero/import/geometry.rs new file mode 100644 index 0000000000..f91077e438 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/geometry.rs @@ -0,0 +1,285 @@ +use std::{fmt::Debug, sync::Arc}; + +use geoarrow_schema::{GeometryType, error::GeoArrowResult}; +use geozero::{GeomProcessor, GeozeroGeometry, error::GeozeroError, geo_types::GeoWriter}; + +use crate::{ + GeoArrowArray, array::GeometryArray, builder::GeometryBuilder, trait_::GeoArrowArrayBuilder, +}; + +/// GeoZero trait to convert to GeoArrow [`GeometryArray`]. +/// +/// **NOTE** only XY dimensions are currently supported here. +/// +/// (This is because the internal GeoWriter only supports XY dimensions.) +pub trait ToGeometryArray { + /// Convert to GeoArrow [`GeometryArray`] + fn to_geometry_array(&self, typ: GeometryType) -> geozero::error::Result { + Ok(self.to_geometry_builder(typ)?.finish()) + } + + /// Convert to a GeoArrow [`GeometryBuilder`] + fn to_geometry_builder(&self, typ: GeometryType) -> geozero::error::Result; +} + +impl ToGeometryArray for T { + fn to_geometry_builder(&self, typ: GeometryType) -> geozero::error::Result { + let mut stream_builder = GeometryStreamBuilder::new(typ); + self.process_geom(&mut stream_builder)?; + Ok(stream_builder.builder) + } +} + +/// A streaming builder for GeoArrow [`GeometryArray`]. +/// +/// This is useful in conjunction with [`geozero`] APIs because its coordinate stream requires the +/// consumer to keep track of which geometry type is currently being added to. +/// +/// This implementation can be complex because we need to connect the push-based stream of the +/// geozero source (coordinate-by-coordinate) with the pull-based (complete geometry) APIs of the +/// [`GeometryBuilder`]. In particular, [`GeometryBuilder`] requires reading from _whole +/// geometries_. +/// +/// This is implemented with an internal [GeoWriter] used to buffer each stream of coordinates. +/// Each incoming geometry is collected into a "current geometry", and then when that geometry's +/// stream is finished, that geometry is propagated on to the [`GeometryBuilder`] and the current +/// geometry is cleared. +/// +/// Note that this has some memory overhead because of the buffering, and it requires copying +/// _once_ from the geozero source into the intermediate [geo_types] object, and then _again_ into +/// the GeoArrow array. +/// +/// In the future we could use a bump allocator to improve memory performance here. +/// +/// Converting an [`GeometryStreamBuilder`] into a [`GeometryArray`] is `O(1)`. +struct GeometryStreamBuilder { + /// The underlying geometry builder. When each geometry is finished, we add the geometry to + /// this builder. + builder: GeometryBuilder, + /// The current geometry being built. [GeoWriter] implements [GeomProcessor]. + current_geometry: GeoWriter, + /// The current nesting level of geometry collections. This is required because geozero + /// represents an array of geometries as a GeometryCollection. But we don't want to try to + /// "finish" the `current_geometry` when this only represents the top-level sequence of + /// geometries we're putting into the array. + /// + /// We should only "finish" the `current_geometry` for _nested_ geometry collections beyond the + /// root level. + geometry_collection_level: usize, +} + +impl GeometryStreamBuilder { + pub fn new(typ: GeometryType) -> Self { + Self { + builder: GeometryBuilder::new(typ), + current_geometry: GeoWriter::new(), + geometry_collection_level: 0, + } + } + + fn push_current_geometry(&mut self) -> geozero::error::Result<()> { + let geom = self + .current_geometry + .take_geometry() + .ok_or(GeozeroError::Geometry("Take geometry failed".to_string()))?; + self.builder + .push_geometry(Some(&geom)) + .map_err(|err| GeozeroError::Geometry(err.to_string()))?; + self.current_geometry = GeoWriter::new(); + Ok(()) + } +} + +impl Debug for GeometryStreamBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.builder.fmt(f) + } +} + +#[allow(unused_variables)] +impl GeomProcessor for GeometryStreamBuilder { + fn xy(&mut self, x: f64, y: f64, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.xy(x, y, idx) + } + + fn coordinate( + &mut self, + x: f64, + y: f64, + z: Option, + m: Option, + t: Option, + tm: Option, + idx: usize, + ) -> geozero::error::Result<()> { + self.current_geometry.coordinate(x, y, z, m, t, tm, idx) + } + + fn empty_point(&mut self, idx: usize) -> geozero::error::Result<()> { + // This needs to be separate because GeoWriter doesn't know how to handle empty points + Err(GeozeroError::Geometry( + "Empty points not currently supported in ToGeometryArray.".to_string(), + )) + } + + fn point_begin(&mut self, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.point_begin(idx) + } + + fn point_end(&mut self, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.point_end(idx)?; + self.push_current_geometry() + } + + fn multipoint_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.multipoint_begin(size, idx) + } + + fn multipoint_end(&mut self, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.multipoint_end(idx)?; + self.push_current_geometry() + } + + fn linestring_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + self.current_geometry.linestring_begin(tagged, size, idx) + } + + fn linestring_end(&mut self, tagged: bool, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.linestring_end(tagged, idx)?; + + // When tagged is true, that means it's a standalone LineString and not part of a + // MultiLineString + if tagged { + self.push_current_geometry()?; + } + Ok(()) + } + + fn multilinestring_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.multilinestring_begin(size, idx) + } + + fn multilinestring_end(&mut self, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.multilinestring_end(idx)?; + self.push_current_geometry() + } + + fn polygon_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + self.current_geometry.polygon_begin(tagged, size, idx) + } + + fn polygon_end(&mut self, tagged: bool, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.polygon_end(tagged, idx)?; + + // When tagged is true, that means it's a standalone LineString and not part of a + // MultiLineString + if tagged { + self.push_current_geometry()?; + } + + Ok(()) + } + + fn multipolygon_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.multipolygon_begin(size, idx) + } + + fn multipolygon_end(&mut self, idx: usize) -> geozero::error::Result<()> { + self.current_geometry.multipolygon_end(idx)?; + self.push_current_geometry() + } + + fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + if self.geometry_collection_level > 0 { + self.current_geometry.geometrycollection_begin(size, idx)?; + } + + self.geometry_collection_level += 1; + Ok(()) + } + + fn geometrycollection_end(&mut self, idx: usize) -> geozero::error::Result<()> { + self.geometry_collection_level -= 1; + + if self.geometry_collection_level > 0 { + self.current_geometry.geometrycollection_end(idx)?; + self.push_current_geometry()?; + } + + Ok(()) + } +} + +impl GeoArrowArrayBuilder for GeometryStreamBuilder { + fn len(&self) -> usize { + self.builder.len() + } + + fn push_null(&mut self) { + self.builder.push_null() + } + + fn push_geometry( + &mut self, + geometry: Option<&impl geo_traits::GeometryTrait>, + ) -> GeoArrowResult<()> { + self.builder.push_geometry(geometry) + } + + fn finish(self) -> Arc { + Arc::new(self.builder.finish()) + } +} + +// #[cfg(test)] +// mod test { +// use geo_types::{Geometry, GeometryCollection}; +// use geozero::error::Result; + +// use super::*; +// use crate::test::{linestring, multilinestring, multipoint, multipolygon, point, polygon}; + +// fn geoms() -> Vec { +// vec![ +// point::p0().into(), +// point::p1().into(), +// point::p2().into(), +// linestring::ls0().into(), +// linestring::ls1().into(), +// polygon::p0().into(), +// polygon::p1().into(), +// multipoint::mp0().into(), +// multipoint::mp1().into(), +// multilinestring::ml0().into(), +// multilinestring::ml1().into(), +// multipolygon::mp0().into(), +// multipolygon::mp1().into(), +// ] +// } + +// #[test] +// fn from_geo_using_geozero() -> Result<()> { +// let geo_geoms = geoms().into_iter().map(Some).collect::>(); +// let geo = Geometry::GeometryCollection(GeometryCollection(geoms())); +// let typ = GeometryType::new(Default::default()); +// let geo_arr = geo.to_geometry_array(typ.clone()).unwrap(); + +// let geo_arr2 = GeometryBuilder::from_nullable_geometries(&geo_geoms, typ) +// .unwrap() +// .finish(); + +// // These are constructed with two different code paths +// assert_eq!(geo_arr, geo_arr2); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/linestring.rs b/src/geoarrow/geoarrow-array/src/geozero/import/linestring.rs new file mode 100644 index 0000000000..9fb9262cb1 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/linestring.rs @@ -0,0 +1,121 @@ +use geoarrow_schema::LineStringType; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + array::LineStringArray, + builder::LineStringBuilder, + capacity::LineStringCapacity, + geozero::import::util::{from_xy, from_xyzm}, +}; + +/// GeoZero trait to convert to GeoArrow LineStringArray. +pub trait ToLineStringArray { + /// Convert to GeoArrow LineStringArray + fn to_line_string_array(&self, typ: LineStringType) -> geozero::error::Result { + Ok(self.to_line_string_builder(typ)?.finish()) + } + + /// Convert to a GeoArrow LineStringBuilder + fn to_line_string_builder( + &self, + typ: LineStringType, + ) -> geozero::error::Result; +} + +impl ToLineStringArray for T { + fn to_line_string_builder( + &self, + typ: LineStringType, + ) -> geozero::error::Result { + let mut mutable_array = LineStringBuilder::new(typ); + self.process_geom(&mut mutable_array)?; + Ok(mutable_array) + } +} + +#[allow(unused_variables)] +impl GeomProcessor for LineStringBuilder { + fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + let capacity = LineStringCapacity::new(0, size); + self.reserve(capacity); + Ok(()) + } + + fn geometrycollection_end(&mut self, idx: usize) -> geozero::error::Result<()> { + // self.shrink_to_fit() + Ok(()) + } + + fn xy(&mut self, x: f64, y: f64, idx: usize) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xy(x, y).expect("valid coord")) + .unwrap(); + Ok(()) + } + + fn coordinate( + &mut self, + x: f64, + y: f64, + z: Option, + m: Option, + t: Option, + tm: Option, + idx: usize, + ) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xyzm(x, y, z, m).expect("valid coord")) + .unwrap(); + Ok(()) + } + + fn linestring_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + let capacity = LineStringCapacity::new(size, 0); + self.reserve(capacity); + self.try_push_length(size).unwrap(); + Ok(()) + } + + fn linestring_end(&mut self, tagged: bool, idx: usize) -> geozero::error::Result<()> { + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geo_types::{Geometry, LineString}; +// use geoarrow_schema::Dimension; +// use geozero::error::Result; + +// use super::*; +// use crate::test::linestring::{ls0, ls1}; + +// #[test] +// fn from_geo_using_geozero() -> Result<()> { +// let geo_geoms = vec![ls0(), LineString(vec![]), ls1()]; +// let geo = Geometry::GeometryCollection( +// geo_geoms +// .clone() +// .into_iter() +// .map(Geometry::LineString) +// .collect(), +// ); +// let typ = LineStringType::new(Dimension::XY, Default::default()); +// let geo_arr = geo.to_line_string_array(typ.clone()).unwrap(); + +// let geo_arr2 = LineStringBuilder::from_line_strings(&geo_geoms, typ).finish(); + +// // These are constructed with two different code paths +// assert_eq!(geo_arr, geo_arr2); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/mod.rs b/src/geoarrow/geoarrow-array/src/geozero/import/mod.rs new file mode 100644 index 0000000000..31e58b533b --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/mod.rs @@ -0,0 +1,18 @@ +//! Import geozero types into GeoArrow arrays + +mod geometry; +mod linestring; +mod multilinestring; +mod multipoint; +mod multipolygon; +mod point; +mod polygon; +mod util; + +pub use geometry::ToGeometryArray; +pub use linestring::ToLineStringArray; +pub use multilinestring::ToMultiLineStringArray; +pub use multipoint::ToMultiPointArray; +pub use multipolygon::ToMultiPolygonArray; +pub use point::ToPointArray; +pub use polygon::ToPolygonArray; diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/multilinestring.rs b/src/geoarrow/geoarrow-array/src/geozero/import/multilinestring.rs new file mode 100644 index 0000000000..8bd1cbd31f --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/multilinestring.rs @@ -0,0 +1,153 @@ +use geoarrow_schema::MultiLineStringType; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + array::MultiLineStringArray, + builder::MultiLineStringBuilder, + capacity::MultiLineStringCapacity, + geozero::import::util::{from_xy, from_xyzm}, +}; + +/// GeoZero trait to convert to GeoArrow MultiLineStringArray. +pub trait ToMultiLineStringArray { + /// Convert to GeoArrow MultiLineStringArray + fn to_multi_line_string_array( + &self, + typ: MultiLineStringType, + ) -> geozero::error::Result { + Ok(self.to_multi_line_string_builder(typ)?.finish()) + } + + /// Convert to a GeoArrow MultiLineStringBuilder + fn to_multi_line_string_builder( + &self, + typ: MultiLineStringType, + ) -> geozero::error::Result; +} + +impl ToMultiLineStringArray for T { + fn to_multi_line_string_builder( + &self, + typ: MultiLineStringType, + ) -> geozero::error::Result { + let mut mutable_array = MultiLineStringBuilder::new(typ); + self.process_geom(&mut mutable_array)?; + Ok(mutable_array) + } +} + +#[allow(unused_variables)] +impl GeomProcessor for MultiLineStringBuilder { + fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + // reserve `size` geometries + let capacity = MultiLineStringCapacity::new(0, 0, size); + self.reserve(capacity); + Ok(()) + } + + fn geometrycollection_end(&mut self, idx: usize) -> geozero::error::Result<()> { + // self.shrink_to_fit() + Ok(()) + } + + fn xy(&mut self, x: f64, y: f64, idx: usize) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xy(x, y).expect("valid coord")) + .unwrap(); + Ok(()) + } + + fn coordinate( + &mut self, + x: f64, + y: f64, + z: Option, + m: Option, + t: Option, + tm: Option, + idx: usize, + ) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xyzm(x, y, z, m).expect("valid coord")) + .unwrap(); + Ok(()) + } + + // Here, size is the number of LineStrings in the MultiLineString + fn multilinestring_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + // reserve `size` line strings + let capacity = MultiLineStringCapacity::new(0, size, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the ring offsets in + // linestring_begin + self.try_push_geom_offset(size).unwrap(); + Ok(()) + } + + fn linestring_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + // > An untagged LineString is either a Polygon ring or part of a MultiLineString + // So if tagged, we need to update the geometry offsets array. + if tagged { + // reserve 1 line strings + let capacity = MultiLineStringCapacity::new(0, 1, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the ring offsets in + // linestring_begin + self.try_push_geom_offset(1).unwrap(); + } + + // reserve `size` coordinates + let capacity = MultiLineStringCapacity::new(size, 0, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the geometry offsets in + // polygon_begin + self.try_push_ring_offset(size).unwrap(); + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geo_types::Geometry; +// use geoarrow_schema::Dimension; +// use geozero::error::Result; + +// use super::*; +// use crate::test::multilinestring::{ml0, ml1}; + +// #[test] +// fn from_geozero() -> Result<()> { +// let geo_geoms = vec![ml0(), ml1()]; + +// let geo = Geometry::GeometryCollection( +// geo_geoms +// .clone() +// .into_iter() +// .map(Geometry::MultiLineString) +// .collect(), +// ); +// let typ = MultiLineStringType::new(Dimension::XY, Default::default()); +// let geo_arr = geo.to_multi_line_string_array(typ.clone()).unwrap(); + +// let geo_arr2 = MultiLineStringBuilder::from_multi_line_strings(&geo_geoms, typ).finish(); + +// // These are constructed with two different code paths +// assert_eq!(geo_arr, geo_arr2); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/multipoint.rs b/src/geoarrow/geoarrow-array/src/geozero/import/multipoint.rs new file mode 100644 index 0000000000..ddf235aad5 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/multipoint.rs @@ -0,0 +1,123 @@ +use geoarrow_schema::MultiPointType; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + array::MultiPointArray, + builder::MultiPointBuilder, + capacity::MultiPointCapacity, + geozero::import::util::{from_xy, from_xyzm}, +}; + +/// GeoZero trait to convert to GeoArrow MultiPointArray. +pub trait ToMultiPointArray { + /// Convert to GeoArrow MultiPointArray + fn to_multi_point_array(&self, typ: MultiPointType) -> geozero::error::Result { + Ok(self.to_multi_point_builder(typ)?.finish()) + } + + /// Convert to a GeoArrow MultiPointBuilder + fn to_multi_point_builder( + &self, + typ: MultiPointType, + ) -> geozero::error::Result; +} + +impl ToMultiPointArray for T { + fn to_multi_point_builder( + &self, + typ: MultiPointType, + ) -> geozero::error::Result { + let mut mutable_array = MultiPointBuilder::new(typ); + self.process_geom(&mut mutable_array)?; + Ok(mutable_array) + } +} + +#[allow(unused_variables)] +impl GeomProcessor for MultiPointBuilder { + fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + let capacity = MultiPointCapacity::new(0, size); + self.reserve(capacity); + Ok(()) + } + + fn xy(&mut self, x: f64, y: f64, idx: usize) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xy(x, y).expect("valid coord")) + .unwrap(); + Ok(()) + } + + fn coordinate( + &mut self, + x: f64, + y: f64, + z: Option, + m: Option, + t: Option, + tm: Option, + idx: usize, + ) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xyzm(x, y, z, m).expect("valid coord")) + .unwrap(); + Ok(()) + } + + fn point_begin(&mut self, idx: usize) -> geozero::error::Result<()> { + let capacity = MultiPointCapacity::new(1, 0); + self.reserve(capacity); + self.try_push_length(1).unwrap(); + Ok(()) + } + + fn point_end(&mut self, idx: usize) -> geozero::error::Result<()> { + Ok(()) + } + + fn multipoint_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + let capacity = MultiPointCapacity::new(size, 0); + self.reserve(capacity); + self.try_push_length(size).unwrap(); + Ok(()) + } + + fn multipoint_end(&mut self, idx: usize) -> geozero::error::Result<()> { + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geo_types::{Geometry, MultiPoint}; +// use geoarrow_schema::Dimension; +// use geozero::error::Result; + +// use super::*; +// use crate::test::multipoint::{mp0, mp1}; + +// #[test] +// fn from_geozero() -> Result<()> { +// let geo_geoms = vec![mp0(), MultiPoint(vec![]), mp1()]; + +// let geo = Geometry::GeometryCollection( +// geo_geoms +// .clone() +// .into_iter() +// .map(Geometry::MultiPoint) +// .collect(), +// ); +// let typ = MultiPointType::new(Dimension::XY, Default::default()); +// let geo_arr = geo.to_multi_point_array(typ.clone()).unwrap(); + +// let geo_arr2 = MultiPointBuilder::from_multi_points(&geo_geoms, typ).finish(); + +// // These are constructed with two different code paths +// assert_eq!(geo_arr, geo_arr2); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/multipolygon.rs b/src/geoarrow/geoarrow-array/src/geozero/import/multipolygon.rs new file mode 100644 index 0000000000..ad8d4dcf74 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/multipolygon.rs @@ -0,0 +1,173 @@ +use geoarrow_schema::MultiPolygonType; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + array::MultiPolygonArray, + builder::MultiPolygonBuilder, + capacity::MultiPolygonCapacity, + geozero::import::util::{from_xy, from_xyzm}, +}; + +/// GeoZero trait to convert to GeoArrow MultiPolygonArray. +pub trait ToMultiPolygonArray { + /// Convert to GeoArrow MultiPolygonArray + fn to_multi_polygon_array( + &self, + typ: MultiPolygonType, + ) -> geozero::error::Result; + + /// Convert to a GeoArrow MultiPolygonBuilder + fn to_multi_polygon_builder( + &self, + typ: MultiPolygonType, + ) -> geozero::error::Result; +} + +impl ToMultiPolygonArray for T { + fn to_multi_polygon_array( + &self, + typ: MultiPolygonType, + ) -> geozero::error::Result { + Ok(self.to_multi_polygon_builder(typ)?.finish()) + } + + fn to_multi_polygon_builder( + &self, + typ: MultiPolygonType, + ) -> geozero::error::Result { + let mut mutable_array = MultiPolygonBuilder::new(typ); + self.process_geom(&mut mutable_array)?; + Ok(mutable_array) + } +} + +#[allow(unused_variables)] +impl GeomProcessor for MultiPolygonBuilder { + fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + // reserve `size` geometries + let capacity = MultiPolygonCapacity::new(0, 0, 0, size); + self.reserve(capacity); + Ok(()) + } + + fn geometrycollection_end(&mut self, idx: usize) -> geozero::error::Result<()> { + // self.shrink_to_fit() + Ok(()) + } + + fn xy(&mut self, x: f64, y: f64, idx: usize) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + unsafe { self.push_coord(&from_xy(x, y).expect("valid coord")) }.unwrap(); + Ok(()) + } + + fn coordinate( + &mut self, + x: f64, + y: f64, + z: Option, + m: Option, + t: Option, + tm: Option, + idx: usize, + ) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in multipoint_begin to ensure + // offset arrays are correct. + unsafe { self.push_coord(&from_xyzm(x, y, z, m).expect("valid coord")) }.unwrap(); + Ok(()) + } + + fn multipolygon_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + // reserve `size` polygons + let capacity = MultiPolygonCapacity::new(0, 0, size, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the ring offsets in + // linestring_begin + self.try_push_geom_offset(size).unwrap(); + Ok(()) + } + + fn polygon_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + // > An untagged Polygon is part of a MultiPolygon + if tagged { + // reserve 1 polygon + let capacity = MultiPolygonCapacity::new(0, 0, 1, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the ring offsets in + // linestring_begin + self.try_push_geom_offset(1).unwrap(); + } + + // reserve `size` rings + let capacity = MultiPolygonCapacity::new(0, size, 0, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the geometry offsets in + // polygon_begin + self.try_push_polygon_offset(size).unwrap(); + Ok(()) + } + + fn linestring_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + assert!(!tagged); + + // reserve `size` coordinates + let capacity = MultiPolygonCapacity::new(size, 0, 0, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the ring offsets in + // linestring_begin + self.try_push_ring_offset(size).unwrap(); + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geo_types::Geometry; +// use geoarrow_schema::Dimension; +// use geozero::error::Result; + +// use super::*; +// use crate::test::multipolygon::{mp0, mp1}; + +// #[test] +// fn from_geozero() -> Result<()> { +// let geo_geoms = vec![mp0(), mp1()]; + +// let geo = Geometry::GeometryCollection( +// geo_geoms +// .clone() +// .into_iter() +// .map(Geometry::MultiPolygon) +// .collect(), +// ); +// let typ = MultiPolygonType::new(Dimension::XY, Default::default()); +// let geo_arr = geo.to_multi_polygon_array(typ.clone()).unwrap(); + +// let geo_arr2 = MultiPolygonBuilder::from_multi_polygons(&geo_geoms, typ).finish(); + +// // These are constructed with two different code paths +// assert_eq!(geo_arr, geo_arr2); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/point.rs b/src/geoarrow/geoarrow-array/src/geozero/import/point.rs new file mode 100644 index 0000000000..a46d3f244a --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/point.rs @@ -0,0 +1,196 @@ +use geoarrow_schema::PointType; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + array::PointArray, + builder::PointBuilder, + geozero::import::util::{from_xy, from_xyzm}, +}; + +/// GeoZero trait to convert to GeoArrow PointArray. +pub trait ToPointArray { + /// Convert to GeoArrow PointArray + fn to_point_array(&self, typ: PointType) -> geozero::error::Result { + Ok(self.to_point_builder(typ)?.finish()) + } + + /// Convert to a GeoArrow PointBuilder + fn to_point_builder(&self, typ: PointType) -> geozero::error::Result; +} + +impl ToPointArray for T { + fn to_point_builder(&self, typ: PointType) -> geozero::error::Result { + let mut mutable_point_array = PointBuilder::new(typ); + self.process_geom(&mut mutable_point_array)?; + Ok(mutable_point_array) + } +} + +#[allow(unused_variables)] +impl GeomProcessor for PointBuilder { + fn empty_point(&mut self, idx: usize) -> geozero::error::Result<()> { + self.push_empty(); + Ok(()) + } + + fn xy(&mut self, x: f64, y: f64, _idx: usize) -> geozero::error::Result<()> { + self.push_coord(from_xy(x, y).as_ref()); + Ok(()) + } + + fn coordinate( + &mut self, + x: f64, + y: f64, + z: Option, + m: Option, + t: Option, + tm: Option, + idx: usize, + ) -> geozero::error::Result<()> { + self.push_coord(from_xyzm(x, y, z, m).as_ref()); + Ok(()) + } + + fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + self.reserve_exact(size); + Ok(()) + } + + // Override all other trait _begin methods + fn circularstring_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn compoundcurve_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn tin_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn polygon_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn triangle_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn linestring_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + // fn multicurve_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + // Err(geozero::error::GeozeroError::Geometry( + // "Only point geometries allowed".to_string(), + // )) + // } + + fn multipoint_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn curvepolygon_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn multipolygon_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn multisurface_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn multilinestring_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } + + fn polyhedralsurface_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + Err(geozero::error::GeozeroError::Geometry( + "Only point geometries allowed".to_string(), + )) + } +} + +// #[cfg(test)] +// mod test { + +// use geo_types::{Geometry, GeometryCollection}; +// use geoarrow_schema::Dimension; + +// use super::*; +// use crate::{ +// GeoArrowArrayAccessor, +// test::{linestring, point}, +// }; + +// #[test] +// fn from_geozero() { +// let geo = Geometry::GeometryCollection( +// vec![ +// Geometry::Point(point::p0()), +// Geometry::Point(point::p1()), +// Geometry::Point(point::p2()), +// ] +// .into(), +// ); + +// let typ = PointType::new(Dimension::XY, Default::default()); +// let point_array = geo.to_point_array(typ).unwrap(); +// assert_eq!(point_array.value(0).unwrap(), point::p0()); +// assert_eq!(point_array.value(1).unwrap(), point::p1()); +// assert_eq!(point_array.value(2).unwrap(), point::p2()); +// } + +// #[test] +// fn from_geozero_error_multiple_geom_types() { +// let geo = Geometry::GeometryCollection(GeometryCollection(vec![ +// Geometry::Point(point::p0()), +// Geometry::LineString(linestring::ls0()), +// ])); + +// let typ = PointType::new(Dimension::XY, Default::default()); +// let err = ToPointArray::to_point_array(&geo, typ).unwrap_err(); +// assert!(matches!(err, geozero::error::GeozeroError::Geometry(..))); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/polygon.rs b/src/geoarrow/geoarrow-array/src/geozero/import/polygon.rs new file mode 100644 index 0000000000..a5d95fac1f --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/polygon.rs @@ -0,0 +1,135 @@ +use geoarrow_schema::PolygonType; +use geozero::{GeomProcessor, GeozeroGeometry}; + +use crate::{ + array::PolygonArray, + builder::PolygonBuilder, + capacity::PolygonCapacity, + geozero::import::util::{from_xy, from_xyzm}, +}; + +/// GeoZero trait to convert to GeoArrow PolygonArray. +pub trait ToPolygonArray { + /// Convert to GeoArrow PolygonArray + fn to_polygon_array(&self, typ: PolygonType) -> geozero::error::Result { + Ok(self.to_polygon_builder(typ)?.finish()) + } + + /// Convert to a GeoArrow PolygonBuilder + fn to_polygon_builder(&self, typ: PolygonType) -> geozero::error::Result; +} + +impl ToPolygonArray for T { + fn to_polygon_builder(&self, typ: PolygonType) -> geozero::error::Result { + let mut mutable_array = PolygonBuilder::new(typ); + self.process_geom(&mut mutable_array)?; + Ok(mutable_array) + } +} + +#[allow(unused_variables)] +impl GeomProcessor for PolygonBuilder { + fn geometrycollection_begin(&mut self, size: usize, idx: usize) -> geozero::error::Result<()> { + // reserve `size` geometries + let capacity = PolygonCapacity::new(0, 0, size); + self.reserve(capacity); + Ok(()) + } + + fn geometrycollection_end(&mut self, idx: usize) -> geozero::error::Result<()> { + // self.shrink_to_fit() + Ok(()) + } + + fn xy(&mut self, x: f64, y: f64, idx: usize) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in polygon_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xy(x, y).expect("valid coord")) + .unwrap(); + Ok(()) + } + + fn coordinate( + &mut self, + x: f64, + y: f64, + z: Option, + m: Option, + t: Option, + tm: Option, + idx: usize, + ) -> geozero::error::Result<()> { + // # Safety: + // This upholds invariants because we call try_push_length in polygon_begin to ensure + // offset arrays are correct. + self.push_coord(&from_xyzm(x, y, z, m).expect("valid coord")) + .unwrap(); + Ok(()) + } + + // Here, size is the number of rings in the polygon + fn polygon_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + // reserve `size` rings + let capacity = PolygonCapacity::new(0, size, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the ring offsets in + // linestring_begin + self.try_push_geom_offset(size).unwrap(); + Ok(()) + } + + fn linestring_begin( + &mut self, + tagged: bool, + size: usize, + idx: usize, + ) -> geozero::error::Result<()> { + // reserve `size` coordinates + let capacity = PolygonCapacity::new(size, 0, 0); + self.reserve(capacity); + + // # Safety: + // This upholds invariants because we separately update the geometry offsets in + // polygon_begin + self.try_push_ring_offset(size).unwrap(); + Ok(()) + } +} + +// #[cfg(test)] +// mod test { +// use geo_types::Geometry; +// use geoarrow_schema::Dimension; +// use geozero::error::Result; + +// use super::*; +// use crate::test::polygon::{p0, p1}; + +// #[test] +// fn from_geozero() -> Result<()> { +// let geo_geoms = vec![p0(), p1()]; +// let gc = Geometry::GeometryCollection( +// geo_geoms +// .clone() +// .into_iter() +// .map(Geometry::Polygon) +// .collect(), +// ); +// let typ = PolygonType::new(Dimension::XY, Default::default()); +// let geo_arr = gc.to_polygon_array(typ.clone()).unwrap(); + +// let geo_arr2 = PolygonBuilder::from_polygons(&geo_geoms, typ).finish(); + +// // These are constructed with two different code paths +// assert_eq!(geo_arr, geo_arr2); +// Ok(()) +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/geozero/import/util.rs b/src/geoarrow/geoarrow-array/src/geozero/import/util.rs new file mode 100644 index 0000000000..9b4d4ed123 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/import/util.rs @@ -0,0 +1,68 @@ +use geo_traits::CoordTrait; + +/// Construct a coordinate from x and y values. +pub(super) fn from_xy(x: f64, y: f64) -> Option> { + if x.is_finite() && y.is_finite() { + let coord = wkt::types::Coord { + x, + y, + z: None, + m: None, + }; + Some(coord) + } else { + None + } +} + +/// Construct a coordinate from x, y, z, and m values. +pub(super) fn from_xyzm( + x: f64, + y: f64, + z: Option, + m: Option, +) -> Option> { + if let (Some(z), Some(m)) = (z, m) { + if [x, y, z, m].iter().all(|v| v.is_finite()) { + Some(wkt::types::Coord { + x, + y, + z: Some(z), + m: Some(m), + }) + } else { + None + } + } else if let Some(z) = z { + if [x, y, z].iter().all(|v| v.is_finite()) { + Some(wkt::types::Coord { + x, + y, + z: Some(z), + m: None, + }) + } else { + None + } + } else if let Some(m) = m { + if [x, y, m].iter().all(|v| v.is_finite()) { + Some(wkt::types::Coord { + x, + y, + z: None, + m: Some(m), + }) + } else { + None + } + } else if [x, y].iter().all(|v| v.is_finite()) { + Some(wkt::types::Coord { + x, + y, + z: None, + m: None, + }) + } else { + None + } +} diff --git a/src/geoarrow/geoarrow-array/src/geozero/mod.rs b/src/geoarrow/geoarrow-array/src/geozero/mod.rs new file mode 100644 index 0000000000..bc168d172e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/geozero/mod.rs @@ -0,0 +1,5 @@ +//! Implements the geometry and dataset conversion APIs defined by the [`geozero`] +//! crate. + +pub mod export; +pub mod import; diff --git a/src/geoarrow/geoarrow-array/src/lib.rs b/src/geoarrow/geoarrow-array/src/lib.rs new file mode 100644 index 0000000000..2d98388b80 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/lib.rs @@ -0,0 +1,16 @@ +pub mod array; +pub mod builder; +pub mod capacity; +pub mod cast; +mod eq; +#[cfg(feature = "geozero")] +pub mod geozero; +pub mod scalar; +mod trait_; +pub(crate) mod util; +mod wrap_array; + +pub use trait_::{ + GeoArrowArray, GeoArrowArrayAccessor, GeoArrowArrayIterator, GeoArrowArrayReader, IntoArrow, +}; +pub use wrap_array::WrapArray; diff --git a/src/geoarrow/geoarrow-array/src/scalar/coord/combined.rs b/src/geoarrow/geoarrow-array/src/scalar/coord/combined.rs new file mode 100644 index 0000000000..0546432712 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/coord/combined.rs @@ -0,0 +1,109 @@ +use geo_traits::CoordTrait; + +use crate::{ + eq::coord_eq, + scalar::{InterleavedCoord, SeparatedCoord}, +}; + +/// An Arrow equivalent of a Coord +/// +/// This implements [CoordTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub enum Coord<'a> { + /// Separated coordinate + Separated(SeparatedCoord<'a>), + /// Interleaved coordinate + Interleaved(InterleavedCoord<'a>), +} + +impl Coord<'_> { + /// Return `true` if all values in the coordinate are f64::NAN + pub(crate) fn is_nan(&self) -> bool { + match self { + Coord::Separated(c) => c.is_nan(), + Coord::Interleaved(c) => c.is_nan(), + } + } +} + +impl PartialEq for Coord<'_> { + fn eq(&self, other: &Self) -> bool { + coord_eq(self, other) + } +} + +impl PartialEq> for Coord<'_> { + fn eq(&self, other: &InterleavedCoord<'_>) -> bool { + coord_eq(self, other) + } +} + +impl PartialEq> for Coord<'_> { + fn eq(&self, other: &SeparatedCoord<'_>) -> bool { + coord_eq(self, other) + } +} + +impl CoordTrait for Coord<'_> { + type T = f64; + + fn dim(&self) -> geo_traits::Dimensions { + match self { + Coord::Interleaved(c) => c.dim(), + Coord::Separated(c) => c.dim(), + } + } + + fn nth_or_panic(&self, n: usize) -> Self::T { + match self { + Coord::Interleaved(c) => c.nth_or_panic(n), + Coord::Separated(c) => c.nth_or_panic(n), + } + } + + fn x(&self) -> Self::T { + match self { + Coord::Interleaved(c) => c.x(), + Coord::Separated(c) => c.x(), + } + } + + fn y(&self) -> Self::T { + match self { + Coord::Interleaved(c) => c.y(), + Coord::Separated(c) => c.y(), + } + } +} + +impl CoordTrait for &Coord<'_> { + type T = f64; + + fn dim(&self) -> geo_traits::Dimensions { + match self { + Coord::Interleaved(c) => c.dim(), + Coord::Separated(c) => c.dim(), + } + } + + fn nth_or_panic(&self, n: usize) -> Self::T { + match self { + Coord::Interleaved(c) => c.nth_or_panic(n), + Coord::Separated(c) => c.nth_or_panic(n), + } + } + + fn x(&self) -> Self::T { + match self { + Coord::Interleaved(c) => c.x(), + Coord::Separated(c) => c.x(), + } + } + + fn y(&self) -> Self::T { + match self { + Coord::Interleaved(c) => c.y(), + Coord::Separated(c) => c.y(), + } + } +} diff --git a/src/geoarrow/geoarrow-array/src/scalar/coord/interleaved.rs b/src/geoarrow/geoarrow-array/src/scalar/coord/interleaved.rs new file mode 100644 index 0000000000..baab5d1816 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/coord/interleaved.rs @@ -0,0 +1,111 @@ +use arrow_buffer::ScalarBuffer; +use geo_traits::CoordTrait; +use geoarrow_schema::Dimension; + +use crate::{eq::coord_eq, scalar::SeparatedCoord}; + +/// An Arrow equivalent of a Coord +/// +/// This implements [CoordTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct InterleavedCoord<'a> { + pub(crate) coords: &'a ScalarBuffer, + pub(crate) i: usize, + pub(crate) dim: Dimension, +} + +impl InterleavedCoord<'_> { + /// Return `true` if all values in the coordinate are f64::NAN + pub(crate) fn is_nan(&self) -> bool { + (0..self.dim.size()).all(|coord_dim| self.nth_or_panic(coord_dim).is_nan()) + } +} + +impl PartialEq for InterleavedCoord<'_> { + fn eq(&self, other: &Self) -> bool { + coord_eq(self, other) + } +} + +impl PartialEq> for InterleavedCoord<'_> { + fn eq(&self, other: &SeparatedCoord<'_>) -> bool { + coord_eq(self, other) + } +} + +impl CoordTrait for InterleavedCoord<'_> { + type T = f64; + + fn dim(&self) -> geo_traits::Dimensions { + self.dim.into() + } + + fn nth_or_panic(&self, n: usize) -> Self::T { + debug_assert!(n < self.dim.size()); + *self.coords.get(self.i * self.dim.size() + n).unwrap() + } + + fn x(&self) -> Self::T { + *self.coords.get(self.i * self.dim.size()).unwrap() + } + + fn y(&self) -> Self::T { + *self.coords.get(self.i * self.dim.size() + 1).unwrap() + } +} + +impl CoordTrait for &InterleavedCoord<'_> { + type T = f64; + + fn dim(&self) -> geo_traits::Dimensions { + self.dim.into() + } + + fn nth_or_panic(&self, n: usize) -> Self::T { + debug_assert!(n < self.dim.size()); + *self.coords.get(self.i * self.dim.size() + n).unwrap() + } + + fn x(&self) -> Self::T { + *self.coords.get(self.i * self.dim.size()).unwrap() + } + + fn y(&self) -> Self::T { + *self.coords.get(self.i * self.dim.size() + 1).unwrap() + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::Dimension; + +// use crate::array::{InterleavedCoordBuffer, SeparatedCoordBuffer}; + +// /// Test Eq where the current index is true but another index is false +// #[test] +// fn test_eq_other_index_false() { +// let coords1 = vec![0., 3., 1., 4., 2., 5.]; +// let buf1 = InterleavedCoordBuffer::new(coords1.into(), Dimension::XY); +// let coord1 = buf1.value(0); + +// let coords2 = vec![0., 3., 100., 400., 200., 500.]; +// let buf2 = InterleavedCoordBuffer::new(coords2.into(), Dimension::XY); +// let coord2 = buf2.value(0); + +// assert_eq!(coord1, coord2); +// } + +// #[test] +// fn test_eq_against_separated_coord() { +// let coords1 = vec![0., 3., 1., 4., 2., 5.]; +// let buf1 = InterleavedCoordBuffer::new(coords1.into(), Dimension::XY); +// let coord1 = buf1.value(0); + +// let x = vec![0.]; +// let y = vec![3.]; +// let buf2 = SeparatedCoordBuffer::from_vec(vec![x.into(), y.into()], Dimension::XY).unwrap(); +// let coord2 = buf2.value(0); + +// assert_eq!(coord1, coord2); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/coord/mod.rs b/src/geoarrow/geoarrow-array/src/scalar/coord/mod.rs new file mode 100644 index 0000000000..fc7541973e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/coord/mod.rs @@ -0,0 +1,13 @@ +//! Contains implementations for how to encode arrays of coordinates for all other geometry array +//! types. +//! +//! Coordinates can be either _interleaved_, where they're represented as a `FixedSizeList`, or +//! _separated_, where they're represented with a `StructArray`. + +mod combined; +mod interleaved; +mod separated; + +pub use combined::Coord; +pub use interleaved::InterleavedCoord; +pub use separated::SeparatedCoord; diff --git a/src/geoarrow/geoarrow-array/src/scalar/coord/separated.rs b/src/geoarrow/geoarrow-array/src/scalar/coord/separated.rs new file mode 100644 index 0000000000..8392ab8fd5 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/coord/separated.rs @@ -0,0 +1,114 @@ +use arrow_buffer::ScalarBuffer; +use geo_traits::CoordTrait; +use geoarrow_schema::Dimension; + +use crate::{eq::coord_eq, scalar::InterleavedCoord}; + +/// An Arrow equivalent of a Coord +/// +/// This implements [CoordTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct SeparatedCoord<'a> { + pub(crate) buffers: &'a [ScalarBuffer; 4], + pub(crate) i: usize, + pub(crate) dim: Dimension, +} + +impl SeparatedCoord<'_> { + /// Return `true` if all values in the coordinate are f64::NAN + pub(crate) fn is_nan(&self) -> bool { + (0..self.dim.size()).all(|coord_dim| self.nth_or_panic(coord_dim).is_nan()) + } +} + +impl PartialEq for SeparatedCoord<'_> { + fn eq(&self, other: &SeparatedCoord) -> bool { + coord_eq(self, other) + } +} + +impl PartialEq> for SeparatedCoord<'_> { + fn eq(&self, other: &InterleavedCoord) -> bool { + coord_eq(self, other) + } +} + +impl CoordTrait for SeparatedCoord<'_> { + type T = f64; + + fn dim(&self) -> geo_traits::Dimensions { + self.dim.into() + } + + fn nth_or_panic(&self, n: usize) -> Self::T { + self.buffers[n][self.i] + } + + fn x(&self) -> Self::T { + self.buffers[0][self.i] + } + + fn y(&self) -> Self::T { + self.buffers[1][self.i] + } +} + +impl CoordTrait for &SeparatedCoord<'_> { + type T = f64; + + fn dim(&self) -> geo_traits::Dimensions { + self.dim.into() + } + + fn nth_or_panic(&self, n: usize) -> Self::T { + self.buffers[n][self.i] + } + + fn x(&self) -> Self::T { + self.buffers[0][self.i] + } + + fn y(&self) -> Self::T { + self.buffers[1][self.i] + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::Dimension; + +// use crate::array::{InterleavedCoordBuffer, SeparatedCoordBuffer}; + +// /// Test Eq where the current index is true but another index is false +// #[test] +// fn test_eq_other_index_false() { +// let x1 = vec![0., 1., 2.]; +// let y1 = vec![3., 4., 5.]; +// let buf1 = +// SeparatedCoordBuffer::from_vec(vec![x1.into(), y1.into()], Dimension::XY).unwrap(); +// let coord1 = buf1.value(0); + +// let x2 = vec![0., 100., 2.]; +// let y2 = vec![3., 400., 5.]; +// let buf2 = +// SeparatedCoordBuffer::from_vec(vec![x2.into(), y2.into()], Dimension::XY).unwrap(); +// let coord2 = buf2.value(0); + +// assert_eq!(coord1, coord2); +// } + +// #[test] +// fn test_eq_against_interleaved_coord() { +// let x1 = vec![0., 1., 2.]; +// let y1 = vec![3., 4., 5.]; +// let buf1 = +// SeparatedCoordBuffer::from_vec(vec![x1.into(), y1.into()], Dimension::XY).unwrap(); +// let coord1 = buf1.value(0); + +// let coords2 = vec![0., 3., 1., 4., 2., 5.]; +// let buf2 = InterleavedCoordBuffer::new(coords2.into(), Dimension::XY); +// let coord2 = buf2.value(0); + +// assert_eq!(coord1, coord2); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/geometry.rs b/src/geoarrow/geoarrow-array/src/scalar/geometry.rs new file mode 100644 index 0000000000..f100058a65 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/geometry.rs @@ -0,0 +1,200 @@ +use geo_traits::{GeometryTrait, GeometryType, UnimplementedLine, UnimplementedTriangle}; + +use crate::{eq::geometry_eq, scalar::*}; + +/// An Arrow equivalent of a Geometry +/// +/// This implements [GeometryTrait], which you can use to extract data. +#[derive(Debug)] +pub enum Geometry<'a> { + /// Point geometry + Point(crate::scalar::Point<'a>), + /// LineString geometry + LineString(crate::scalar::LineString<'a>), + /// Polygon geometry + Polygon(crate::scalar::Polygon<'a>), + /// MultiPoint geometry + MultiPoint(crate::scalar::MultiPoint<'a>), + /// MultiLineString geometry + MultiLineString(crate::scalar::MultiLineString<'a>), + /// MultiPolygon geometry + MultiPolygon(crate::scalar::MultiPolygon<'a>), + /// GeometryCollection geometry + GeometryCollection(crate::scalar::GeometryCollection<'a>), + /// Rect geometry + Rect(crate::scalar::Rect<'a>), +} + +impl GeometryTrait for Geometry<'_> { + type T = f64; + type PointType<'b> + = Point<'b> + where + Self: 'b; + type LineStringType<'b> + = LineString<'b> + where + Self: 'b; + type PolygonType<'b> + = Polygon<'b> + where + Self: 'b; + type MultiPointType<'b> + = MultiPoint<'b> + where + Self: 'b; + type MultiLineStringType<'b> + = MultiLineString<'b> + where + Self: 'b; + type MultiPolygonType<'b> + = MultiPolygon<'b> + where + Self: 'b; + type GeometryCollectionType<'b> + = GeometryCollection<'b> + where + Self: 'b; + type RectType<'b> + = Rect<'b> + where + Self: 'b; + type LineType<'b> + = UnimplementedLine + where + Self: 'b; + type TriangleType<'b> + = UnimplementedTriangle + where + Self: 'b; + + fn dim(&self) -> geo_traits::Dimensions { + match self { + Geometry::Point(p) => p.dim(), + Geometry::LineString(p) => p.dim(), + Geometry::Polygon(p) => p.dim(), + Geometry::MultiPoint(p) => p.dim(), + Geometry::MultiLineString(p) => p.dim(), + Geometry::MultiPolygon(p) => p.dim(), + Geometry::GeometryCollection(p) => p.dim(), + Geometry::Rect(p) => p.dim(), + } + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + match self { + Geometry::Point(p) => GeometryType::Point(p), + Geometry::LineString(p) => GeometryType::LineString(p), + Geometry::Polygon(p) => GeometryType::Polygon(p), + Geometry::MultiPoint(p) => GeometryType::MultiPoint(p), + Geometry::MultiLineString(p) => GeometryType::MultiLineString(p), + Geometry::MultiPolygon(p) => GeometryType::MultiPolygon(p), + Geometry::GeometryCollection(p) => GeometryType::GeometryCollection(p), + Geometry::Rect(p) => GeometryType::Rect(p), + } + } +} + +impl<'a> GeometryTrait for &'a Geometry<'a> { + type T = f64; + type PointType<'b> + = Point<'b> + where + Self: 'b; + type LineStringType<'b> + = LineString<'b> + where + Self: 'b; + type PolygonType<'b> + = Polygon<'b> + where + Self: 'b; + type MultiPointType<'b> + = MultiPoint<'b> + where + Self: 'b; + type MultiLineStringType<'b> + = MultiLineString<'b> + where + Self: 'b; + type MultiPolygonType<'b> + = MultiPolygon<'b> + where + Self: 'b; + type GeometryCollectionType<'b> + = GeometryCollection<'b> + where + Self: 'b; + type RectType<'b> + = Rect<'b> + where + Self: 'b; + type LineType<'b> + = UnimplementedLine + where + Self: 'b; + type TriangleType<'b> + = UnimplementedTriangle + where + Self: 'b; + + fn dim(&self) -> geo_traits::Dimensions { + match self { + Geometry::Point(p) => p.dim(), + Geometry::LineString(p) => p.dim(), + Geometry::Polygon(p) => p.dim(), + Geometry::MultiPoint(p) => p.dim(), + Geometry::MultiLineString(p) => p.dim(), + Geometry::MultiPolygon(p) => p.dim(), + Geometry::GeometryCollection(p) => p.dim(), + Geometry::Rect(p) => p.dim(), + } + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + match self { + Geometry::Point(p) => GeometryType::Point(p), + Geometry::LineString(p) => GeometryType::LineString(p), + Geometry::Polygon(p) => GeometryType::Polygon(p), + Geometry::MultiPoint(p) => GeometryType::MultiPoint(p), + Geometry::MultiLineString(p) => GeometryType::MultiLineString(p), + Geometry::MultiPolygon(p) => GeometryType::MultiPolygon(p), + Geometry::GeometryCollection(p) => GeometryType::GeometryCollection(p), + Geometry::Rect(p) => GeometryType::Rect(p), + } + } +} + +impl> PartialEq for Geometry<'_> { + fn eq(&self, other: &G) -> bool { + geometry_eq(self, other) + } +} diff --git a/src/geoarrow/geoarrow-array/src/scalar/geometrycollection.rs b/src/geoarrow/geoarrow-array/src/scalar/geometrycollection.rs new file mode 100644 index 0000000000..d24d263f50 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/geometrycollection.rs @@ -0,0 +1,105 @@ +use arrow_buffer::OffsetBuffer; +use geo_traits::GeometryCollectionTrait; +use geoarrow_schema::Dimension; + +use crate::{ + array::MixedGeometryArray, eq::geometry_collection_eq, scalar::Geometry, + util::OffsetBufferUtils, +}; + +/// An Arrow equivalent of a GeometryCollection +/// +/// This implements [GeometryCollectionTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct GeometryCollection<'a> { + pub(crate) array: &'a MixedGeometryArray, + + /// Offsets into the geometry array where each geometry starts + pub(crate) geom_offsets: &'a OffsetBuffer, + + pub(crate) geom_index: usize, + + start_offset: usize, +} + +impl<'a> GeometryCollection<'a> { + pub(crate) fn new( + array: &'a MixedGeometryArray, + geom_offsets: &'a OffsetBuffer, + geom_index: usize, + ) -> Self { + let (start_offset, _) = geom_offsets.start_end(geom_index); + Self { + array, + geom_offsets, + geom_index, + start_offset, + } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.array.dim + } +} + +impl<'a> GeometryCollectionTrait for GeometryCollection<'a> { + type GeometryType<'b> + = Geometry<'a> + where + Self: 'b; + + fn num_geometries(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn geometry_unchecked(&self, i: usize) -> Self::GeometryType<'_> { + self.array.value(self.start_offset + i) + } +} + +impl<'a> GeometryCollectionTrait for &'a GeometryCollection<'a> { + type GeometryType<'b> + = Geometry<'a> + where + Self: 'b; + + fn num_geometries(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn geometry_unchecked(&self, i: usize) -> Self::GeometryType<'_> { + self.array.value(self.start_offset + i) + } +} + +impl> PartialEq for GeometryCollection<'_> { + fn eq(&self, other: &G) -> bool { + geometry_collection_eq(self, other) + } +} + +// #[cfg(test)] +// mod tests { +// use arrow_buffer::OffsetBufferBuilder; + +// use crate::array::PointArray; + +// use super::*; + +// #[test] +// fn stack_overflow_repro_issue_979() { +// let orig_point = geo::point!(x: 0., y: 0.); +// let array: MixedGeometryArray = +// PointArray::from((vec![orig_point].as_slice(), Dimension::XY)).into(); +// let mut offsets = OffsetBufferBuilder::new(1); +// offsets.push_length(1); +// let offsets = offsets.finish(); +// let gc = GeometryCollection::new(&array, &offsets, 0); + +// let out: geo::GeometryCollection = gc.into(); +// assert_eq!(out.0.len(), 1, "should be one point"); +// assert_eq!(out.0[0], geo::Geometry::Point(orig_point)); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/linestring.rs b/src/geoarrow/geoarrow-array/src/scalar/linestring.rs new file mode 100644 index 0000000000..770aef4b8f --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/linestring.rs @@ -0,0 +1,78 @@ +use arrow_buffer::OffsetBuffer; +use geo_traits::LineStringTrait; +use geoarrow_schema::Dimension; + +use crate::{array::CoordBuffer, eq::line_string_eq, scalar::Coord, util::OffsetBufferUtils}; + +/// An Arrow equivalent of a LineString +/// +/// This implements [LineStringTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct LineString<'a> { + pub(crate) coords: &'a CoordBuffer, + + /// Offsets into the coordinate array where each geometry starts + pub(crate) geom_offsets: &'a OffsetBuffer, + + pub(crate) geom_index: usize, + + start_offset: usize, +} + +impl<'a> LineString<'a> { + pub(crate) fn new( + coords: &'a CoordBuffer, + geom_offsets: &'a OffsetBuffer, + geom_index: usize, + ) -> Self { + let (start_offset, _) = geom_offsets.start_end(geom_index); + Self { + coords, + geom_offsets, + geom_index, + start_offset, + } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.coords.dim() + } +} + +impl<'a> LineStringTrait for LineString<'a> { + type CoordType<'b> + = Coord<'a> + where + Self: 'b; + + fn num_coords(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn coord_unchecked(&self, i: usize) -> Self::CoordType<'_> { + self.coords.value(self.start_offset + i) + } +} + +impl<'a> LineStringTrait for &'a LineString<'a> { + type CoordType<'b> + = Coord<'a> + where + Self: 'b; + + fn num_coords(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn coord_unchecked(&self, i: usize) -> Self::CoordType<'_> { + self.coords.value(self.start_offset + i) + } +} + +impl> PartialEq for LineString<'_> { + fn eq(&self, other: &G) -> bool { + line_string_eq(self, other) + } +} diff --git a/src/geoarrow/geoarrow-array/src/scalar/mod.rs b/src/geoarrow/geoarrow-array/src/scalar/mod.rs new file mode 100644 index 0000000000..4bd15eb7d1 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/mod.rs @@ -0,0 +1,51 @@ +//! Scalar references onto a parent GeoArrow array. +//! +//! For all "native" GeoArrow scalar types, (all types defined in this module) it is `O(1)` and +//! allocation-free for any coordinate access. +//! +//! For "serialized" scalars emitted from the [`GenericWkbArray`][crate::array::GenericWkbArray], +//! [`WkbViewArray`][crate::array::WkbViewArray], +//! [`GenericWktArray`][crate::array::GenericWktArray], and +//! [`WktViewArray`][crate::array::WktViewArray], there is an initial parsing step when accessing +//! the scalar from the [`GeoArrowArrayAccessor`][crate::GeoArrowArrayAccessor] trait. +//! +//! All scalars implement [`geo_traits`]. You can iterate through geometry parts directly using the +//! APIs exposed by [`geo_traits`]. Or, for simplicity at the cost of a memory copy, you can use +//! the traits defined in [`geo_traits::to_geo`] to convert these scalars to [`geo_types`] objects +//! (though keep in mind ). +//! +//! ## Converting to [`geo_types`] +//! +//! You can convert these scalars to [`geo_types`] objects using the [`geo_traits::to_geo`] traits. +//! +//! There are a couple drawbacks: +//! +//! - `geo_types` only supports 2D geometries. Any other dimensions will be dropped. +//! - `geo_types` doesn't support empty points. This is why both +//! [`ToGeoGeometry::to_geometry`][geo_traits::to_geo::ToGeoGeometry::to_geometry] and +//! [`ToGeoGeometry::try_to_geometry`][geo_traits::to_geo::ToGeoGeometry::try_to_geometry] exist. +//! The former will panic on any empty points. +//! + +mod coord; +mod geometry; +mod geometrycollection; +mod linestring; +mod multilinestring; +mod multipoint; +mod multipolygon; +mod point; +mod polygon; +mod rect; +mod specialization; + +pub use coord::{Coord, InterleavedCoord, SeparatedCoord}; +pub use geometry::Geometry; +pub use geometrycollection::GeometryCollection; +pub use linestring::LineString; +pub use multilinestring::MultiLineString; +pub use multipoint::MultiPoint; +pub use multipolygon::MultiPolygon; +pub use point::Point; +pub use polygon::Polygon; +pub use rect::Rect; diff --git a/src/geoarrow/geoarrow-array/src/scalar/multilinestring.rs b/src/geoarrow/geoarrow-array/src/scalar/multilinestring.rs new file mode 100644 index 0000000000..e7244e8b1e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/multilinestring.rs @@ -0,0 +1,114 @@ +use arrow_buffer::OffsetBuffer; +use geo_traits::MultiLineStringTrait; +use geoarrow_schema::Dimension; + +use crate::{ + array::CoordBuffer, eq::multi_line_string_eq, scalar::LineString, util::OffsetBufferUtils, +}; + +/// An Arrow equivalent of a MultiLineString +/// +/// This implements [MultiLineStringTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct MultiLineString<'a> { + pub(crate) coords: &'a CoordBuffer, + + /// Offsets into the ring array where each geometry starts + pub(crate) geom_offsets: &'a OffsetBuffer, + + /// Offsets into the coordinate array where each ring starts + pub(crate) ring_offsets: &'a OffsetBuffer, + + pub(crate) geom_index: usize, + + start_offset: usize, +} + +impl<'a> MultiLineString<'a> { + pub(crate) fn new( + coords: &'a CoordBuffer, + geom_offsets: &'a OffsetBuffer, + ring_offsets: &'a OffsetBuffer, + geom_index: usize, + ) -> Self { + let (start_offset, _) = geom_offsets.start_end(geom_index); + Self { + coords, + geom_offsets, + ring_offsets, + geom_index, + start_offset, + } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.coords.dim() + } +} + +impl<'a> MultiLineStringTrait for MultiLineString<'a> { + type InnerLineStringType<'b> + = LineString<'a> + where + Self: 'b; + + fn num_line_strings(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn line_string_unchecked(&self, i: usize) -> Self::InnerLineStringType<'_> { + LineString::new(self.coords, self.ring_offsets, self.start_offset + i) + } +} + +impl<'a> MultiLineStringTrait for &'a MultiLineString<'a> { + type InnerLineStringType<'b> + = LineString<'a> + where + Self: 'b; + + fn num_line_strings(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn line_string_unchecked(&self, i: usize) -> Self::InnerLineStringType<'_> { + LineString::new(self.coords, self.ring_offsets, self.start_offset + i) + } +} + +impl> PartialEq for MultiLineString<'_> { + fn eq(&self, other: &G) -> bool { + multi_line_string_eq(self, other) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_schema::{Dimension, MultiLineStringType}; + +// use crate::{ +// builder::MultiLineStringBuilder, +// test::multilinestring::{ml0, ml1}, +// trait_::GeoArrowArrayAccessor, +// }; + +// /// Test Eq where the current index is true but another index is false +// #[test] +// fn test_eq_other_index_false() { +// let typ = MultiLineStringType::new(Dimension::XY, Default::default()); + +// let arr1 = MultiLineStringBuilder::from_multi_line_strings( +// vec![ml0(), ml1()].as_slice(), +// typ.clone(), +// ) +// .finish(); +// let arr2 = +// MultiLineStringBuilder::from_multi_line_strings(vec![ml0(), ml0()].as_slice(), typ) +// .finish(); + +// assert_eq!(arr1.value(0).unwrap(), arr2.value(0).unwrap()); +// assert_ne!(arr1.value(1).unwrap(), arr2.value(1).unwrap()); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/multipoint.rs b/src/geoarrow/geoarrow-array/src/scalar/multipoint.rs new file mode 100644 index 0000000000..4052d45ac4 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/multipoint.rs @@ -0,0 +1,97 @@ +use arrow_buffer::OffsetBuffer; +use geo_traits::MultiPointTrait; +use geoarrow_schema::Dimension; + +use crate::{array::CoordBuffer, eq::multi_point_eq, scalar::Point, util::OffsetBufferUtils}; + +/// An Arrow equivalent of a MultiPoint +/// +/// This implements [MultiPointTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct MultiPoint<'a> { + /// Buffer of coordinates + pub(crate) coords: &'a CoordBuffer, + + /// Offsets into the coordinate array where each geometry starts + pub(crate) geom_offsets: &'a OffsetBuffer, + + pub(crate) geom_index: usize, + + start_offset: usize, +} + +impl<'a> MultiPoint<'a> { + pub(crate) fn new( + coords: &'a CoordBuffer, + geom_offsets: &'a OffsetBuffer, + geom_index: usize, + ) -> Self { + let (start_offset, _) = geom_offsets.start_end(geom_index); + Self { + coords, + geom_offsets, + geom_index, + start_offset, + } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.coords.dim() + } +} + +impl<'a> MultiPointTrait for MultiPoint<'a> { + type InnerPointType<'b> + = Point<'a> + where + Self: 'b; + + fn num_points(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn point_unchecked(&self, i: usize) -> Self::InnerPointType<'_> { + Point::new(self.coords, self.start_offset + i) + } +} + +impl<'a> MultiPointTrait for &'a MultiPoint<'a> { + type InnerPointType<'b> + = Point<'a> + where + Self: 'b; + + fn num_points(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn point_unchecked(&self, i: usize) -> Self::InnerPointType<'_> { + Point::new(self.coords, self.start_offset + i) + } +} + +impl> PartialEq for MultiPoint<'_> { + fn eq(&self, other: &G) -> bool { + multi_point_eq(self, other) + } +} + +// #[cfg(test)] +// mod test { +// use crate::array::MultiPointArray; +// use crate::test::multipoint::{mp0, mp1}; +// use crate::trait_::ArrayAccessor; +// use geoarrow_schema::Dimension; + +// /// Test Eq where the current index is true but another index is false +// #[test] +// fn test_eq_other_index_false() { +// let arr1: MultiPointArray = (vec![mp0(), mp1()].as_slice(), Dimension::XY).into(); +// let arr2: MultiPointArray = (vec![mp0(), mp0()].as_slice(), Dimension::XY).into(); + +// assert_eq!(arr1.value(0), arr2.value(0)); +// assert_ne!(arr1.value(1), arr2.value(1)); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/multipolygon.rs b/src/geoarrow/geoarrow-array/src/scalar/multipolygon.rs new file mode 100644 index 0000000000..bcb8ea2897 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/multipolygon.rs @@ -0,0 +1,116 @@ +use arrow_buffer::OffsetBuffer; +use geo_traits::MultiPolygonTrait; +use geoarrow_schema::Dimension; + +use crate::{array::CoordBuffer, eq::multi_polygon_eq, scalar::Polygon, util::OffsetBufferUtils}; + +/// An Arrow equivalent of a MultiPolygon +/// +/// This implements [MultiPolygonTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct MultiPolygon<'a> { + pub(crate) coords: &'a CoordBuffer, + + /// Offsets into the polygon array where each geometry starts + pub(crate) geom_offsets: &'a OffsetBuffer, + + /// Offsets into the ring array where each polygon starts + pub(crate) polygon_offsets: &'a OffsetBuffer, + + /// Offsets into the coordinate array where each ring starts + pub(crate) ring_offsets: &'a OffsetBuffer, + + pub(crate) geom_index: usize, + + start_offset: usize, +} + +impl<'a> MultiPolygon<'a> { + pub(crate) fn new( + coords: &'a CoordBuffer, + geom_offsets: &'a OffsetBuffer, + polygon_offsets: &'a OffsetBuffer, + ring_offsets: &'a OffsetBuffer, + geom_index: usize, + ) -> Self { + let (start_offset, _) = geom_offsets.start_end(geom_index); + Self { + coords, + geom_offsets, + polygon_offsets, + ring_offsets, + geom_index, + start_offset, + } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.coords.dim() + } +} + +impl<'a> MultiPolygonTrait for MultiPolygon<'a> { + type InnerPolygonType<'b> + = Polygon<'a> + where + Self: 'b; + + fn num_polygons(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn polygon_unchecked(&self, i: usize) -> Self::InnerPolygonType<'_> { + Polygon::new( + self.coords, + self.polygon_offsets, + self.ring_offsets, + self.start_offset + i, + ) + } +} + +impl<'a> MultiPolygonTrait for &'a MultiPolygon<'a> { + type InnerPolygonType<'b> + = Polygon<'a> + where + Self: 'b; + + fn num_polygons(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + end - start + } + + unsafe fn polygon_unchecked(&self, i: usize) -> Self::InnerPolygonType<'_> { + Polygon::new( + self.coords, + self.polygon_offsets, + self.ring_offsets, + self.start_offset + i, + ) + } +} + +impl> PartialEq for MultiPolygon<'_> { + fn eq(&self, other: &G) -> bool { + multi_polygon_eq(self, other) + } +} + +// #[cfg(test)] +// mod test { +// use crate::array::MultiPolygonArray; +// use crate::test::multipolygon::{mp0, mp1}; +// use crate::trait_::ArrayAccessor; +// use geoarrow_schema::Dimension; + +// /// Test Eq where the current index is true but another index is false +// #[test] +// fn test_eq_other_index_false() { +// let arr1: MultiPolygonArray = (vec![mp0(), mp1()].as_slice(), Dimension::XY).into(); +// let arr2: MultiPolygonArray = (vec![mp0(), mp0()].as_slice(), Dimension::XY).into(); + +// assert_eq!(arr1.value(0), arr2.value(0)); +// assert_ne!(arr1.value(1), arr2.value(1)); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/point.rs b/src/geoarrow/geoarrow-array/src/scalar/point.rs new file mode 100644 index 0000000000..d35143ef8e --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/point.rs @@ -0,0 +1,75 @@ +use geo_traits::PointTrait; +use geoarrow_schema::Dimension; + +use crate::{array::CoordBuffer, eq::point_eq, scalar::Coord}; + +/// An Arrow equivalent of a Point +/// +/// This implements [PointTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct Point<'a> { + coords: &'a CoordBuffer, + geom_index: usize, +} + +impl<'a> Point<'a> { + pub(crate) fn new(coords: &'a CoordBuffer, geom_index: usize) -> Self { + Point { coords, geom_index } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.coords.dim() + } +} + +impl<'a> PointTrait for Point<'a> { + type CoordType<'b> + = Coord<'a> + where + Self: 'b; + + fn coord(&self) -> Option> { + let coord = self.coords.value(self.geom_index); + if coord.is_nan() { None } else { Some(coord) } + } +} + +impl<'a> PointTrait for &Point<'a> { + type CoordType<'b> + = Coord<'a> + where + Self: 'b; + + fn coord(&self) -> Option> { + let coord = self.coords.value(self.geom_index); + if coord.is_nan() { None } else { Some(coord) } + } +} + +impl> PartialEq for Point<'_> { + fn eq(&self, other: &G) -> bool { + point_eq(self, other) + } +} + +// #[cfg(test)] +// mod test { +// use crate::array::{CoordBuffer, PointArray}; +// use crate::trait_::ArrayAccessor; + +// /// Test Eq where the current index is true but another index is false +// #[test] +// fn test_eq_other_index_false() { +// let x1 = vec![0., 1., 2.]; +// let y1 = vec![3., 4., 5.]; +// let buf1 = CoordBuffer::Separated((x1, y1).try_into().unwrap()); +// let arr1 = PointArray::new(buf1, None, Default::default()); + +// let x2 = vec![0., 100., 2.]; +// let y2 = vec![3., 400., 5.]; +// let buf2 = CoordBuffer::Separated((x2, y2).try_into().unwrap()); +// let arr2 = PointArray::new(buf2, None, Default::default()); + +// assert_eq!(arr1.value(0), arr2.value(0)); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/polygon.rs b/src/geoarrow/geoarrow-array/src/scalar/polygon.rs new file mode 100644 index 0000000000..1a6a3240d3 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/polygon.rs @@ -0,0 +1,124 @@ +use arrow_buffer::OffsetBuffer; +use geo_traits::PolygonTrait; +use geoarrow_schema::Dimension; + +use crate::{array::CoordBuffer, eq::polygon_eq, scalar::LineString, util::OffsetBufferUtils}; + +/// An Arrow equivalent of a Polygon +/// +/// This implements [PolygonTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct Polygon<'a> { + pub(crate) coords: &'a CoordBuffer, + + /// Offsets into the ring array where each geometry starts + pub(crate) geom_offsets: &'a OffsetBuffer, + + /// Offsets into the coordinate array where each ring starts + pub(crate) ring_offsets: &'a OffsetBuffer, + + pub(crate) geom_index: usize, + + start_offset: usize, +} + +impl<'a> Polygon<'a> { + pub(crate) fn new( + coords: &'a CoordBuffer, + geom_offsets: &'a OffsetBuffer, + ring_offsets: &'a OffsetBuffer, + geom_index: usize, + ) -> Self { + let (start_offset, _) = geom_offsets.start_end(geom_index); + Self { + coords, + geom_offsets, + ring_offsets, + geom_index, + start_offset, + } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.coords.dim() + } +} + +impl<'a> PolygonTrait for Polygon<'a> { + type RingType<'b> + = LineString<'a> + where + Self: 'b; + + fn exterior(&self) -> Option> { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + if start == end { + None + } else { + Some(LineString::new(self.coords, self.ring_offsets, start)) + } + } + + fn num_interiors(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + // Note: we need to use saturating_sub in the case of an empty polygon, where start == end + (end - start).saturating_sub(1) + } + + unsafe fn interior_unchecked(&self, i: usize) -> Self::RingType<'_> { + LineString::new(self.coords, self.ring_offsets, self.start_offset + 1 + i) + } +} + +impl<'a> PolygonTrait for &'a Polygon<'a> { + type RingType<'b> + = LineString<'a> + where + Self: 'b; + + fn exterior(&self) -> Option> { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + if start == end { + None + } else { + Some(LineString::new(self.coords, self.ring_offsets, start)) + } + } + + fn num_interiors(&self) -> usize { + let (start, end) = self.geom_offsets.start_end(self.geom_index); + // Note: we need to use saturating_sub in the case of an empty polygon, where start == end + (end - start).saturating_sub(1) + } + + unsafe fn interior_unchecked(&self, i: usize) -> Self::RingType<'_> { + LineString::new(self.coords, self.ring_offsets, self.start_offset + 1 + i) + } +} + +impl> PartialEq for Polygon<'_> { + fn eq(&self, other: &G) -> bool { + polygon_eq(self, other) + } +} + +// #[cfg(test)] +// mod test { +// use geo::HasDimensions; +// use geo_traits::to_geo::ToGeoPolygon; +// use geoarrow_schema::{Dimension, PolygonType}; +// use wkt::wkt; + +// use crate::{GeoArrowArrayAccessor, builder::PolygonBuilder}; + +// /// Test Eq where the current index is true but another index is false +// #[test] +// fn test_access_empty_polygon() { +// let empty_polygon: wkt::types::Polygon = wkt! { POLYGON EMPTY }; +// let typ = PolygonType::new(Dimension::XY, Default::default()); +// let polygon_array = PolygonBuilder::from_polygons(&[empty_polygon], typ).finish(); + +// let geo_polygon = polygon_array.value(0).unwrap().to_polygon(); +// assert!(geo_polygon.is_empty()); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/scalar/rect.rs b/src/geoarrow/geoarrow-array/src/scalar/rect.rs new file mode 100644 index 0000000000..6a64784d90 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/rect.rs @@ -0,0 +1,68 @@ +use geo_traits::RectTrait; +use geoarrow_schema::Dimension; + +use crate::{array::SeparatedCoordBuffer, eq::rect_eq, scalar::SeparatedCoord}; + +/// An Arrow equivalent of a Rect +/// +/// This implements [RectTrait], which you can use to extract data. +#[derive(Debug, Clone)] +pub struct Rect<'a> { + lower: &'a SeparatedCoordBuffer, + upper: &'a SeparatedCoordBuffer, + pub(crate) geom_index: usize, +} + +impl<'a> Rect<'a> { + pub(crate) fn new( + lower: &'a SeparatedCoordBuffer, + upper: &'a SeparatedCoordBuffer, + geom_index: usize, + ) -> Self { + Self { + lower, + upper, + geom_index, + } + } + + pub(crate) fn native_dim(&self) -> Dimension { + self.lower.dim + } +} + +impl<'a> RectTrait for Rect<'a> { + type CoordType<'b> + = SeparatedCoord<'a> + where + Self: 'b; + + fn min(&self) -> Self::CoordType<'_> { + self.lower.value(self.geom_index) + } + + fn max(&self) -> Self::CoordType<'_> { + self.upper.value(self.geom_index) + } +} + +impl<'a> RectTrait for &Rect<'a> { + type CoordType<'b> + = SeparatedCoord<'a> + where + Self: 'b; + + fn min(&self) -> Self::CoordType<'_> { + self.lower.value(self.geom_index) + } + + fn max(&self) -> Self::CoordType<'_> { + self.upper.value(self.geom_index) + } +} + +impl> PartialEq for Rect<'_> { + fn eq(&self, other: &G) -> bool { + rect_eq(self, other) + } +} diff --git a/src/geoarrow/geoarrow-array/src/scalar/specialization.rs b/src/geoarrow/geoarrow-array/src/scalar/specialization.rs new file mode 100644 index 0000000000..e1a91b045f --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/scalar/specialization.rs @@ -0,0 +1,150 @@ +// Specialized implementations of GeometryTrait on each scalar type. + +use geo_traits::GeometryTrait; + +use crate::scalar::*; + +macro_rules! impl_specialization { + ($geometry_type:ident, $trait_name:ident) => { + impl GeometryTrait for $geometry_type<'_> { + type T = f64; + type PointType<'b> + = Point<'b> + where + Self: 'b; + type LineStringType<'b> + = LineString<'b> + where + Self: 'b; + type PolygonType<'b> + = Polygon<'b> + where + Self: 'b; + type MultiPointType<'b> + = MultiPoint<'b> + where + Self: 'b; + type MultiLineStringType<'b> + = MultiLineString<'b> + where + Self: 'b; + type MultiPolygonType<'b> + = MultiPolygon<'b> + where + Self: 'b; + type GeometryCollectionType<'b> + = GeometryCollection<'b> + where + Self: 'b; + type RectType<'b> + = Rect<'b> + where + Self: 'b; + type LineType<'b> + = geo_traits::UnimplementedLine + where + Self: 'b; + type TriangleType<'b> + = geo_traits::UnimplementedTriangle + where + Self: 'b; + + fn dim(&self) -> geo_traits::Dimensions { + self.native_dim().into() + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + geo_traits::GeometryType::$geometry_type(self) + } + } + + impl GeometryTrait for &'_ $geometry_type<'_> { + type T = f64; + type PointType<'b> + = Point<'b> + where + Self: 'b; + type LineStringType<'b> + = LineString<'b> + where + Self: 'b; + type PolygonType<'b> + = Polygon<'b> + where + Self: 'b; + type MultiPointType<'b> + = MultiPoint<'b> + where + Self: 'b; + type MultiLineStringType<'b> + = MultiLineString<'b> + where + Self: 'b; + type MultiPolygonType<'b> + = MultiPolygon<'b> + where + Self: 'b; + type GeometryCollectionType<'b> + = GeometryCollection<'b> + where + Self: 'b; + type RectType<'b> + = Rect<'b> + where + Self: 'b; + type LineType<'b> + = geo_traits::UnimplementedLine + where + Self: 'b; + type TriangleType<'b> + = geo_traits::UnimplementedTriangle + where + Self: 'b; + + fn dim(&self) -> geo_traits::Dimensions { + self.native_dim().into() + } + + fn as_type( + &self, + ) -> geo_traits::GeometryType< + '_, + Self::PointType<'_>, + Self::LineStringType<'_>, + Self::PolygonType<'_>, + Self::MultiPointType<'_>, + Self::MultiLineStringType<'_>, + Self::MultiPolygonType<'_>, + Self::GeometryCollectionType<'_>, + Self::RectType<'_>, + Self::TriangleType<'_>, + Self::LineType<'_>, + > { + geo_traits::GeometryType::$geometry_type(self) + } + } + }; +} + +impl_specialization!(Point, PointTrait); +impl_specialization!(LineString, LineStringTrait); +impl_specialization!(Polygon, PolygonTrait); +impl_specialization!(MultiPoint, MultiPointTrait); +impl_specialization!(MultiLineString, MultiLineStringTrait); +impl_specialization!(MultiPolygon, MultiPolygonTrait); +impl_specialization!(GeometryCollection, GeometryCollectionTrait); +impl_specialization!(Rect, RectTrait); diff --git a/src/geoarrow/geoarrow-array/src/trait_.rs b/src/geoarrow/geoarrow-array/src/trait_.rs new file mode 100644 index 0000000000..221559e7b2 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/trait_.rs @@ -0,0 +1,735 @@ +use std::{any::Any, fmt::Debug, sync::Arc}; + +use arrow_array::{Array, ArrayRef}; +use arrow_buffer::NullBuffer; +use arrow_schema::extension::ExtensionType; +use geo_traits::GeometryTrait; +use geoarrow_schema::{GeoArrowType, Metadata, error::GeoArrowResult}; + +use crate::array::from_arrow_array; + +/// Convert GeoArrow arrays into their respective [arrow][arrow_array] arrays. +pub trait IntoArrow { + /// The type of arrow array that this geoarrow array can be converted into. + type ArrowArray: Array; + + /// The extension type representing this array. It will always be a type defined by + /// [geoarrow_schema]. + type ExtensionType: ExtensionType; + + /// Converts this geoarrow array into an arrow array. + /// + /// Note that [arrow][arrow_array] arrays do not maintain Arrow extension metadata, so the + /// result of this method will omit any spatial extension information. Ensure you call + /// [Self::extension_type] to get extension information that you can add to a + /// [`Field`][arrow_schema::Field]. + fn into_arrow(self) -> Self::ArrowArray; + + /// Return the Arrow extension type representing this array. + fn extension_type(&self) -> &Self::ExtensionType; +} + +/// A base trait for all GeoArrow arrays. +/// +/// This is a geospatial corollary to the upstream [`Array`] trait. +pub trait GeoArrowArray: Debug + Send + Sync { + /// Returns the array as [`Any`] so that it can be downcasted to a specific implementation. + /// + /// Prefer using [`AsGeoArrowArray`][crate::cast::AsGeoArrowArray] instead of calling this + /// method and manually downcasting. + fn as_any(&self) -> &dyn Any; + + /// Returns the [`GeoArrowType`] of this array. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_schema::{Dimension, PointType, GeoArrowType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish(); + /// assert_eq!(point_array.data_type(), GeoArrowType::Point(point_type)); + /// ``` + fn data_type(&self) -> GeoArrowType; + + /// Converts this array into an `Arc`ed [`arrow`][arrow_array] array, consuming the original + /// array. + /// + /// This is `O(1)`. + /// + /// Note that **this will omit any spatial extension information**. You must separately store + /// the spatial information in a [`Field`][arrow_schema::Field] derived from + /// [`Self::data_type`]. + /// + /// # Examples + /// + /// ``` + /// # use arrow_array::ArrayRef; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish(); + /// let array_ref: ArrayRef = point_array.into_array_ref(); + /// ``` + #[must_use] + fn into_array_ref(self) -> ArrayRef; + + /// Converts this array into an `Arc`ed [`arrow`][arrow_array] array. + /// + /// This is `O(1)`. + /// + /// Note that **this will omit any spatial extension information**. You must separately store + /// the spatial information in a [`Field`][arrow_schema::Field] derived from + /// [`Self::data_type`]. + /// + /// # Examples + /// + /// ``` + /// # use arrow_array::ArrayRef; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish(); + /// let array_ref: ArrayRef = point_array.to_array_ref(); + /// ``` + #[must_use] + fn to_array_ref(&self) -> ArrayRef; + + /// The number of geometries contained in this array. + /// + /// # Examples + /// + /// ``` + /// # use arrow_array::ArrayRef; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish(); + /// assert_eq!(point_array.len(), 1); + /// ``` + fn len(&self) -> usize; + + /// Returns `true` if the array is empty. + /// + /// # Examples + /// + /// ``` + /// # use arrow_array::ArrayRef; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = PointBuilder::from_points([point].iter(), point_type.clone()).finish(); + /// assert!(!point_array.is_empty()); + /// ``` + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns a potentially computed [`NullBuffer``] that represents the logical null values of + /// this array, if any. + /// + /// Logical nulls represent the values that are null in the array, regardless of the underlying + /// physical arrow representation. + /// + /// For most array types, this is equivalent to the "physical" nulls returned by + /// [`Array::nulls`]. However it is different for union arrays, including our + /// [`GeometryArray`][crate::array::GeometryArray] and + /// [`GeometryCollectionArray`][crate::array::GeometryCollectionArray] types, because the + /// unions aren't encoded in a single null buffer. + fn logical_nulls(&self) -> Option; + + /// Returns the number of null slots in this array. + /// + /// This is `O(1)` since the number of null elements is pre-computed. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = + /// PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish(); + /// assert_eq!(point_array.logical_null_count(), 1); + /// ``` + fn logical_null_count(&self) -> usize; + + /// Returns whether slot `i` is null. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = + /// PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish(); + /// assert!(point_array.is_null(1)); + /// ``` + /// + /// # Panics + /// + /// Panics iff `i >= self.len()`. + fn is_null(&self, i: usize) -> bool; + + /// Returns whether slot `i` is valid. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point = geo_types::point!(x: 1., y: 2.); + /// + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = + /// PointBuilder::from_nullable_points([Some(&point), None].into_iter(), point_type.clone()).finish(); + /// assert!(point_array.is_valid(0)); + /// ``` + /// + /// # Panics + /// + /// Panics iff `i >= self.len()`. + #[inline] + fn is_valid(&self, i: usize) -> bool { + !self.is_null(i) + } + + /// Returns a zero-copy slice of this array with the indicated offset and length. + /// + /// # Examples + /// + /// ``` + /// # use std::sync::Arc; + /// # + /// # use geoarrow_array::GeoArrowArray; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point1 = geo_types::point!(x: 1., y: 2.); + /// let point2 = geo_types::point!(x: 3., y: 4.); + /// + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = + /// Arc::new(PointBuilder::from_points([point1, point2].iter(), point_type.clone()).finish()) + /// as Arc; + /// let sliced_array = point_array.slice(1, 1); + /// assert_eq!(sliced_array.len(), 1); + /// ``` + /// + /// # Panics + /// + /// This function panics iff `offset + length > self.len()`. + #[must_use] + fn slice(&self, offset: usize, length: usize) -> Arc; + + /// Change the [`Metadata`] of this array. + fn with_metadata(self, metadata: Arc) -> Arc; +} + +/// Ergonomics: Allow use of an `Arc` as an `&dyn GeoArrowArray` +impl GeoArrowArray for Arc { + fn as_any(&self) -> &dyn Any { + self.as_ref().as_any() + } + + fn data_type(&self) -> GeoArrowType { + self.as_ref().data_type() + } + + fn into_array_ref(self) -> ArrayRef { + self.as_ref().to_array_ref() + } + + fn to_array_ref(&self) -> ArrayRef { + self.as_ref().to_array_ref() + } + + fn len(&self) -> usize { + self.as_ref().len() + } + + fn logical_nulls(&self) -> Option { + self.as_ref().logical_nulls() + } + + fn logical_null_count(&self) -> usize { + self.as_ref().logical_null_count() + } + + fn is_null(&self, i: usize) -> bool { + self.as_ref().is_null(i) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + self.as_ref().slice(offset, length) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + // This is a hack to allow consuming self + let field = self.data_type().with_metadata(metadata).to_field("", true); + let array = self.as_ref().to_array_ref(); + // This unwrap should be fine because we know we start with a GeoArrow array + from_arrow_array(array.as_ref(), &field).unwrap() + } +} + +impl GeoArrowArray for &T { + fn as_any(&self) -> &dyn Any { + T::as_any(self) + } + + fn data_type(&self) -> GeoArrowType { + T::data_type(self) + } + + fn into_array_ref(self) -> ArrayRef { + T::to_array_ref(self) + } + + fn to_array_ref(&self) -> ArrayRef { + T::to_array_ref(self) + } + + fn len(&self) -> usize { + T::len(self) + } + + fn logical_nulls(&self) -> Option { + T::logical_nulls(self) + } + + fn logical_null_count(&self) -> usize { + T::logical_null_count(self) + } + + fn is_null(&self, i: usize) -> bool { + T::is_null(self, i) + } + + fn slice(&self, offset: usize, length: usize) -> Arc { + T::slice(self, offset, length) + } + + fn with_metadata(self, metadata: Arc) -> Arc { + // This is a hack to allow consuming self + let field = self.data_type().with_metadata(metadata).to_field("", true); + let array = T::to_array_ref(self); + // This unwrap should be fine because we know we start with a GeoArrow array + from_arrow_array(array.as_ref(), &field).unwrap() + } +} + +/// A trait for accessing the values of a [`GeoArrowArray`]. +/// +/// # Performance +/// +/// Accessing a geometry from a "native" array, such as `PointArray`, `MultiPolygonArray` or +/// `GeometryArray` will always be constant-time and zero-copy. +/// +/// Accessing a geometry from a "serialized" array such as `GenericWkbArray` or `GenericWktArray` +/// will trigger some amount of parsing. In the case of `GenericWkbArray`, accessing an item will +/// read the WKB header and scan the buffer if needed to find internal geometry offsets, but will +/// not copy any internal coordinates. This allows for later access to be constant-time (though not +/// necessarily zero-copy, since WKB is not byte-aligned). In the case of `GenericWktArray`, +/// accessing a geometry will fully parse the WKT string and copy coordinates to a separate +/// representation. This means that calling `.iter()` on a `GenericWktArray` will transparently +/// fully parse every row. +/// +/// # Validity +/// +/// A [`GeoArrowArrayAccessor`] must always return a well-defined value for an index that is +/// within the bounds `0..Array::len`, including for null indexes where [`Array::is_null`] is true. +/// +/// The value at null indexes is unspecified, and implementations must not rely on a specific +/// value such as [`Default::default`] being returned, however, it must not be undefined. +pub trait GeoArrowArrayAccessor<'a>: GeoArrowArray { + /// The [geoarrow scalar object][crate::scalar] for this geometry array type. + type Item: Send + Sync + GeometryTrait; + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::{CoordTrait, PointTrait}; + /// # use geoarrow_array::GeoArrowArrayAccessor; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_schema::{Dimension, PointType}; + /// + /// let point1 = geo_types::point!(x: 1., y: 2.); + /// + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = + /// PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone()) + /// .finish(); + /// + /// let coord = point_array.value(0).unwrap().coord().unwrap(); + /// assert_eq!(coord.x(), 1.); + /// assert_eq!(coord.y(), 2.); + /// ``` + /// + /// # Errors + /// + /// Errors for invalid WKT and WKB geometries. Will never error for native arrays. + /// + /// # Panics + /// + /// Panics if the value is outside the bounds of the array. + fn value(&'a self, index: usize) -> GeoArrowResult { + assert!(index < self.len()); + unsafe { self.value_unchecked(index) } + } + + /// Returns the element at index `i`, not considering validity. + /// + /// # Examples + /// + /// ``` + /// use geo_traits::{CoordTrait, PointTrait}; + /// # use geoarrow_array::GeoArrowArrayAccessor; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_schema::{Dimension, PointType}; + /// + /// let point1 = geo_types::point!(x: 1., y: 2.); + /// + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = + /// PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone()) + /// .finish(); + /// + /// let coord = unsafe { point_array.value_unchecked(0) } + /// .unwrap() + /// .coord() + /// .unwrap(); + /// assert_eq!(coord.x(), 1.); + /// assert_eq!(coord.y(), 2.); + /// ``` + /// + /// # Errors + /// + /// Errors for invalid WKT and WKB geometries. Will never error for native arrays. + /// + /// # Safety + /// + /// Caller is responsible for ensuring that the index is within the bounds of the array + unsafe fn value_unchecked(&'a self, index: usize) -> GeoArrowResult; + + /// Returns the value at slot `i` as an Arrow scalar, considering validity. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_array::GeoArrowArrayAccessor; + /// # use geoarrow_array::builder::PointBuilder; + /// # use geoarrow_schema::{Dimension, PointType}; + /// # + /// let point1 = geo_types::point!(x: 1., y: 2.); + /// + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let point_array = + /// PointBuilder::from_nullable_points([Some(&point1), None].into_iter(), point_type.clone()) + /// .finish(); + /// + /// assert!(point_array.get(0).unwrap().is_some()); + /// assert!(point_array.get(1).unwrap().is_none()); + /// ``` + /// + /// # Errors + /// + /// Errors for invalid WKT and WKB geometries. Will never error for native arrays. + fn get(&'a self, index: usize) -> GeoArrowResult> { + if self.is_null(index) { + return Ok(None); + } + + Ok(Some(self.value(index)?)) + } + + /// Returns the value at slot `i` as an Arrow scalar, considering validity. + /// + /// # Errors + /// + /// Errors for invalid WKT and WKB geometries. Will never error for native arrays. + /// + /// # Safety + /// + /// Caller is responsible for ensuring that the index is within the bounds of the array + unsafe fn get_unchecked(&'a self, index: usize) -> Option> { + if self.is_null(index) { + return None; + } + + Some(unsafe { self.value_unchecked(index) }) + } + + /// Iterates over this array's geoarrow scalar values, considering validity. + /// + /// # Errors + /// + /// Errors for invalid WKT and WKB geometries. Will never error for native arrays. + fn iter(&'a self) -> impl ExactSizeIterator>> + 'a { + (0..self.len()).map(|i| unsafe { self.get_unchecked(i) }) + } + + /// Iterator over geoarrow scalar values, not considering validity. + /// + /// # Errors + /// + /// Errors for invalid WKT and WKB geometries. Will never error for native arrays. + fn iter_values(&'a self) -> impl ExactSizeIterator> + 'a { + (0..self.len()).map(|i| unsafe { self.value_unchecked(i) }) + } +} + +/// A trait describing a mutable geometry array; i.e. an array whose values can be changed. +/// +// Note: This trait is not yet publicly exported from this crate, as we're not sure how the API +// should be, and in particular whether we need this trait to be dyn-compatible or not. +pub(crate) trait GeoArrowArrayBuilder: Debug + Send + Sync { + /// Returns the length of the array. + fn len(&self) -> usize; + + /// Returns whether the array is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Push a null value to this builder. + fn push_null(&mut self); + + /// Push a geometry to this builder. + #[allow(dead_code)] + fn push_geometry( + &mut self, + geometry: Option<&impl GeometryTrait>, + ) -> GeoArrowResult<()>; + + /// Finish the builder and return an [`Arc`] to the resulting array. + #[allow(dead_code)] + fn finish(self) -> Arc; +} + +/// Trait for types that can read `Arc`'s. +/// +/// This is similar to an upstream [RecordBatchReader][arrow_array::RecordBatchReader], but for +/// GeoArrow arrays instead of RecordBatches. +/// +/// This will always yield an `Arc` with the same [`GeoArrowType`], which is +/// known in advance (see [`Self::data_type`]). +/// +/// To create from an iterator, see [GeoArrowArrayIterator]. +pub trait GeoArrowArrayReader: Iterator>> { + /// Returns the field of this `GeoArrowArrayReader`. + /// + /// Implementation of this trait should guarantee that all `Arc`'s returned + /// by this reader should have the same [`GeoArrowType`] as returned from this method. + fn data_type(&self) -> GeoArrowType; +} + +impl GeoArrowArrayReader for Box { + fn data_type(&self) -> GeoArrowType { + self.as_ref().data_type() + } +} + +/// An iterator of [`Arc`] with an attached [`GeoArrowType`] +pub struct GeoArrowArrayIterator +where + I: IntoIterator>>, +{ + inner: I::IntoIter, + inner_type: GeoArrowType, +} + +impl GeoArrowArrayIterator +where + I: IntoIterator>>, +{ + /// Create a new [GeoArrowArrayIterator]. + /// + /// If `iter` is an infallible iterator, use `.map(Ok)`. + pub fn new(iter: I, data_type: GeoArrowType) -> Self { + Self { + inner: iter.into_iter(), + inner_type: data_type, + } + } +} + +impl Iterator for GeoArrowArrayIterator +where + I: IntoIterator>>, +{ + type Item = I::Item; + + fn next(&mut self) -> Option { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +impl GeoArrowArrayReader for GeoArrowArrayIterator +where + I: IntoIterator>>, +{ + fn data_type(&self) -> GeoArrowType { + self.inner_type.clone() + } +} + +// #[cfg(test)] +// mod test { +// use std::sync::Arc; + +// use arrow_array::{ +// Array, +// builder::{ArrayBuilder, FixedSizeListBuilder, Float64Builder, StructBuilder}, +// }; +// use arrow_schema::{DataType, Field}; +// use geoarrow_schema::{CoordType, Dimension, GeometryType, PointType}; + +// use super::*; +// use crate::{builder::GeometryBuilder, trait_::GeoArrowArray}; + +// #[test] +// fn infer_type_interleaved_point() { +// let test_cases = [ +// (2, Dimension::XY), +// (3, Dimension::XYZ), +// (4, Dimension::XYZM), +// ]; +// for (list_size, dim) in test_cases.into_iter() { +// let array = FixedSizeListBuilder::new(Float64Builder::new(), list_size).finish(); +// let t = +// GeoArrowType::from_arrow_field(&Field::new("", array.data_type().clone(), true)) +// .unwrap(); +// assert_eq!( +// t, +// GeoArrowType::Point( +// PointType::new(dim, Default::default()).with_coord_type(CoordType::Interleaved) +// ) +// ); +// } +// } + +// #[test] +// fn infer_type_separated_point() { +// let test_cases = [ +// ( +// vec![ +// Arc::new(Field::new("x", DataType::Float64, true)), +// Arc::new(Field::new("y", DataType::Float64, true)), +// ], +// vec![ +// Box::new(Float64Builder::new()) as Box, +// Box::new(Float64Builder::new()), +// ], +// Dimension::XY, +// ), +// ( +// vec![ +// Arc::new(Field::new("x", DataType::Float64, true)), +// Arc::new(Field::new("y", DataType::Float64, true)), +// Arc::new(Field::new("z", DataType::Float64, true)), +// ], +// vec![ +// Box::new(Float64Builder::new()) as Box, +// Box::new(Float64Builder::new()), +// Box::new(Float64Builder::new()), +// ], +// Dimension::XYZ, +// ), +// ( +// vec![ +// Arc::new(Field::new("x", DataType::Float64, true)), +// Arc::new(Field::new("y", DataType::Float64, true)), +// Arc::new(Field::new("z", DataType::Float64, true)), +// Arc::new(Field::new("m", DataType::Float64, true)), +// ], +// vec![ +// Box::new(Float64Builder::new()) as Box, +// Box::new(Float64Builder::new()), +// Box::new(Float64Builder::new()), +// Box::new(Float64Builder::new()), +// ], +// Dimension::XYZM, +// ), +// ]; +// for (fields, builders, dim) in test_cases.into_iter() { +// let array = StructBuilder::new(fields, builders).finish(); +// let t = +// GeoArrowType::from_arrow_field(&Field::new("", array.data_type().clone(), true)) +// .unwrap(); +// assert_eq!( +// t, +// GeoArrowType::Point( +// PointType::new(dim, Default::default()).with_coord_type(CoordType::Separated) +// ) +// ); +// } +// } + +// #[test] +// fn native_type_round_trip() { +// let point_array = crate::test::point::point_array(CoordType::Interleaved); +// let field = point_array.data_type.to_field("geometry", true); +// let data_type: GeoArrowType = (&field).try_into().unwrap(); +// assert_eq!(point_array.data_type(), data_type); + +// let ml_array = crate::test::multilinestring::ml_array(CoordType::Interleaved); +// let field = ml_array.data_type.to_field("geometry", true); +// let data_type: GeoArrowType = (&field).try_into().unwrap(); +// assert_eq!(ml_array.data_type(), data_type); + +// let mut builder = GeometryBuilder::new( +// GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved), +// ); +// builder +// .push_geometry(Some(&crate::test::point::p0())) +// .unwrap(); +// builder +// .push_geometry(Some(&crate::test::point::p1())) +// .unwrap(); +// builder +// .push_geometry(Some(&crate::test::point::p2())) +// .unwrap(); +// builder +// .push_geometry(Some(&crate::test::multilinestring::ml0())) +// .unwrap(); +// builder +// .push_geometry(Some(&crate::test::multilinestring::ml1())) +// .unwrap(); +// let geom_array = builder.finish(); +// let field = geom_array.data_type.to_field("geometry", true); +// let data_type: GeoArrowType = (&field).try_into().unwrap(); +// assert_eq!(geom_array.data_type(), data_type); +// } +// } diff --git a/src/geoarrow/geoarrow-array/src/util.rs b/src/geoarrow/geoarrow-array/src/util.rs new file mode 100644 index 0000000000..556ad586be --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/util.rs @@ -0,0 +1,93 @@ +use arrow_array::OffsetSizeTrait; +use arrow_buffer::OffsetBuffer; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +pub(crate) fn offsets_buffer_i32_to_i64(offsets: &OffsetBuffer) -> OffsetBuffer { + let i64_offsets = offsets.iter().map(|x| *x as i64).collect::>(); + unsafe { OffsetBuffer::new_unchecked(i64_offsets.into()) } +} + +pub(crate) fn offsets_buffer_i64_to_i32( + offsets: &OffsetBuffer, +) -> GeoArrowResult> { + i32::try_from(*offsets.last()).map_err(|_| GeoArrowError::Overflow)?; + + let i32_offsets = offsets.iter().map(|x| *x as i32).collect::>(); + Ok(unsafe { OffsetBuffer::new_unchecked(i32_offsets.into()) }) +} + +/// Offsets utils that I miss from arrow2 +pub(crate) trait OffsetBufferUtils { + /// Returns the length an array with these offsets would be. + fn len_proxy(&self) -> usize; + + /// Returns a range (start, end) corresponding to the position `index` + /// # Panic + /// This function panics iff `index >= self.len_proxy()` + fn start_end(&self, index: usize) -> (usize, usize); + + /// Returns the last offset. + fn last(&self) -> &O; +} + +impl OffsetBufferUtils for OffsetBuffer { + /// Returns the length an array with these offsets would be. + #[inline] + fn len_proxy(&self) -> usize { + self.len() - 1 + } + + /// Returns a range (start, end) corresponding to the position `index` + /// + /// # Panic + /// + /// Panics iff `index >= self.len_proxy()` + #[inline] + fn start_end(&self, index: usize) -> (usize, usize) { + assert!(index < self.len_proxy()); + let start = self[index].to_usize().unwrap(); + let end = self[index + 1].to_usize().unwrap(); + (start, end) + } + + /// Returns the last offset. + #[inline] + fn last(&self) -> &O { + self.as_ref().last().unwrap() + } +} + +pub(crate) trait GeometryTypeName { + /// Returns the name of the geometry type. + fn name(&self) -> String; +} + +impl GeometryTypeName + for geo_traits::GeometryType<'_, P, LS, Y, MP, ML, MY, GC, R, T, L> +where + P: geo_traits::PointTrait, + LS: geo_traits::LineStringTrait, + Y: geo_traits::PolygonTrait, + MP: geo_traits::MultiPointTrait, + ML: geo_traits::MultiLineStringTrait, + MY: geo_traits::MultiPolygonTrait, + GC: geo_traits::GeometryCollectionTrait, + R: geo_traits::RectTrait, + T: geo_traits::TriangleTrait, + L: geo_traits::LineTrait, +{ + fn name(&self) -> String { + match self { + Self::Point(_) => "Point".to_string(), + Self::LineString(_) => "LineString".to_string(), + Self::Polygon(_) => "Polygon".to_string(), + Self::MultiPoint(_) => "MultiPoint".to_string(), + Self::MultiLineString(_) => "MultiLineString".to_string(), + Self::MultiPolygon(_) => "MultiPolygon".to_string(), + Self::GeometryCollection(_) => "GeometryCollection".to_string(), + Self::Rect(_) => "Rect".to_string(), + Self::Triangle(_) => "Triangle".to_string(), + Self::Line(_) => "Line".to_string(), + } + } +} diff --git a/src/geoarrow/geoarrow-array/src/wrap_array.rs b/src/geoarrow/geoarrow-array/src/wrap_array.rs new file mode 100644 index 0000000000..25e9a47bd5 --- /dev/null +++ b/src/geoarrow/geoarrow-array/src/wrap_array.rs @@ -0,0 +1,305 @@ +use std::sync::Arc; + +use arrow_array::{ + Array, BinaryArray, BinaryViewArray, FixedSizeListArray, LargeBinaryArray, LargeStringArray, + ListArray, StringArray, StringViewArray, StructArray, UnionArray, cast::AsArray, +}; +use arrow_schema::DataType; +use geoarrow_schema::{ + error::{GeoArrowError, GeoArrowResult}, + *, +}; + +use crate::{GeoArrowArray, array::*}; + +/// Using a GeoArrow geometry type, wrap the provided storage array as a GeoArrow array. +/// +/// This is a convenient way to convert from an Arrow array to a GeoArrow array when you have an +/// extension type. You can also use the `TryFrom` implementations on each GeoArrow array type, but +/// this may be easier to remember and find. +pub trait WrapArray { + /// The output GeoArrow array type. + type Output: GeoArrowArray; + + /// Wrap the given storage array as an GeoArrow array. + /// + /// This terminology comes from pyarrow/Arrow C++, where extension types similarly have a + /// [`wrap_array`](https://arrow.apache.org/docs/python/generated/pyarrow.ExtensionType.html#pyarrow.ExtensionType.wrap_array) + /// method. + fn wrap_array(&self, input: Input) -> GeoArrowResult; +} + +impl WrapArray<&StructArray> for PointType { + type Output = PointArray; + + fn wrap_array(&self, input: &StructArray) -> GeoArrowResult { + PointArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&FixedSizeListArray> for PointType { + type Output = PointArray; + + fn wrap_array(&self, input: &FixedSizeListArray) -> GeoArrowResult { + PointArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for PointType { + type Output = PointArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + PointArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&ListArray> for LineStringType { + type Output = LineStringArray; + + fn wrap_array(&self, input: &ListArray) -> GeoArrowResult { + LineStringArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for LineStringType { + type Output = LineStringArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + LineStringArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&ListArray> for PolygonType { + type Output = PolygonArray; + + fn wrap_array(&self, input: &ListArray) -> GeoArrowResult { + PolygonArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for PolygonType { + type Output = PolygonArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + PolygonArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&ListArray> for MultiPointType { + type Output = MultiPointArray; + + fn wrap_array(&self, input: &ListArray) -> GeoArrowResult { + MultiPointArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for MultiPointType { + type Output = MultiPointArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + MultiPointArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&ListArray> for MultiLineStringType { + type Output = MultiLineStringArray; + + fn wrap_array(&self, input: &ListArray) -> GeoArrowResult { + MultiLineStringArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for MultiLineStringType { + type Output = MultiLineStringArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + MultiLineStringArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&ListArray> for MultiPolygonType { + type Output = MultiPolygonArray; + + fn wrap_array(&self, input: &ListArray) -> GeoArrowResult { + MultiPolygonArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for MultiPolygonType { + type Output = MultiPolygonArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + MultiPolygonArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&StructArray> for BoxType { + type Output = RectArray; + + fn wrap_array(&self, input: &StructArray) -> GeoArrowResult { + RectArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for BoxType { + type Output = RectArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + RectArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&ListArray> for GeometryCollectionType { + type Output = GeometryCollectionArray; + + fn wrap_array(&self, input: &ListArray) -> GeoArrowResult { + GeometryCollectionArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for GeometryCollectionType { + type Output = GeometryCollectionArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + GeometryCollectionArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&UnionArray> for GeometryType { + type Output = GeometryArray; + + fn wrap_array(&self, input: &UnionArray) -> GeoArrowResult { + GeometryArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&dyn Array> for GeometryType { + type Output = GeometryArray; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + GeometryArray::try_from((input, self.clone())) + } +} + +impl WrapArray<&BinaryViewArray> for WkbType { + type Output = WkbViewArray; + + fn wrap_array(&self, input: &BinaryViewArray) -> GeoArrowResult { + Ok(WkbViewArray::from((input.clone(), self.clone()))) + } +} + +impl WrapArray<&BinaryArray> for WkbType { + type Output = WkbArray; + + fn wrap_array(&self, input: &BinaryArray) -> GeoArrowResult { + Ok(WkbArray::from((input.clone(), self.clone()))) + } +} + +impl WrapArray<&LargeBinaryArray> for WkbType { + type Output = LargeWkbArray; + + fn wrap_array(&self, input: &LargeBinaryArray) -> GeoArrowResult { + Ok(LargeWkbArray::from((input.clone(), self.clone()))) + } +} + +impl WrapArray<&StringViewArray> for WktType { + type Output = WktViewArray; + + fn wrap_array(&self, input: &StringViewArray) -> GeoArrowResult { + Ok(WktViewArray::from((input.clone(), self.clone()))) + } +} + +impl WrapArray<&dyn Array> for WkbType { + type Output = Arc; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + match input.data_type() { + DataType::BinaryView => Ok(Arc::new(WkbViewArray::from(( + input.as_binary_view().clone(), + self.clone(), + )))), + DataType::Binary => Ok(Arc::new(WkbArray::from(( + input.as_binary().clone(), + self.clone(), + )))), + DataType::LargeBinary => Ok(Arc::new(LargeWkbArray::from(( + input.as_binary().clone(), + self.clone(), + )))), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected DataType for WkbType: {dt:?}", + ))), + } + } +} + +impl WrapArray<&StringArray> for WktType { + type Output = WktArray; + + fn wrap_array(&self, input: &StringArray) -> GeoArrowResult { + Ok(WktArray::from((input.clone(), self.clone()))) + } +} + +impl WrapArray<&LargeStringArray> for WktType { + type Output = LargeWktArray; + + fn wrap_array(&self, input: &LargeStringArray) -> GeoArrowResult { + Ok(LargeWktArray::from((input.clone(), self.clone()))) + } +} + +impl WrapArray<&dyn Array> for WktType { + type Output = Arc; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + match input.data_type() { + DataType::Utf8View => Ok(Arc::new(WktViewArray::from(( + input.as_string_view().clone(), + self.clone(), + )))), + DataType::Utf8 => Ok(Arc::new(WktArray::from(( + input.as_string().clone(), + self.clone(), + )))), + DataType::LargeUtf8 => Ok(Arc::new(LargeWktArray::from(( + input.as_string().clone(), + self.clone(), + )))), + dt => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unexpected DataType for WktType: {dt:?}", + ))), + } + } +} + +impl WrapArray<&dyn Array> for GeoArrowType { + type Output = Arc; + + fn wrap_array(&self, input: &dyn Array) -> GeoArrowResult { + use GeoArrowType::*; + + let result: Arc = match self { + Point(t) => Arc::new(t.wrap_array(input)?), + LineString(t) => Arc::new(t.wrap_array(input)?), + Polygon(t) => Arc::new(t.wrap_array(input)?), + MultiPoint(t) => Arc::new(t.wrap_array(input)?), + MultiLineString(t) => Arc::new(t.wrap_array(input)?), + MultiPolygon(t) => Arc::new(t.wrap_array(input)?), + GeometryCollection(t) => Arc::new(t.wrap_array(input)?), + Rect(t) => Arc::new(t.wrap_array(input)?), + Geometry(t) => Arc::new(t.wrap_array(input)?), + Wkb(t) => Arc::new(WkbArray::try_from((input, t.clone()))?), + LargeWkb(t) => Arc::new(LargeWkbArray::try_from((input, t.clone()))?), + WkbView(t) => Arc::new(WkbViewArray::try_from((input, t.clone()))?), + Wkt(t) => Arc::new(WktArray::try_from((input, t.clone()))?), + LargeWkt(t) => Arc::new(LargeWktArray::try_from((input, t.clone()))?), + WktView(t) => Arc::new(WktViewArray::try_from((input, t.clone()))?), + }; + Ok(result) + } +} diff --git a/src/geoarrow/geoarrow-cast/Cargo.toml b/src/geoarrow/geoarrow-cast/Cargo.toml new file mode 100644 index 0000000000..608a1c3c46 --- /dev/null +++ b/src/geoarrow/geoarrow-cast/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "geoarrow-cast" +version = "0.7.0" +authors = ["Kyle Barron "] +edition = "2024" +license = "MIT OR Apache-2.0" +repository = "https://github.com/geoarrow/geoarrow-rs" +description = "Functions for converting from one GeoArrow geometry type to another." + +[dependencies] +arrow-schema = {workspace = true} +geo-traits = {workspace = true} +geoarrow-array = {workspace = true} +geoarrow-schema = {workspace = true} +wkt = {workspace = true} diff --git a/src/geoarrow/geoarrow-cast/src/cast.rs b/src/geoarrow/geoarrow-cast/src/cast.rs new file mode 100644 index 0000000000..2d2df183cb --- /dev/null +++ b/src/geoarrow/geoarrow-cast/src/cast.rs @@ -0,0 +1,564 @@ +//! Cast kernels to convert [`GeoArrowArray`] to other geometry types. + +use std::sync::Arc; + +use arrow_schema::ArrowError; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, + array::{GeometryArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray}, + builder::{ + GeometryCollectionBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder, + MultiPolygonBuilder, PointBuilder, PolygonBuilder, + }, + capacity::{LineStringCapacity, PolygonCapacity}, + cast::{AsGeoArrowArray, from_wkb, from_wkt, to_wkb, to_wkb_view, to_wkt, to_wkt_view}, +}; +use geoarrow_schema::{GeoArrowType, error::GeoArrowResult}; + +/// Cast a [`GeoArrowArray`] to another [`GeoArrowType`]. +/// +/// ### Criteria: +/// +/// - Dimension must be compatible: +/// - If the source array and destination type are both dimension-aware, then their dimensions +/// must match. +/// - Casts from dimension-aware to dimensionless arrays (`GeometryArray`, `WkbArray`, +/// `WkbViewArray`, `WktArray`, `WktViewArray`) are always allowed. +/// - GeoArrow [`Metadata`][geoarrow_schema::Metadata] on the [`GeoArrowType`] must match. Use +/// [`GeoArrowArray::with_metadata`] +/// to change the metadata on an array. +/// +/// ### Infallible casts: +/// +/// As long as the above criteria are met, these casts will always succeed without erroring. +/// +/// - The same geometry type with different coord types. +/// - Any source array type to `Geometry`, `Wkb`, `LargeWkb`, `WkbView`, `Wkt`, `LargeWkt`, or +/// `WktView`. +/// - `Point` to `MultiPoint` +/// - `LineString` to `MultiLineString` +/// - `Polygon` to `MultiPolygon` +/// +/// ### Fallible casts: +/// +/// - `Geometry` to any other native type. +/// - Parsing `WKB` or `WKT` to any native type other than `Geometry`. +/// - `MultiPoint` to `Point` +/// - `MultiLineString` to `LineString` +/// - `MultiPolygon` to `Polygon` +/// +// TODO: need to check this behavior: +// +// - Casts from dimensionless arrays to dimension-aware arrays are never allowed. +#[allow(clippy::collapsible_if)] +pub fn cast( + array: &dyn GeoArrowArray, + to_type: &GeoArrowType, +) -> GeoArrowResult> { + // We want to error if the dimensions aren't compatible, but allow conversions to + // `GeometryArray`, `WKB`, etc where the target array isn't parameterized by a specific + // dimension. + if let (Some(from_dim), Some(to_dim)) = (array.data_type().dimension(), to_type.dimension()) { + if from_dim != to_dim { + return Err(ArrowError::CastError(format!( + "Cannot cast from {from_dim:?} to {to_dim:?}: incompatible dimensions", + )) + .into()); + } + } + + if array.data_type().metadata() != to_type.metadata() { + return Err(ArrowError::CastError(format!( + "Cannot cast from {:?} to {:?}: incompatible metadata", + array.data_type().metadata(), + to_type.metadata(), + )) + .into()); + } + + use GeoArrowType::*; + let out: Arc = match (array.data_type(), to_type) { + (Point(_), Point(to_type)) => { + let array = array.as_point(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (Point(_), MultiPoint(to_type)) => { + let mp_array = MultiPointArray::from(array.as_point().clone()); + Arc::new(mp_array.into_coord_type(to_type.coord_type())) + } + (Point(_), Geometry(to_type)) => { + let geom_array = GeometryArray::from(array.as_point().clone()); + Arc::new(geom_array.into_coord_type(to_type.coord_type())) + } + (LineString(_), LineString(to_type)) => { + let array = array.as_line_string(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (LineString(_), MultiLineString(to_type)) => { + let mp_array = MultiLineStringArray::from(array.as_line_string().clone()); + Arc::new(mp_array.into_coord_type(to_type.coord_type())) + } + (LineString(_), Geometry(to_type)) => { + let geom_array = GeometryArray::from(array.as_line_string().clone()); + Arc::new(geom_array.into_coord_type(to_type.coord_type())) + } + (Polygon(_), Polygon(to_type)) => { + let array = array.as_polygon(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (Polygon(_), MultiPolygon(to_type)) => { + let mp_array = MultiPolygonArray::from(array.as_polygon().clone()); + Arc::new(mp_array.into_coord_type(to_type.coord_type())) + } + (Polygon(_), Geometry(to_type)) => { + let geom_array = GeometryArray::from(array.as_polygon().clone()); + Arc::new(geom_array.into_coord_type(to_type.coord_type())) + } + (MultiPoint(_), Point(to_type)) => { + let mut builder = PointBuilder::with_capacity(to_type.clone(), array.len()); + for geom in array.as_multi_point().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (MultiPoint(_), MultiPoint(to_type)) => { + let array = array.as_multi_point(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (MultiPoint(_), Geometry(to_type)) => { + let geom_array = GeometryArray::from(array.as_multi_point().clone()); + Arc::new(geom_array.into_coord_type(to_type.coord_type())) + } + (MultiLineString(_), LineString(to_type)) => { + let ml_array = array.as_multi_line_string(); + let ml_capacity = ml_array.buffer_lengths(); + let ls_capacity = + LineStringCapacity::new(ml_capacity.coord_capacity(), ml_capacity.geom_capacity()); + let mut builder = LineStringBuilder::with_capacity(to_type.clone(), ls_capacity); + for geom in array.as_multi_line_string().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (MultiLineString(_), MultiLineString(to_type)) => { + let array = array.as_multi_line_string(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (MultiLineString(_), Geometry(to_type)) => { + let geom_array = GeometryArray::from(array.as_multi_line_string().clone()); + Arc::new(geom_array.into_coord_type(to_type.coord_type())) + } + (MultiPolygon(_), Polygon(to_type)) => { + let mp_array = array.as_multi_polygon(); + let mp_capacity = mp_array.buffer_lengths(); + let p_capacity = PolygonCapacity::new( + mp_capacity.coord_capacity(), + mp_capacity.ring_capacity(), + mp_capacity.geom_capacity(), + ); + let mut builder = PolygonBuilder::with_capacity(to_type.clone(), p_capacity); + for geom in mp_array.iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (MultiPolygon(_), MultiPolygon(to_type)) => { + let array = array.as_multi_polygon(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (MultiPolygon(_), Geometry(to_type)) => { + let geom_array = GeometryArray::from(array.as_multi_polygon().clone()); + Arc::new(geom_array.into_coord_type(to_type.coord_type())) + } + (Geometry(_), Point(to_type)) => { + let mut builder = PointBuilder::with_capacity(to_type.clone(), array.len()); + for geom in array.as_geometry().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (Geometry(_), LineString(to_type)) => { + let g_array = array.as_geometry(); + let g_capacity = g_array.buffer_lengths(); + let ls_capacity = g_capacity.line_string(to_type.dimension()); + let mut builder = LineStringBuilder::with_capacity(to_type.clone(), ls_capacity); + for geom in array.as_geometry().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (Geometry(_), Polygon(to_type)) => { + let g_array = array.as_geometry(); + let g_capacity = g_array.buffer_lengths(); + let p_capacity = g_capacity.polygon(to_type.dimension()); + let mut builder = PolygonBuilder::with_capacity(to_type.clone(), p_capacity); + for geom in array.as_geometry().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (Geometry(_), MultiPoint(to_type)) => { + let g_array = array.as_geometry(); + let g_capacity = g_array.buffer_lengths(); + let mp_capacity = g_capacity.multi_point(to_type.dimension()); + let mut builder = MultiPointBuilder::with_capacity(to_type.clone(), mp_capacity); + for geom in array.as_geometry().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (Geometry(_), MultiLineString(to_type)) => { + let g_array = array.as_geometry(); + let g_capacity = g_array.buffer_lengths(); + let ml_capacity = g_capacity.multi_line_string(to_type.dimension()); + let mut builder = MultiLineStringBuilder::with_capacity(to_type.clone(), ml_capacity); + for geom in array.as_geometry().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (Geometry(_), MultiPolygon(to_type)) => { + let g_array = array.as_geometry(); + let g_capacity = g_array.buffer_lengths(); + let mp_capacity = g_capacity.multi_polygon(to_type.dimension()); + let mut builder = MultiPolygonBuilder::with_capacity(to_type.clone(), mp_capacity); + for geom in array.as_geometry().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (Geometry(_), GeometryCollection(to_type)) => { + let g_array = array.as_geometry(); + let g_capacity = g_array.buffer_lengths(); + let gc_capacity = g_capacity.geometry_collection(to_type.dimension()); + let mut builder = + GeometryCollectionBuilder::with_capacity(to_type.clone(), gc_capacity); + for geom in array.as_geometry().iter() { + builder.push_geometry(geom.transpose()?.as_ref())?; + } + Arc::new(builder.finish()) + } + (Geometry(_), Geometry(to_type)) => { + let array = array.as_geometry(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (GeometryCollection(_), GeometryCollection(to_type)) => { + let array = array.as_geometry_collection(); + Arc::new(array.clone().into_coord_type(to_type.coord_type())) + } + (GeometryCollection(_), Geometry(to_type)) => { + let geom_array = GeometryArray::from(array.as_geometry_collection().clone()); + Arc::new(geom_array.into_coord_type(to_type.coord_type())) + } + (_, Wkb(_)) => Arc::new(to_wkb::(array)?), + (_, LargeWkb(_)) => Arc::new(to_wkb::(array)?), + (_, WkbView(_)) => Arc::new(to_wkb_view(array)?), + (_, Wkt(_)) => Arc::new(to_wkt::(array)?), + (_, LargeWkt(_)) => Arc::new(to_wkt::(array)?), + (_, WktView(_)) => Arc::new(to_wkt_view(array)?), + (Wkb(_), _) => from_wkb(array.as_wkb::(), to_type.clone())?, + (LargeWkb(_), _) => from_wkb(array.as_wkb::(), to_type.clone())?, + (WkbView(_), _) => from_wkb(array.as_wkb_view(), to_type.clone())?, + (Wkt(_), _) => from_wkt(array.as_wkt::(), to_type.clone())?, + (LargeWkt(_), _) => from_wkt(array.as_wkt::(), to_type.clone())?, + (WktView(_), _) => from_wkt(array.as_wkt_view(), to_type.clone())?, + (_, _) => { + return Err(ArrowError::CastError(format!( + "Unsupported cast from {:?} to {:?}", + array.data_type(), + to_type + )) + .into()); + } + }; + Ok(out) +} + +// #[cfg(test)] +// mod test { +// use geoarrow_array::{IntoArrow, builder::MultiPointBuilder, test}; +// use geoarrow_schema::{ +// CoordType, Dimension, GeometryType, LineStringType, MultiLineStringType, MultiPointType, +// MultiPolygonType, PointType, PolygonType, WkbType, +// }; +// use wkt::wkt; + +// use super::*; + +// #[test] +// fn test_point() { +// let array = test::point::array(CoordType::Interleaved, Dimension::XY); + +// // Cast to the same type +// let array2 = cast(&array, &array.data_type()).unwrap(); +// assert_eq!(&array, array2.as_point()); + +// // Cast to other coord type +// let p_type = PointType::new(Dimension::XY, array.data_type().metadata().clone()) +// .with_coord_type(CoordType::Separated); +// let array3 = cast(&array, &p_type.into()).unwrap(); +// assert_eq!( +// array3.as_point().extension_type().coord_type(), +// CoordType::Separated +// ); + +// // Cast to multi point +// let mp_type = MultiPointType::new(Dimension::XY, array.data_type().metadata().clone()) +// .with_coord_type(CoordType::Interleaved); +// let mp_array = cast(&array, &mp_type.into()).unwrap(); +// assert!(mp_array.as_multi_point_opt().is_some()); + +// // Cast to geometry +// let mp_type = GeometryType::new(array.data_type().metadata().clone()) +// .with_coord_type(CoordType::Interleaved); +// let mp_array = cast(&array, &mp_type.into()).unwrap(); +// assert!(mp_array.as_geometry_opt().is_some()); +// } + +// #[test] +// fn cast_to_wkb() { +// let array = test::point::array(CoordType::Interleaved, Dimension::XY); + +// let wkb_type = GeoArrowType::Wkb(WkbType::new(array.data_type().metadata().clone())); +// let wkb_array = cast(&array, &wkb_type).unwrap(); +// assert!(wkb_array.as_wkb_opt::().is_some()); + +// let large_wkb_type = +// GeoArrowType::LargeWkb(WkbType::new(array.data_type().metadata().clone())); +// let wkb_array = cast(&array, &large_wkb_type).unwrap(); +// assert!(wkb_array.as_wkb_opt::().is_some()); +// } + +// #[test] +// fn downcast_multi_points_to_points() { +// let mp1 = wkt! { MULTIPOINT(0.0 0.0) }; +// let mp2 = wkt! { MULTIPOINT(1.0 2.0) }; +// let mp3 = wkt! { MULTIPOINT(3.0 4.0) }; + +// let typ = MultiPointType::new(Dimension::XY, Default::default()); +// let mp_arr = MultiPointBuilder::from_multi_points(&[mp1, mp2, mp3], typ).finish(); +// let (coord_type, dim, metadata) = mp_arr.extension_type().clone().into_inner(); +// let p_type = PointType::new(dim, metadata).with_coord_type(coord_type); +// let p_arr = cast(&mp_arr, &p_type.into()).unwrap(); +// assert!(p_arr.as_point_opt().is_some()); +// } + +// #[test] +// fn downcast_multi_points_to_points_fails() { +// let mp1 = wkt! { MULTIPOINT(0.0 0.0) }; +// let mp2 = wkt! { MULTIPOINT(1.0 2.0) }; +// let mp3 = wkt! { MULTIPOINT(3.0 4.0, 5.0 6.0) }; + +// let typ = MultiPointType::new(Dimension::XY, Default::default()); +// let mp_arr = MultiPointBuilder::from_multi_points(&[mp1, mp2, mp3], typ).finish(); +// let (coord_type, dim, metadata) = mp_arr.extension_type().clone().into_inner(); +// let p_type = PointType::new(dim, metadata).with_coord_type(coord_type); +// assert!(cast(&mp_arr, &p_type.into()).is_err()); +// } + +// #[test] +// fn downcast_multi_line_strings_to_line_strings() { +// let geoms = geoarrow_test::raw::multilinestring::xy::geoms(); +// let single = geoms[0].clone().unwrap(); + +// let typ = MultiLineStringType::new(Dimension::XY, Default::default()); +// let mp_arr = MultiLineStringBuilder::from_multi_line_strings( +// &[single.clone(), single.clone(), single], +// typ, +// ) +// .finish(); +// let (coord_type, dim, metadata) = mp_arr.extension_type().clone().into_inner(); +// let p_type = LineStringType::new(dim, metadata).with_coord_type(coord_type); +// let p_arr = cast(&mp_arr, &p_type.into()).unwrap(); +// assert!(p_arr.as_line_string_opt().is_some()); +// } + +// #[test] +// fn downcast_multi_line_strings_to_line_strings_fails() { +// let geoms = geoarrow_test::raw::multilinestring::xy::geoms(); +// let single = geoms[0].clone().unwrap(); +// let multi = geoms[1].clone().unwrap(); + +// let typ = MultiLineStringType::new(Dimension::XY, Default::default()); +// let mp_arr = +// MultiLineStringBuilder::from_multi_line_strings(&[single.clone(), single, multi], typ) +// .finish(); +// let (coord_type, dim, metadata) = mp_arr.extension_type().clone().into_inner(); +// let p_type = LineStringType::new(dim, metadata).with_coord_type(coord_type); +// assert!(cast(&mp_arr, &p_type.into()).is_err()); +// } + +// #[test] +// fn downcast_multi_polygons_to_polygons() { +// let geoms = geoarrow_test::raw::multipolygon::xy::geoms(); +// let single = geoms[0].clone().unwrap(); + +// let typ = MultiPolygonType::new(Dimension::XY, Default::default()); +// let mp_arr = MultiPolygonBuilder::from_multi_polygons( +// &[single.clone(), single.clone(), single], +// typ, +// ) +// .finish(); +// let (coord_type, dim, metadata) = mp_arr.extension_type().clone().into_inner(); +// let p_type = PolygonType::new(dim, metadata).with_coord_type(coord_type); +// let p_arr = cast(&mp_arr, &p_type.into()).unwrap(); +// assert!(p_arr.as_polygon_opt().is_some()); +// } + +// #[test] +// fn downcast_multi_polygons_to_polygons_fails() { +// let geoms = geoarrow_test::raw::multipolygon::xy::geoms(); +// let single = geoms[0].clone().unwrap(); +// let multi = geoms[1].clone().unwrap(); + +// let typ = MultiPolygonType::new(Dimension::XY, Default::default()); +// let mp_arr = +// MultiPolygonBuilder::from_multi_polygons(&[single.clone(), single, multi], typ) +// .finish(); +// let (coord_type, dim, metadata) = mp_arr.extension_type().clone().into_inner(); +// let p_type = PolygonType::new(dim, metadata).with_coord_type(coord_type); +// assert!(cast(&mp_arr, &p_type.into()).is_err()); +// } + +// #[test] +// fn downcast_geometry_to_point() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::point::array(coord_type, dim); +// let orig_type = array.data_type().clone(); +// let g_array = GeometryArray::from(array.clone()); + +// let casted = cast(&g_array, &orig_type).unwrap(); +// assert_eq!(casted.as_point(), &array); +// } +// } +// } + +// #[test] +// fn downcast_geometry_to_line_string() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::linestring::array(coord_type, dim); +// let orig_type = array.data_type().clone(); +// let g_array = GeometryArray::from(array.clone()); + +// let casted = cast(&g_array, &orig_type).unwrap(); +// assert_eq!(casted.as_line_string(), &array); +// } +// } +// } + +// #[test] +// fn downcast_geometry_to_polygon() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::polygon::array(coord_type, dim); +// let orig_type = array.data_type().clone(); +// let g_array = GeometryArray::from(array.clone()); + +// let casted = cast(&g_array, &orig_type).unwrap(); +// assert_eq!(casted.as_polygon(), &array); +// } +// } +// } + +// #[test] +// fn downcast_geometry_to_multi_point() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::multipoint::array(coord_type, dim); +// let orig_type = array.data_type().clone(); +// let g_array = GeometryArray::from(array.clone()); + +// let casted = cast(&g_array, &orig_type).unwrap(); +// assert_eq!(casted.as_multi_point(), &array); +// } +// } +// } + +// #[test] +// fn downcast_geometry_to_multi_line_string() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::multilinestring::array(coord_type, dim); +// let orig_type = array.data_type().clone(); +// let g_array = GeometryArray::from(array.clone()); + +// let casted = cast(&g_array, &orig_type).unwrap(); +// assert_eq!(casted.as_multi_line_string(), &array); +// } +// } +// } + +// #[test] +// fn downcast_geometry_to_multi_polygon() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::multipolygon::array(coord_type, dim); +// let orig_type = array.data_type().clone(); +// let g_array = GeometryArray::from(array.clone()); + +// let casted = cast(&g_array, &orig_type).unwrap(); +// assert_eq!(casted.as_multi_polygon(), &array); +// } +// } +// } + +// #[test] +// fn downcast_geometry_to_geometry_collection() { +// for coord_type in [CoordType::Interleaved, CoordType::Separated] { +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// for prefer_multi in [false, true] { +// let array = test::geometrycollection::array(coord_type, dim, prefer_multi); +// let orig_type = array.data_type().clone(); +// let g_array = GeometryArray::from(array.clone()); + +// let casted = cast(&g_array, &orig_type).unwrap(); +// assert_eq!(casted.as_geometry_collection(), &array); +// } +// } +// } +// } + +// #[test] +// fn downcast_geometry_to_point_fails() { +// let array = test::geometry::array(Default::default(), false); +// let point_type = PointType::new(Dimension::XY, Default::default()); +// assert!(cast(&array, &point_type.into()).is_err()); +// } +// } diff --git a/src/geoarrow/geoarrow-cast/src/downcast.rs b/src/geoarrow/geoarrow-cast/src/downcast.rs new file mode 100644 index 0000000000..2a96135340 --- /dev/null +++ b/src/geoarrow/geoarrow-cast/src/downcast.rs @@ -0,0 +1,687 @@ +//! Utilities for inferring native geometry types from arbitrary GeoArrow input. + +use std::collections::HashSet; + +use arrow_schema::ArrowError; +use geo_traits::{ + GeometryCollectionTrait, GeometryTrait, MultiLineStringTrait, MultiPointTrait, + MultiPolygonTrait, +}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, cast::AsGeoArrowArray}; +use geoarrow_schema::{ + Dimension, GeoArrowType, + error::{GeoArrowError, GeoArrowResult}, +}; + +/// Infer the simplest, most-compact native geometry type from the provided arrays, if any. +/// +/// This accepts an [Iterator] of [`GeoArrowArray`] because it's important to have schema stability +/// across batches of a chunked GeoArrow array. You don't want to separately downcast different +/// batches because they could have different mixtures of geometry types. +/// +/// A return value of `None` means that there is no common native type (other than `Geometry`) to +/// downcast to. So your input data can be represented as a `GeometryArray` or as a serialized +/// array. +/// +/// After inferring a common type, use [`cast`][crate::cast::cast] to cast input to a specific +/// output type. +/// +/// ## Examples +/// +/// Let's say we have a WKB array with unknown data. We can use `infer_downcast_type` to find the +/// simplest geometry type that fits our data. +/// +/// ``` +/// # use geoarrow_schema::GeoArrowType; +/// # use geoarrow_array::builder::WkbBuilder; +/// use geoarrow_cast::cast::cast; +/// use geoarrow_cast::downcast::{NativeType, infer_downcast_type}; +/// # use geoarrow_schema::{Dimension, PointType}; +/// use wkt::wkt; +/// +/// let mut builder = WkbBuilder::::new(Default::default()); +/// +/// builder.push_geometry(Some(&wkt!(POINT (0. 1.)))); +/// builder.push_geometry(Some(&wkt!(POINT (2. 3.)))); +/// builder.push_geometry(Some(&wkt!(POINT (4. 5.)))); +/// +/// let wkb_array = builder.finish(); +/// +/// let (native_type, dim) = infer_downcast_type(std::iter::once(&wkb_array as _)) +/// .unwrap() +/// .unwrap(); +/// assert_eq!(native_type, NativeType::Point); +/// assert_eq!(dim, Dimension::XY); +/// +/// let point_type = PointType::new(Dimension::XY, Default::default()); +/// cast(&wkb_array, &GeoArrowType::Point(point_type)).unwrap(); +/// ``` +/// +/// However, if you have geometry types in your array that aren't compatible with a single GeoArrow +/// native type, you'll get `None` back from `infer_downcast_type`. +/// +/// ``` +/// # use geoarrow_array::builder::WkbBuilder; +/// use geoarrow_cast::downcast::infer_downcast_type; +/// # use geoarrow_schema::WkbType; +/// use wkt::wkt; +/// +/// let wkb_type = WkbType::new(Default::default()); +/// let mut builder = WkbBuilder::::new(wkb_type); +/// +/// builder.push_geometry(Some(&wkt!(POINT (0. 1.)))); +/// builder.push_geometry(Some(&wkt!(LINESTRING (2. 3., 4. 5.)))); +/// +/// let wkb_array = builder.finish(); +/// +/// assert_eq!( +/// infer_downcast_type(std::iter::once(&wkb_array as _)).unwrap(), +/// None +/// ); +/// ``` +/// +pub fn infer_downcast_type<'a>( + arrays: impl Iterator, +) -> GeoArrowResult> { + let mut type_ids = HashSet::new(); + for array in arrays { + let type_id = get_type_ids(array)?; + type_ids.extend(type_id); + } + + if type_ids.is_empty() { + return Err(ArrowError::CastError( + "Empty iterator of arrays passed to infer_downcast_type".to_string(), + ) + .into()); + } + + infer_from_native_type_and_dimension(type_ids) +} + +/// Get GeoArrow type ids from an array +fn get_type_ids(array: &dyn GeoArrowArray) -> GeoArrowResult> { + use GeoArrowType::*; + let type_ids: HashSet = match array.data_type() { + Point(typ) => [NativeTypeAndDimension::new( + NativeType::Point, + typ.dimension(), + )] + .into_iter() + .collect(), + LineString(typ) => [NativeTypeAndDimension::new( + NativeType::LineString, + typ.dimension(), + )] + .into_iter() + .collect(), + Polygon(typ) => [NativeTypeAndDimension::new( + NativeType::Polygon, + typ.dimension(), + )] + .into_iter() + .collect(), + MultiPoint(typ) => { + let dim = typ.dimension(); + let array = array.as_multi_point(); + array + .iter() + .flatten() + .map(|multi_point| { + let geom_type = if multi_point?.num_points() >= 2 { + NativeTypeAndDimension::new(NativeType::MultiPoint, dim) + } else { + NativeTypeAndDimension::new(NativeType::Point, dim) + }; + Ok::<_, GeoArrowError>(geom_type) + }) + .collect::>>()? + } + MultiLineString(typ) => { + let dim = typ.dimension(); + let array = array.as_multi_line_string(); + array + .iter() + .flatten() + .map(|multi_line_string| { + let geom_type = if multi_line_string?.num_line_strings() >= 2 { + NativeTypeAndDimension::new(NativeType::MultiLineString, dim) + } else { + NativeTypeAndDimension::new(NativeType::LineString, dim) + }; + Ok::<_, GeoArrowError>(geom_type) + }) + .collect::>>()? + } + MultiPolygon(typ) => { + let dim = typ.dimension(); + let array = array.as_multi_polygon(); + array + .iter() + .flatten() + .map(|multi_polygon| { + let geom_type = if multi_polygon?.num_polygons() >= 2 { + NativeTypeAndDimension::new(NativeType::MultiPolygon, dim) + } else { + NativeTypeAndDimension::new(NativeType::Polygon, dim) + }; + Ok::<_, GeoArrowError>(geom_type) + }) + .collect::>>()? + } + GeometryCollection(typ) => { + let dim = typ.dimension(); + let array = array.as_geometry_collection(); + array + .iter() + .flatten() + .map(|geometry_collection| { + let geometry_collection = geometry_collection?; + let geom_type = if geometry_collection.num_geometries() == 1 { + let geom_type = NativeType::from_geometry_trait( + &geometry_collection.geometry(0).unwrap(), + ); + NativeTypeAndDimension::new(geom_type, dim) + } else { + NativeTypeAndDimension::new(NativeType::GeometryCollection, dim) + }; + Ok::<_, GeoArrowError>(geom_type) + }) + .collect::>>()? + } + Rect(typ) => [NativeTypeAndDimension::new( + NativeType::Rect, + typ.dimension(), + )] + .into_iter() + .collect(), + Geometry(_) => { + let type_ids: HashSet = + HashSet::from_iter(array.as_geometry().type_ids().iter().copied()); + type_ids + .into_iter() + .map(NativeTypeAndDimension::from_type_id) + .collect() + } + Wkb(_) => array + .as_wkb::() + .iter() + .flatten() + .map(|wkb| { + let wkb = wkb?; + let dim = wkb.dim().try_into()?; + let geom_type = NativeType::from_geometry_trait(&wkb); + Ok(NativeTypeAndDimension::new(geom_type, dim)) + }) + .collect::>>()?, + LargeWkb(_) => array + .as_wkb::() + .iter() + .flatten() + .map(|wkb| { + let wkb = wkb?; + let dim = wkb.dim().try_into()?; + let geom_type = NativeType::from_geometry_trait(&wkb); + Ok(NativeTypeAndDimension::new(geom_type, dim)) + }) + .collect::>>()?, + WkbView(_) => array + .as_wkb_view() + .iter() + .flatten() + .map(|wkb| { + let wkb = wkb?; + let dim = wkb.dim().try_into()?; + let geom_type = NativeType::from_geometry_trait(&wkb); + Ok(NativeTypeAndDimension::new(geom_type, dim)) + }) + .collect::>>()?, + Wkt(_) => array + .as_wkt::() + .inner() + .iter() + .flatten() + .map(|s| { + let (wkt_type, wkt_dim) = wkt::infer_type(s).map_err(ArrowError::CastError)?; + let geom_type = + NativeTypeAndDimension::new(wkt_type.into(), wkt_dim_to_geoarrow_dim(wkt_dim)); + Ok(geom_type) + }) + .collect::>>()?, + LargeWkt(_) => array + .as_wkt::() + .inner() + .iter() + .flatten() + .map(|s| { + let (wkt_type, wkt_dim) = wkt::infer_type(s).map_err(ArrowError::CastError)?; + let geom_type = + NativeTypeAndDimension::new(wkt_type.into(), wkt_dim_to_geoarrow_dim(wkt_dim)); + Ok(geom_type) + }) + .collect::>>()?, + WktView(_) => array + .as_wkt_view() + .inner() + .iter() + .flatten() + .map(|s| { + let (wkt_type, wkt_dim) = wkt::infer_type(s).map_err(ArrowError::CastError)?; + let geom_type = + NativeTypeAndDimension::new(wkt_type.into(), wkt_dim_to_geoarrow_dim(wkt_dim)); + Ok(geom_type) + }) + .collect::>>()?, + }; + Ok(type_ids) +} + +fn wkt_dim_to_geoarrow_dim(wkt_dim: wkt::types::Dimension) -> Dimension { + match wkt_dim { + wkt::types::Dimension::XY => Dimension::XY, + wkt::types::Dimension::XYZ => Dimension::XYZ, + wkt::types::Dimension::XYM => Dimension::XYM, + wkt::types::Dimension::XYZM => Dimension::XYZM, + } +} + +fn infer_from_native_type_and_dimension( + type_ids: HashSet, +) -> GeoArrowResult> { + // Easy, if there's only one type, return that + if type_ids.len() == 1 { + let type_id = type_ids.into_iter().next().unwrap(); + return Ok(Some((type_id.geometry_type, type_id.dim))); + } + + // If there are multiple dimensions, we can't cast to a single type + let (dims, native_types): (HashSet<_>, HashSet<_>) = type_ids + .iter() + .map(|type_id| (type_id.dim, type_id.geometry_type)) + .unzip(); + if dims.len() > 1 { + return Ok(None); + } + let dim = dims.into_iter().next().unwrap(); + + if native_types.len() == 2 { + if native_types.contains(&NativeType::Point) + && native_types.contains(&NativeType::MultiPoint) + { + return Ok(Some((NativeType::MultiPoint, dim))); + } + + if native_types.contains(&NativeType::LineString) + && native_types.contains(&NativeType::MultiLineString) + { + return Ok(Some((NativeType::MultiLineString, dim))); + } + + if native_types.contains(&NativeType::Polygon) + && native_types.contains(&NativeType::MultiPolygon) + { + return Ok(Some((NativeType::MultiPolygon, dim))); + } + } + + Ok(None) +} + +/// An enum representing the different native GeoArrow geometry types. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum NativeType { + #[allow(missing_docs)] + Point, + #[allow(missing_docs)] + LineString, + #[allow(missing_docs)] + Polygon, + #[allow(missing_docs)] + MultiPoint, + #[allow(missing_docs)] + MultiLineString, + #[allow(missing_docs)] + MultiPolygon, + #[allow(missing_docs)] + GeometryCollection, + #[allow(missing_docs)] + Rect, +} + +impl NativeType { + fn from_geometry_trait(geometry: &impl GeometryTrait) -> Self { + match geometry.as_type() { + geo_traits::GeometryType::Point(_) => Self::Point, + geo_traits::GeometryType::LineString(_) => Self::LineString, + geo_traits::GeometryType::Polygon(_) => Self::Polygon, + geo_traits::GeometryType::MultiPoint(_) => Self::MultiPoint, + geo_traits::GeometryType::MultiLineString(_) => Self::MultiLineString, + geo_traits::GeometryType::MultiPolygon(_) => Self::MultiPolygon, + geo_traits::GeometryType::GeometryCollection(_) => Self::GeometryCollection, + _ => panic!("Unsupported geometry type"), + } + } +} + +impl From for NativeType { + fn from(value: wkt::types::GeometryType) -> Self { + match value { + wkt::types::GeometryType::Point => Self::Point, + wkt::types::GeometryType::LineString => Self::LineString, + wkt::types::GeometryType::Polygon => Self::Polygon, + wkt::types::GeometryType::MultiPoint => Self::MultiPoint, + wkt::types::GeometryType::MultiLineString => Self::MultiLineString, + wkt::types::GeometryType::MultiPolygon => Self::MultiPolygon, + wkt::types::GeometryType::GeometryCollection => Self::GeometryCollection, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +struct NativeTypeAndDimension { + geometry_type: NativeType, + dim: Dimension, +} + +impl NativeTypeAndDimension { + fn new(geometry_type: NativeType, dim: Dimension) -> Self { + Self { geometry_type, dim } + } + + fn from_type_id(type_id: i8) -> Self { + let dim = match type_id / 10 { + 0 => Dimension::XY, + 1 => Dimension::XYZ, + 2 => Dimension::XYM, + 3 => Dimension::XYZM, + _ => panic!("unsupported type_id: {type_id}"), + }; + let geometry_type = match type_id % 10 { + 1 => NativeType::Point, + 2 => NativeType::LineString, + 3 => NativeType::Polygon, + 4 => NativeType::MultiPoint, + 5 => NativeType::MultiLineString, + 6 => NativeType::MultiPolygon, + 7 => NativeType::GeometryCollection, + _ => panic!("unsupported type id"), + }; + Self { geometry_type, dim } + } +} + +impl From<(NativeType, Dimension)> for NativeTypeAndDimension { + fn from(value: (NativeType, Dimension)) -> Self { + Self::new(value.0, value.1) + } +} + +// #[cfg(test)] +// mod test { +// use geoarrow_array::{ +// cast::{to_wkb, to_wkt}, +// test, +// }; +// use geoarrow_schema::CoordType; + +// use super::*; + +// #[test] +// fn infer_get_type_ids_point() { +// // Point +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::point::array(CoordType::Interleaved, dim); +// assert_eq!( +// get_type_ids(&array).unwrap(), +// HashSet::from_iter([NativeTypeAndDimension::new(NativeType::Point, dim)]) +// ); +// } +// } + +// #[test] +// fn infer_get_type_ids_linestring() { +// // LineString +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::linestring::array(CoordType::Interleaved, dim); +// assert_eq!( +// get_type_ids(&array).unwrap(), +// HashSet::from_iter([NativeTypeAndDimension::new(NativeType::LineString, dim)]) +// ); +// } +// } + +// #[test] +// fn infer_get_type_ids_polygon() { +// // Polygon +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::polygon::array(CoordType::Interleaved, dim); +// assert_eq!( +// get_type_ids(&array).unwrap(), +// HashSet::from_iter([NativeTypeAndDimension::new(NativeType::Polygon, dim)]) +// ); +// } +// } + +// #[test] +// fn infer_get_type_ids_multipoint() { +// // MultiPoint +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::multipoint::array(CoordType::Interleaved, dim); +// assert_eq!( +// get_type_ids(&array).unwrap(), +// HashSet::from_iter([ +// NativeTypeAndDimension::new(NativeType::Point, dim), +// NativeTypeAndDimension::new(NativeType::MultiPoint, dim), +// ]) +// ); +// } +// } + +// #[test] +// fn infer_get_type_ids_multilinestring() { +// // MultiLineString +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::multilinestring::array(CoordType::Interleaved, dim); +// assert_eq!( +// get_type_ids(&array).unwrap(), +// HashSet::from_iter([ +// NativeTypeAndDimension::new(NativeType::LineString, dim), +// NativeTypeAndDimension::new(NativeType::MultiLineString, dim), +// ]) +// ); +// } +// } + +// #[test] +// fn infer_get_type_ids_multipolygon() { +// // MultiPolygon +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::multipolygon::array(CoordType::Interleaved, dim); +// assert_eq!( +// get_type_ids(&array).unwrap(), +// HashSet::from_iter([ +// NativeTypeAndDimension::new(NativeType::Polygon, dim), +// NativeTypeAndDimension::new(NativeType::MultiPolygon, dim), +// ]) +// ); +// } +// } + +// #[test] +// fn infer_get_type_ids_geometrycollection() { +// // GeometryCollection +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// let array = test::geometrycollection::array(CoordType::Interleaved, dim, false); +// assert_eq!( +// get_type_ids(&array).unwrap(), +// HashSet::from_iter([ +// NativeTypeAndDimension::new(NativeType::Point, dim), +// NativeTypeAndDimension::new(NativeType::LineString, dim), +// NativeTypeAndDimension::new(NativeType::Polygon, dim), +// NativeTypeAndDimension::new(NativeType::MultiPoint, dim), +// NativeTypeAndDimension::new(NativeType::MultiLineString, dim), +// NativeTypeAndDimension::new(NativeType::MultiPolygon, dim), +// NativeTypeAndDimension::new(NativeType::GeometryCollection, dim), +// ]) +// ); +// } +// } + +// #[test] +// fn infer_get_type_ids_geometry_wkb_wkt() { +// let array = test::geometry::array(CoordType::Interleaved, false); +// let wkb_array = to_wkb::(&array).unwrap(); +// let large_wkb_array = to_wkb::(&array).unwrap(); +// let wkt_array = to_wkt::(&array).unwrap(); +// let large_wkt_array = to_wkt::(&array).unwrap(); + +// let mut expected_types = HashSet::new(); +// for dim in [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ] { +// expected_types.insert(NativeTypeAndDimension::new(NativeType::Point, dim)); +// expected_types.insert(NativeTypeAndDimension::new(NativeType::LineString, dim)); +// expected_types.insert(NativeTypeAndDimension::new(NativeType::Polygon, dim)); +// expected_types.insert(NativeTypeAndDimension::new(NativeType::MultiPoint, dim)); +// expected_types.insert(NativeTypeAndDimension::new( +// NativeType::MultiLineString, +// dim, +// )); +// expected_types.insert(NativeTypeAndDimension::new(NativeType::MultiPolygon, dim)); +// expected_types.insert(NativeTypeAndDimension::new( +// NativeType::GeometryCollection, +// dim, +// )); +// } + +// assert_eq!(get_type_ids(&array).unwrap(), expected_types); +// assert_eq!(get_type_ids(&wkb_array).unwrap(), expected_types); +// assert_eq!(get_type_ids(&large_wkb_array).unwrap(), expected_types); +// assert_eq!(get_type_ids(&wkt_array).unwrap(), expected_types); +// assert_eq!(get_type_ids(&large_wkt_array).unwrap(), expected_types); +// } + +// #[test] +// fn infer_from_one_type() { +// let input_type = NativeTypeAndDimension::new(NativeType::Point, Dimension::XY); +// let type_ids = [input_type].into_iter().collect::>(); +// let resolved_type = infer_from_native_type_and_dimension(type_ids) +// .unwrap() +// .unwrap(); +// assert_eq!(input_type, resolved_type.into()); +// } + +// #[test] +// fn cant_infer_from_two_dims() { +// let input_types = [ +// NativeTypeAndDimension::new(NativeType::Point, Dimension::XY), +// NativeTypeAndDimension::new(NativeType::Point, Dimension::XYZ), +// ]; +// let type_ids = input_types.into_iter().collect::>(); +// assert!( +// infer_from_native_type_and_dimension(type_ids) +// .unwrap() +// .is_none() +// ); +// } + +// #[test] +// fn infer_point_multi_point() { +// let input_types = [ +// NativeTypeAndDimension::new(NativeType::Point, Dimension::XYZ), +// NativeTypeAndDimension::new(NativeType::MultiPoint, Dimension::XYZ), +// ]; +// let type_ids = input_types.into_iter().collect::>(); +// let resolved_type = infer_from_native_type_and_dimension(type_ids) +// .unwrap() +// .unwrap(); +// assert_eq!( +// NativeTypeAndDimension::new(NativeType::MultiPoint, Dimension::XYZ), +// resolved_type.into() +// ); +// } + +// #[test] +// fn infer_linestring_multilinestring() { +// let input_types = [ +// NativeTypeAndDimension::new(NativeType::LineString, Dimension::XYM), +// NativeTypeAndDimension::new(NativeType::MultiLineString, Dimension::XYM), +// ]; +// let type_ids = input_types.into_iter().collect::>(); +// let resolved_type = infer_from_native_type_and_dimension(type_ids) +// .unwrap() +// .unwrap(); +// assert_eq!( +// NativeTypeAndDimension::new(NativeType::MultiLineString, Dimension::XYM), +// resolved_type.into() +// ); +// } + +// #[test] +// fn infer_polygon_multipolygon() { +// let input_types = [ +// NativeTypeAndDimension::new(NativeType::Polygon, Dimension::XYZM), +// NativeTypeAndDimension::new(NativeType::MultiPolygon, Dimension::XYZM), +// ]; +// let type_ids = input_types.into_iter().collect::>(); +// let resolved_type = infer_from_native_type_and_dimension(type_ids) +// .unwrap() +// .unwrap(); +// assert_eq!( +// NativeTypeAndDimension::new(NativeType::MultiPolygon, Dimension::XYZM), +// resolved_type.into() +// ); +// } + +// #[test] +// fn unable_to_infer() { +// let input_types = [ +// NativeTypeAndDimension::new(NativeType::Point, Dimension::XY), +// NativeTypeAndDimension::new(NativeType::LineString, Dimension::XY), +// ]; +// let type_ids = input_types.into_iter().collect::>(); +// assert!( +// infer_from_native_type_and_dimension(type_ids) +// .unwrap() +// .is_none() +// ); +// } +// } diff --git a/src/geoarrow/geoarrow-cast/src/lib.rs b/src/geoarrow/geoarrow-cast/src/lib.rs new file mode 100644 index 0000000000..f7e315c8d9 --- /dev/null +++ b/src/geoarrow/geoarrow-cast/src/lib.rs @@ -0,0 +1,2 @@ +pub mod cast; +pub mod downcast; diff --git a/src/geoarrow/geoarrow-expr-geo/Cargo.toml b/src/geoarrow/geoarrow-expr-geo/Cargo.toml new file mode 100644 index 0000000000..361c92a4c6 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "geoarrow-expr-geo" +version = "0.7.0" +authors = ["Kyle Barron "] +edition = "2024" +license = "MIT OR Apache-2.0" +repository = "https://github.com/geoarrow/geoarrow-rs" +description = "Rust implementation of GeoArrow" + +[dependencies] +arrow-array = {workspace = true} +arrow-buffer = {workspace = true} +geo = {workspace = true} +geo-traits = {workspace = true} +geoarrow-array = {workspace = true} +geoarrow-schema = {workspace = true} + +[dev-dependencies] +geo = {workspace = true} +geoarrow-array = {workspace = true} diff --git a/src/geoarrow/geoarrow-expr-geo/src/affine_ops.rs b/src/geoarrow/geoarrow-expr-geo/src/affine_ops.rs new file mode 100644 index 0000000000..3158834c64 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/affine_ops.rs @@ -0,0 +1,36 @@ +use std::sync::Arc; + +use geo::AffineOps; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, builder::GeometryBuilder, downcast_geoarrow_array, +}; +use geoarrow_schema::{GeometryType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn affine_transform( + array: &dyn GeoArrowArray, + transform: &geo::AffineTransform, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _affine_transform_impl, transform) +} + +fn _affine_transform_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + transform: &geo::AffineTransform, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let mut geo_geom = geometry_to_geo(&geom?)?; + geo_geom.affine_transform_mut(transform); + builder.push_geometry(Some(&geo_geom))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/area.rs b/src/geoarrow/geoarrow-expr-geo/src/area.rs new file mode 100644 index 0000000000..ab50d691fb --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/area.rs @@ -0,0 +1,92 @@ +use arrow_array::{Float64Array, builder::Float64Builder}; +use arrow_buffer::NullBuffer; +use geo::Area; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::{GeoArrowType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn unsigned_area(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _unsigned_area_impl) +} + +pub fn signed_area(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _signed_area_impl) +} + +fn _zeros(len: usize, nulls: Option) -> Float64Array { + let values = vec![0.0f64; len]; + Float64Array::new(values.into(), nulls) +} + +fn _unsigned_area_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + use GeoArrowType::*; + match array.data_type() { + Point(_) | LineString(_) | MultiPoint(_) | MultiLineString(_) => { + Ok(_zeros(array.len(), array.logical_nulls())) + } + _ => _area_impl(array, Area::unsigned_area), + } +} + +fn _signed_area_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + use GeoArrowType::*; + match array.data_type() { + Point(_) | LineString(_) | MultiPoint(_) | MultiLineString(_) => { + Ok(_zeros(array.len(), array.logical_nulls())) + } + _ => _area_impl(array, Area::signed_area), + } +} + +fn _area_impl<'a, F: Fn(&geo::Geometry) -> f64>( + array: &'a impl GeoArrowArrayAccessor<'a>, + area_fn: F, +) -> GeoArrowResult { + let mut builder = Float64Builder::with_capacity(array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + builder.append_value(area_fn(&geo_geom)); + } else { + builder.append_null(); + } + } + + Ok(builder.finish()) +} + +// #[cfg(test)] +// mod test { +// use arrow_array::create_array; +// use geoarrow_schema::{CoordType, Dimension}; + +// use super::*; + +// #[test] +// fn area_zero() { +// let geo_arr = geoarrow_array::test::point::array(CoordType::Interleaved, Dimension::XY); +// let signed = signed_area(&geo_arr).unwrap(); +// let unsigned = unsigned_area(&geo_arr).unwrap(); + +// let expected = create_array!(Float64, [Some(0.0), Some(0.0), None, Some(0.0)]); +// assert_eq!(&signed, expected.as_ref()); +// assert_eq!(&unsigned, expected.as_ref()); +// } + +// #[test] +// fn area_polygon() { +// let geo_arr = geoarrow_array::test::polygon::array(CoordType::Separated, Dimension::XY); +// let signed = signed_area(&geo_arr).unwrap(); +// let unsigned = unsigned_area(&geo_arr).unwrap(); + +// let expected = create_array!(Float64, [Some(550.0), Some(675.0), None, Some(0.0)]); +// assert_eq!(&signed, expected.as_ref()); +// assert_eq!(&unsigned, expected.as_ref()); +// } +// } diff --git a/src/geoarrow/geoarrow-expr-geo/src/bounding_rect.rs b/src/geoarrow/geoarrow-expr-geo/src/bounding_rect.rs new file mode 100644 index 0000000000..b98b084ab2 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/bounding_rect.rs @@ -0,0 +1,29 @@ +use geo::BoundingRect; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::RectArray, builder::RectBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{BoxType, Dimension, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn bounding_rect(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _bounding_rect_impl) +} + +fn _bounding_rect_impl<'a>(array: &'a impl GeoArrowArrayAccessor<'a>) -> GeoArrowResult { + let typ = BoxType::new(Dimension::XY, array.data_type().metadata().clone()); + let mut builder = RectBuilder::with_capacity(typ, array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let rect = geo_geom.bounding_rect(); + builder.push_rect(rect.as_ref()); + } else { + builder.push_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/center.rs b/src/geoarrow/geoarrow-expr-geo/src/center.rs new file mode 100644 index 0000000000..73febd59fe --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/center.rs @@ -0,0 +1,35 @@ +use geo::BoundingRect; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PointArray, builder::PointBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{CoordType, Dimension, PointType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn center(array: &dyn GeoArrowArray, coord_type: CoordType) -> GeoArrowResult { + downcast_geoarrow_array!(array, _center_impl, coord_type) +} + +fn _center_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + coord_type: CoordType, +) -> GeoArrowResult { + let typ = PointType::new(Dimension::XY, array.data_type().metadata().clone()) + .with_coord_type(coord_type); + let mut builder = PointBuilder::with_capacity(typ, array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let center_point = geo_geom + .bounding_rect() + .map(|rect| geo::Point::from(rect.center())); + builder.push_point(center_point.as_ref()); + } else { + builder.push_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/centroid.rs b/src/geoarrow/geoarrow-expr-geo/src/centroid.rs new file mode 100644 index 0000000000..b7cb261245 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/centroid.rs @@ -0,0 +1,33 @@ +use geo::Centroid; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PointArray, builder::PointBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{CoordType, Dimension, PointType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn centroid(array: &dyn GeoArrowArray, coord_type: CoordType) -> GeoArrowResult { + downcast_geoarrow_array!(array, _centroid_impl, coord_type) +} + +fn _centroid_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + coord_type: CoordType, +) -> GeoArrowResult { + let typ = PointType::new(Dimension::XY, array.data_type().metadata().clone()) + .with_coord_type(coord_type); + let mut builder = PointBuilder::with_capacity(typ, array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let centroid = geo_geom.centroid(); + builder.push_point(centroid.as_ref()); + } else { + builder.push_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/chaikin_smoothing.rs b/src/geoarrow/geoarrow-expr-geo/src/chaikin_smoothing.rs new file mode 100644 index 0000000000..a4b418a487 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/chaikin_smoothing.rs @@ -0,0 +1,142 @@ +use std::sync::Arc; + +use geo::ChaikinSmoothing; +use geo_traits::to_geo::{ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPolygon, ToGeoPolygon}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, + array::{LineStringArray, MultiLineStringArray, MultiPolygonArray, PolygonArray}, + builder::{ + GeometryBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPolygonBuilder, + PolygonBuilder, + }, + cast::AsGeoArrowArray, + downcast_geoarrow_array, +}; +use geoarrow_schema::{GeoArrowType, GeometryType, error::GeoArrowResult}; + +use crate::util::{copy_geoarrow_array_ref, to_geo::geometry_to_geo}; + +pub fn chaikin_smoothing( + array: &dyn GeoArrowArray, + n_iterations: usize, +) -> GeoArrowResult> { + use GeoArrowType::*; + match array.data_type() { + Point(_) | MultiPoint(_) | GeometryCollection(_) | Rect(_) => { + Ok(copy_geoarrow_array_ref(array)) + } + LineString(_) => _chaikin_linestring(array.as_line_string(), n_iterations), + Polygon(_) => _chaikin_polygon(array.as_polygon(), n_iterations), + MultiLineString(_) => _chaikin_multi_linestring(array.as_multi_line_string(), n_iterations), + MultiPolygon(_) => _chaikin_multi_polygon(array.as_multi_polygon(), n_iterations), + _ => downcast_geoarrow_array!(array, _chaikin_geometry_impl, n_iterations), + } +} + +fn _chaikin_linestring( + array: &LineStringArray, + n_iterations: usize, +) -> GeoArrowResult> { + let mut builder = LineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_line_string(); + builder.push_line_string(Some(&geo_geom.chaikin_smoothing(n_iterations)))?; + } else { + builder.push_line_string(None::<&geo::LineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _chaikin_polygon( + array: &PolygonArray, + n_iterations: usize, +) -> GeoArrowResult> { + let mut builder = PolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_polygon(); + builder.push_polygon(Some(&geo_geom.chaikin_smoothing(n_iterations)))?; + } else { + builder.push_polygon(None::<&geo::Polygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _chaikin_multi_linestring( + array: &MultiLineStringArray, + n_iterations: usize, +) -> GeoArrowResult> { + let mut builder = MultiLineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_line_string(); + builder.push_multi_line_string(Some(&geo_geom.chaikin_smoothing(n_iterations)))?; + } else { + builder.push_multi_line_string(None::<&geo::MultiLineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _chaikin_multi_polygon( + array: &MultiPolygonArray, + n_iterations: usize, +) -> GeoArrowResult> { + let mut builder = MultiPolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_polygon(); + builder.push_multi_polygon(Some(&geo_geom.chaikin_smoothing(n_iterations)))?; + } else { + builder.push_multi_polygon(None::<&geo::MultiPolygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _chaikin_geometry_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + n_iterations: usize, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let smoothed = _chaikin_geometry(&geo_geom, n_iterations); + builder.push_geometry(Some(&smoothed))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _chaikin_geometry(geom: &geo::Geometry, n_iterations: usize) -> geo::Geometry { + match geom { + geo::Geometry::LineString(g) => { + geo::Geometry::LineString(g.chaikin_smoothing(n_iterations)) + } + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(g.chaikin_smoothing(n_iterations)), + geo::Geometry::MultiLineString(g) => { + geo::Geometry::MultiLineString(g.chaikin_smoothing(n_iterations)) + } + geo::Geometry::MultiPolygon(g) => { + geo::Geometry::MultiPolygon(g.chaikin_smoothing(n_iterations)) + } + _ => geom.clone(), + } +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/chamberlain_duquette_area.rs b/src/geoarrow/geoarrow-expr-geo/src/chamberlain_duquette_area.rs new file mode 100644 index 0000000000..3f3c0089ed --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/chamberlain_duquette_area.rs @@ -0,0 +1,70 @@ +use arrow_array::{Float64Array, builder::Float64Builder}; +use arrow_buffer::NullBuffer; +use geo::ChamberlainDuquetteArea; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::{GeoArrowType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn chamberlain_duquette_unsigned_area( + array: &dyn GeoArrowArray, +) -> GeoArrowResult { + downcast_geoarrow_array!(array, _cd_unsigned_area_impl) +} + +pub fn chamberlain_duquette_signed_area(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _cd_signed_area_impl) +} + +fn _zeros(len: usize, nulls: Option) -> Float64Array { + let values = vec![0.0f64; len]; + Float64Array::new(values.into(), nulls) +} + +fn _cd_unsigned_area_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + use GeoArrowType::*; + match array.data_type() { + Point(_) | LineString(_) | MultiPoint(_) | MultiLineString(_) => { + Ok(_zeros(array.len(), array.logical_nulls())) + } + _ => _cd_area_impl( + array, + ChamberlainDuquetteArea::chamberlain_duquette_unsigned_area, + ), + } +} + +fn _cd_signed_area_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + use GeoArrowType::*; + match array.data_type() { + Point(_) | LineString(_) | MultiPoint(_) | MultiLineString(_) => { + Ok(_zeros(array.len(), array.logical_nulls())) + } + _ => _cd_area_impl( + array, + ChamberlainDuquetteArea::chamberlain_duquette_signed_area, + ), + } +} + +fn _cd_area_impl<'a, F: Fn(&geo::Geometry) -> f64>( + array: &'a impl GeoArrowArrayAccessor<'a>, + area_fn: F, +) -> GeoArrowResult { + let mut builder = Float64Builder::with_capacity(array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + builder.append_value(area_fn(&geo_geom)); + } else { + builder.append_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/concave_hull.rs b/src/geoarrow/geoarrow-expr-geo/src/concave_hull.rs new file mode 100644 index 0000000000..d13b8445bd --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/concave_hull.rs @@ -0,0 +1,45 @@ +use geo::{ConcaveHull, ConvexHull}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PolygonArray, builder::PolygonBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{CoordType, Dimension, PolygonType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn concave_hull( + array: &dyn GeoArrowArray, + concavity: f64, + coord_type: CoordType, +) -> GeoArrowResult { + downcast_geoarrow_array!(array, _concave_hull_impl, concavity, coord_type) +} + +fn _concave_hull_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + concavity: f64, + coord_type: CoordType, +) -> GeoArrowResult { + let typ = PolygonType::new(Dimension::XY, array.data_type().metadata().clone()) + .with_coord_type(coord_type); + let mut builder = PolygonBuilder::new(typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let poly = match &geo_geom { + geo::Geometry::Polygon(g) => g.concave_hull(concavity), + geo::Geometry::MultiPolygon(g) => g.concave_hull(concavity), + geo::Geometry::LineString(g) => g.concave_hull(concavity), + geo::Geometry::MultiLineString(g) => g.concave_hull(concavity), + geo::Geometry::MultiPoint(g) => g.concave_hull(concavity), + _ => geo_geom.convex_hull(), + }; + builder.push_polygon(Some(&poly))?; + } else { + builder.push_polygon(None::.as_ref())?; + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/contains.rs b/src/geoarrow/geoarrow-expr-geo/src/contains.rs new file mode 100644 index 0000000000..4eee91ae11 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/contains.rs @@ -0,0 +1,141 @@ +use arrow_array::BooleanArray; +use geo::contains::Contains; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +use crate::util::{downcast::downcast_geoarrow_array_two_args, to_geo::geometry_to_geo}; + +pub fn contains( + left_array: &dyn GeoArrowArray, + right_array: &dyn GeoArrowArray, +) -> GeoArrowResult { + if left_array.len() != right_array.len() { + Err(GeoArrowError::InvalidGeoArrow( + "Arrays must have the same length".to_string(), + )) + } else { + downcast_geoarrow_array_two_args!(left_array, right_array, _contains_impl) + } +} + +fn _contains_impl<'a>( + left_array: &'a impl GeoArrowArrayAccessor<'a>, + right_array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut builder = BooleanArray::builder(left_array.len()); + + for (canidate_left, canidate_right) in left_array.iter().zip(right_array.iter()) { + match (canidate_left, canidate_right) { + (Some(left), Some(right)) => { + let left_geom = geometry_to_geo(&left?)?; + let right_geom = geometry_to_geo(&right?)?; + let result = left_geom.contains(&right_geom); + builder.append_value(result); + } + (_, _) => { + builder.append_null(); + } + } + } + Ok(builder.finish()) +} + +// #[cfg(test)] +// mod tests { +// use geo::{Geometry, line_string, point, polygon}; +// use geoarrow_array::builder::GeometryBuilder; +// use geoarrow_schema::{CoordType, GeometryType}; + +// use super::*; + +// #[test] +// fn test_contains() { +// let test_pairs = [ +// //Right is contained in left +// vec![ +// Some(Geometry::from(polygon![ +// (x: 1.0, y: 1.0), +// (x: 2.0, y: 1.0), +// (x: 2.0, y: 2.0), +// (x: 1.0, y: 2.0) +// ])), +// Some(Geometry::from(polygon![ +// (x: 1.5, y: 1.5), +// (x: 1.75, y: 1.5), +// (x: 1.75, y: 1.75), +// (x: 1.5, y: 1.75) +// ])), +// ], +// //Right is not contained in left +// vec![ +// Some(Geometry::from(polygon![ +// (x: 1.0, y: 1.0), +// (x: 2.0, y: 1.0), +// (x: 2.0, y: 2.0), +// (x: 1.0, y: 2.0) +// ])), +// Some(Geometry::from(polygon![ +// (x: 4.5, y: 4.5), +// (x: 5.5, y: 4.5), +// (x: 5.5, y: 5.5), +// (x: 3.5, y: 5.5), +// ])), +// ], +// //Mixed geometry +// vec![ +// Some(Geometry::from(line_string![ +// (x: 0., y: 0.), +// (x: 2., y: 0.), +// (x: 2., y: 2.), +// (x: 0., y: 2.), +// (x: 0., y: 0.), +// ])), +// Some(Geometry::from(point!(x: 2., y: 0.))), +// ], +// ]; + +// let geoms_left = test_pairs +// .iter() +// .map(|pair| pair[0].clone()) +// .collect::>(); +// let geoms_right = test_pairs +// .iter() +// .map(|pair| pair[1].clone()) +// .collect::>(); + +// let typ = GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved); +// let left_array = GeometryBuilder::from_nullable_geometries(&geoms_left, typ.clone()) +// .unwrap() +// .finish(); +// let right_array = GeometryBuilder::from_nullable_geometries(&geoms_right, typ) +// .unwrap() +// .finish(); + +// let result = contains(&left_array, &right_array).unwrap(); +// let expected = BooleanArray::from(vec![Some(true), Some(false), Some(true)]); + +// assert_eq!(result, expected, "Contains test failed"); +// } + +// #[test] +// #[should_panic(expected = "Arrays must have the same length")] +// fn test_contains_length_mismatch() { +// let geoms_left = vec![Some(Geometry::from(polygon![ +// (x: 1.0, y: 1.0), +// (x: 2.0, y: 1.0), +// (x: 2.0, y: 2.0), +// (x: 1.0, y: 2.0) +// ]))]; +// let geoms_right: Vec> = vec![]; + +// let typ = GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved); +// let left_array = GeometryBuilder::from_nullable_geometries(&geoms_left, typ.clone()) +// .unwrap() +// .finish(); +// let right_array = GeometryBuilder::from_nullable_geometries(&geoms_right, typ) +// .unwrap() +// .finish(); + +// contains(&left_array, &right_array).unwrap(); +// } +// } diff --git a/src/geoarrow/geoarrow-expr-geo/src/convex_hull.rs b/src/geoarrow/geoarrow-expr-geo/src/convex_hull.rs new file mode 100644 index 0000000000..c08e2f8280 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/convex_hull.rs @@ -0,0 +1,36 @@ +use geo::ConvexHull; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PolygonArray, builder::PolygonBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{CoordType, Dimension, PolygonType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn convex_hull( + array: &dyn GeoArrowArray, + coord_type: CoordType, +) -> GeoArrowResult { + downcast_geoarrow_array!(array, convex_hull_impl, coord_type) +} + +fn convex_hull_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + coord_type: CoordType, +) -> GeoArrowResult { + let typ = PolygonType::new(Dimension::XY, array.data_type().metadata().clone()) + .with_coord_type(coord_type); + let mut builder = PolygonBuilder::new(typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let poly = geo_geom.convex_hull(); + builder.push_polygon(Some(&poly))?; + } else { + builder.push_polygon(None::.as_ref())?; + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/densify.rs b/src/geoarrow/geoarrow-expr-geo/src/densify.rs new file mode 100644 index 0000000000..0348a8e5aa --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/densify.rs @@ -0,0 +1,142 @@ +use std::sync::Arc; + +use geo::{Densify, Euclidean}; +use geo_traits::to_geo::{ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPolygon, ToGeoPolygon}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, + array::{LineStringArray, MultiLineStringArray, MultiPolygonArray, PolygonArray}, + builder::{ + GeometryBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPolygonBuilder, + PolygonBuilder, + }, + cast::AsGeoArrowArray, + downcast_geoarrow_array, +}; +use geoarrow_schema::{GeoArrowType, GeometryType, error::GeoArrowResult}; + +use crate::util::{copy_geoarrow_array_ref, to_geo::geometry_to_geo}; + +pub fn densify( + array: &dyn GeoArrowArray, + max_distance: f64, +) -> GeoArrowResult> { + use GeoArrowType::*; + match array.data_type() { + Point(_) | MultiPoint(_) | GeometryCollection(_) | Rect(_) => { + Ok(copy_geoarrow_array_ref(array)) + } + LineString(_) => _densify_linestring(array.as_line_string(), max_distance), + Polygon(_) => _densify_polygon(array.as_polygon(), max_distance), + MultiLineString(_) => _densify_multi_linestring(array.as_multi_line_string(), max_distance), + MultiPolygon(_) => _densify_multi_polygon(array.as_multi_polygon(), max_distance), + _ => downcast_geoarrow_array!(array, _densify_geometry_impl, max_distance), + } +} + +fn _densify_linestring( + array: &LineStringArray, + max_distance: f64, +) -> GeoArrowResult> { + let mut builder = LineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_line_string(); + builder.push_line_string(Some(&Euclidean.densify(&geo_geom, max_distance)))?; + } else { + builder.push_line_string(None::<&geo::LineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _densify_polygon( + array: &PolygonArray, + max_distance: f64, +) -> GeoArrowResult> { + let mut builder = PolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_polygon(); + builder.push_polygon(Some(&Euclidean.densify(&geo_geom, max_distance)))?; + } else { + builder.push_polygon(None::<&geo::Polygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _densify_multi_linestring( + array: &MultiLineStringArray, + max_distance: f64, +) -> GeoArrowResult> { + let mut builder = MultiLineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_line_string(); + builder.push_multi_line_string(Some(&Euclidean.densify(&geo_geom, max_distance)))?; + } else { + builder.push_multi_line_string(None::<&geo::MultiLineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _densify_multi_polygon( + array: &MultiPolygonArray, + max_distance: f64, +) -> GeoArrowResult> { + let mut builder = MultiPolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_polygon(); + builder.push_multi_polygon(Some(&Euclidean.densify(&geo_geom, max_distance)))?; + } else { + builder.push_multi_polygon(None::<&geo::MultiPolygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _densify_geometry_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + max_distance: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let densified = _densify_geometry(&geo_geom, max_distance); + builder.push_geometry(Some(&densified))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _densify_geometry(geom: &geo::Geometry, max_distance: f64) -> geo::Geometry { + match geom { + geo::Geometry::LineString(g) => { + geo::Geometry::LineString(Euclidean.densify(g, max_distance)) + } + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(Euclidean.densify(g, max_distance)), + geo::Geometry::MultiLineString(g) => { + geo::Geometry::MultiLineString(Euclidean.densify(g, max_distance)) + } + geo::Geometry::MultiPolygon(g) => { + geo::Geometry::MultiPolygon(Euclidean.densify(g, max_distance)) + } + _ => geom.clone(), + } +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/dimensions.rs b/src/geoarrow/geoarrow-expr-geo/src/dimensions.rs new file mode 100644 index 0000000000..d5e7375954 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/dimensions.rs @@ -0,0 +1,25 @@ +use arrow_array::{BooleanArray, builder::BooleanBuilder}; +use geo::HasDimensions; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::error::GeoArrowResult; + +use crate::util::to_geo::geometry_to_geo; + +pub fn is_empty(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _is_empty_impl) +} + +fn _is_empty_impl<'a>(array: &'a impl GeoArrowArrayAccessor<'a>) -> GeoArrowResult { + let mut builder = BooleanBuilder::with_capacity(array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + builder.append_value(geo_geom.is_empty()); + } else { + builder.append_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/distance.rs b/src/geoarrow/geoarrow-expr-geo/src/distance.rs new file mode 100644 index 0000000000..525245c7c4 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/distance.rs @@ -0,0 +1,41 @@ +use arrow_array::Float64Array; +use geo::{Distance, Euclidean}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +use crate::util::{downcast::downcast_geoarrow_array_two_args, to_geo::geometry_to_geo}; + +pub fn euclidean_distance( + left_array: &dyn GeoArrowArray, + right_array: &dyn GeoArrowArray, +) -> GeoArrowResult { + if left_array.len() != right_array.len() { + Err(GeoArrowError::InvalidGeoArrow( + "Arrays must have the same length".to_string(), + )) + } else { + downcast_geoarrow_array_two_args!(left_array, right_array, _distance_impl) + } +} + +fn _distance_impl<'a>( + left_array: &'a impl GeoArrowArrayAccessor<'a>, + right_array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut builder = Float64Array::builder(left_array.len()); + + for (left, right) in left_array.iter().zip(right_array.iter()) { + match (left, right) { + (Some(left), Some(right)) => { + let left_geom = geometry_to_geo(&left?)?; + let right_geom = geometry_to_geo(&right?)?; + let result = Euclidean.distance(&left_geom, &right_geom); + builder.append_value(result); + } + (_, _) => { + builder.append_null(); + } + } + } + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/frechet_distance.rs b/src/geoarrow/geoarrow-expr-geo/src/frechet_distance.rs new file mode 100644 index 0000000000..b236d9f933 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/frechet_distance.rs @@ -0,0 +1,54 @@ +use arrow_array::Float64Array; +use geo::{Euclidean, line_measures::FrechetDistance}; +use geo_traits::{GeometryTrait, to_geo::ToGeoLineString}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +use crate::util::downcast::downcast_geoarrow_array_two_args; + +pub fn frechet_distance( + left_array: &dyn GeoArrowArray, + right_array: &dyn GeoArrowArray, +) -> GeoArrowResult { + if left_array.len() != right_array.len() { + Err(GeoArrowError::InvalidGeoArrow( + "Arrays must have the same length".to_string(), + )) + } else { + downcast_geoarrow_array_two_args!(left_array, right_array, _frechet_distance_impl) + } +} + +fn _frechet_distance_impl<'a>( + left_array: &'a impl GeoArrowArrayAccessor<'a>, + right_array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut builder = Float64Array::builder(left_array.len()); + + for (maybe_left, maybe_right) in left_array.iter().zip(right_array.iter()) { + match (maybe_left, maybe_right) { + (Some(left), Some(right)) => { + let left_geom = left?; + let right_geom = right?; + + match (left_geom.as_type(), right_geom.as_type()) { + ( + geo_traits::GeometryType::LineString(ls1), + geo_traits::GeometryType::LineString(ls2), + ) => { + let geo_ls1 = ls1.to_line_string(); + let geo_ls2 = ls2.to_line_string(); + builder.append_value(Euclidean.frechet_distance(&geo_ls1, &geo_ls2)); + } + _ => { + builder.append_null(); + } + } + } + _ => { + builder.append_null(); + } + } + } + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/geodesic_area.rs b/src/geoarrow/geoarrow-expr-geo/src/geodesic_area.rs new file mode 100644 index 0000000000..1a9708f6fc --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/geodesic_area.rs @@ -0,0 +1,76 @@ +use arrow_array::{Float64Array, builder::Float64Builder}; +use arrow_buffer::NullBuffer; +use geo::GeodesicArea; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::{GeoArrowType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn geodesic_area_signed(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _geodesic_area_signed_impl) +} + +pub fn geodesic_area_unsigned(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _geodesic_area_unsigned_impl) +} + +pub fn geodesic_perimeter(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _geodesic_perimeter_impl) +} + +fn _zeros(len: usize, nulls: Option) -> Float64Array { + let values = vec![0.0f64; len]; + Float64Array::new(values.into(), nulls) +} + +fn _geodesic_area_signed_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + use GeoArrowType::*; + match array.data_type() { + Point(_) | LineString(_) | MultiPoint(_) | MultiLineString(_) => { + Ok(_zeros(array.len(), array.logical_nulls())) + } + _ => _geodesic_area_impl(array, GeodesicArea::geodesic_area_signed), + } +} + +fn _geodesic_area_unsigned_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + use GeoArrowType::*; + match array.data_type() { + Point(_) | LineString(_) | MultiPoint(_) | MultiLineString(_) => { + Ok(_zeros(array.len(), array.logical_nulls())) + } + _ => _geodesic_area_impl(array, GeodesicArea::geodesic_area_unsigned), + } +} + +fn _geodesic_perimeter_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + use GeoArrowType::*; + match array.data_type() { + Point(_) | MultiPoint(_) => Ok(_zeros(array.len(), array.logical_nulls())), + _ => _geodesic_area_impl(array, GeodesicArea::geodesic_perimeter), + } +} + +fn _geodesic_area_impl<'a, F: Fn(&geo::Geometry) -> f64>( + array: &'a impl GeoArrowArrayAccessor<'a>, + area_fn: F, +) -> GeoArrowResult { + let mut builder = Float64Builder::with_capacity(array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + builder.append_value(area_fn(&geo_geom)); + } else { + builder.append_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/geodesic_length.rs b/src/geoarrow/geoarrow-expr-geo/src/geodesic_length.rs new file mode 100644 index 0000000000..0290e3fdae --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/geodesic_length.rs @@ -0,0 +1,37 @@ +use arrow_array::Float64Array; +use geo::{Geodesic, Length}; +use geo_traits::{ + GeometryTrait, + to_geo::{ToGeoLine, ToGeoLineString, ToGeoMultiLineString}, +}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::error::GeoArrowResult; + +pub fn geodesic_length(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _geodesic_length_impl) +} + +fn _geodesic_length_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut result = Float64Array::builder(array.len()); + for geom in array.iter() { + if let Some(geom) = geom { + match geom?.as_type() { + geo_traits::GeometryType::Line(l) => { + result.append_value(Geodesic.length(&l.to_line())) + } + geo_traits::GeometryType::LineString(ls) => { + result.append_value(Geodesic.length(&ls.to_line_string())) + } + geo_traits::GeometryType::MultiLineString(mls) => { + result.append_value(Geodesic.length(&mls.to_multi_line_string())) + } + _ => result.append_value(0.0), + } + } else { + result.append_null(); + } + } + Ok(result.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/haversine_length.rs b/src/geoarrow/geoarrow-expr-geo/src/haversine_length.rs new file mode 100644 index 0000000000..37a48d9494 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/haversine_length.rs @@ -0,0 +1,37 @@ +use arrow_array::Float64Array; +use geo::{Haversine, Length}; +use geo_traits::{ + GeometryTrait, + to_geo::{ToGeoLine, ToGeoLineString, ToGeoMultiLineString}, +}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::error::GeoArrowResult; + +pub fn haversine_length(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _haversine_length_impl) +} + +fn _haversine_length_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut result = Float64Array::builder(array.len()); + for geom in array.iter() { + if let Some(geom) = geom { + match geom?.as_type() { + geo_traits::GeometryType::Line(l) => { + result.append_value(Haversine.length(&l.to_line())) + } + geo_traits::GeometryType::LineString(ls) => { + result.append_value(Haversine.length(&ls.to_line_string())) + } + geo_traits::GeometryType::MultiLineString(mls) => { + result.append_value(Haversine.length(&mls.to_multi_line_string())) + } + _ => result.append_value(0.0), + } + } else { + result.append_null(); + } + } + Ok(result.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/interior_point.rs b/src/geoarrow/geoarrow-expr-geo/src/interior_point.rs new file mode 100644 index 0000000000..1fed32861c --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/interior_point.rs @@ -0,0 +1,36 @@ +use geo::InteriorPoint; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PointArray, builder::PointBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{CoordType, Dimension, PointType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn interior_point( + array: &dyn GeoArrowArray, + coord_type: CoordType, +) -> GeoArrowResult { + downcast_geoarrow_array!(array, _interior_point_impl, coord_type) +} + +fn _interior_point_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + coord_type: CoordType, +) -> GeoArrowResult { + let typ = PointType::new(Dimension::XY, array.data_type().metadata().clone()) + .with_coord_type(coord_type); + let mut builder = PointBuilder::with_capacity(typ, array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let interior_point = geo_geom.interior_point(); + builder.push_point(interior_point.as_ref()); + } else { + builder.push_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/intersects.rs b/src/geoarrow/geoarrow-expr-geo/src/intersects.rs new file mode 100644 index 0000000000..00751c688f --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/intersects.rs @@ -0,0 +1,172 @@ +use arrow_array::BooleanArray; +use geo::intersects::Intersects; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +use crate::util::{downcast::downcast_geoarrow_array_two_args, to_geo::geometry_to_geo}; + +pub fn intersects( + left_array: &dyn GeoArrowArray, + right_array: &dyn GeoArrowArray, +) -> GeoArrowResult { + if left_array.len() != right_array.len() { + Err(GeoArrowError::InvalidGeoArrow( + "Input arrays must have the same length".to_string(), + )) + } else { + downcast_geoarrow_array_two_args!(left_array, right_array, _intersects_impl) + } +} + +fn _intersects_impl<'a>( + left_array: &'a impl GeoArrowArrayAccessor<'a>, + right_array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut builder = BooleanArray::builder(left_array.len()); + + for (maybe_left, maybe_right) in left_array.iter().zip(right_array.iter()) { + match (maybe_left, maybe_right) { + (Some(left), Some(right)) => { + let left_geom = geometry_to_geo(&left?)?; + let right_geom = geometry_to_geo(&right?)?; + let intersects = left_geom.intersects(&right_geom); + builder.append_value(intersects); + } + _ => { + // If either is null, the result is null + builder.append_null(); + } + } + } + + Ok(builder.finish()) +} + +// #[cfg(test)] +// mod tests { +// use geo::{Geometry, line_string, polygon}; +// use geoarrow_array::builder::GeometryBuilder; +// use geoarrow_schema::{CoordType, GeometryType}; + +// use super::*; + +// #[test] +// fn test_intersects() { +// // Group matching pairs for better visibility +// let test_pairs = [ +// // Pair 1: Should intersect, overlapping unit squares +// vec![ +// Some(Geometry::from(polygon![ +// (x: 1.0, y: 1.0), +// (x: 2.0, y: 1.0), +// (x: 2.0, y: 2.0), +// (x: 1.0, y: 2.0) +// ])), +// Some(Geometry::from(polygon![ +// (x: 1.5, y: 1.5), +// (x: 2.5, y: 1.5), +// (x: 2.5, y: 2.5), +// (x: 1.5, y: 2.5) +// ])), +// ], +// // Pair 2: Should not intersect, separated squares +// vec![ +// Some(Geometry::from(polygon![ +// (x: 1.0, y: 1.0), +// (x: 2.0, y: 1.0), +// (x: 2.0, y: 2.0), +// (x: 1.0, y: 2.0) +// ])), +// Some(Geometry::from(polygon![ +// (x: 3.0, y: 3.0), +// (x: 4.0, y: 3.0), +// (x: 4.0, y: 4.0), +// (x: 3.0, y: 4.0) +// ])), +// ], +// // Pair 3: Should intersect, touching at corner +// vec![ +// Some(Geometry::from(polygon![ +// (x: 2.0, y: 2.0), +// (x: 3.0, y: 2.0), +// (x: 3.0, y: 3.0), +// (x: 2.0, y: 3.0) +// ])), +// Some(Geometry::from(polygon![ +// (x: 3.0, y: 3.0), +// (x: 4.0, y: 3.0), +// (x: 4.0, y: 4.0), +// (x: 3.0, y: 4.0) +// ])), +// ], +// // Pair 4: Mixed geometry types, should intersect +// vec![ +// Some(Geometry::from(line_string! [ +// (x: 1.0, y: 1.0), +// (x: 2.0, y: 2.0) +// ])), +// Some(Geometry::from(polygon![ +// (x: 1.5, y: 1.5), +// (x: 2.5, y: 1.5), +// (x: 2.5, y: 2.5), +// (x: 1.5, y: 2.5) +// ])), +// ], +// // Pair 5: Null geometries, should return null +// vec![None, None], +// ]; + +// let geoms_left: Vec<_> = test_pairs.iter().map(|pair| pair[0].clone()).collect(); +// let geoms_right: Vec<_> = test_pairs.iter().map(|pair| pair[1].clone()).collect(); + +// let typ = GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved); +// let left_array = GeometryBuilder::from_nullable_geometries(&geoms_left, typ.clone()) +// .unwrap() +// .finish(); +// let right_array = GeometryBuilder::from_nullable_geometries(&geoms_right, typ) +// .unwrap() +// .finish(); + +// let result = intersects(&left_array, &right_array).unwrap(); + +// let expected = +// BooleanArray::from(vec![Some(true), Some(false), Some(true), Some(true), None]); + +// assert_eq!(result, expected); +// } + +// #[test] +// #[should_panic(expected = "Input arrays must have the same length")] +// fn test_intersects_length_mismatch() { +// let left_geom = vec![Some(Geometry::from( +// polygon![(x: 0.0, y: 0.0), (x: 1.0, y: 0.0), (x: 1.0, y: 1.0), (x: 0.0, y: 1.0)], +// ))]; +// let right_geom: Vec> = vec![]; + +// let typ = GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved); +// let left_array = GeometryBuilder::from_nullable_geometries(&left_geom, typ.clone()) +// .unwrap() +// .finish(); +// let right_array = GeometryBuilder::from_nullable_geometries(&right_geom, typ) +// .unwrap() +// .finish(); + +// intersects(&left_array, &right_array).unwrap(); +// } + +// #[test] +// fn test_intersects_empty_arrays() { +// let typ = GeometryType::new(Default::default()).with_coord_type(CoordType::Interleaved); +// let left_array = +// GeometryBuilder::from_nullable_geometries(&Vec::>::new(), typ.clone()) +// .unwrap() +// .finish(); +// let right_array = +// GeometryBuilder::from_nullable_geometries(&Vec::>::new(), typ) +// .unwrap() +// .finish(); + +// let result = intersects(&left_array, &right_array).unwrap(); +// assert_eq!(result.len(), 0); +// } +// } diff --git a/src/geoarrow/geoarrow-expr-geo/src/length.rs b/src/geoarrow/geoarrow-expr-geo/src/length.rs new file mode 100644 index 0000000000..c2d33986dd --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/length.rs @@ -0,0 +1,149 @@ +use arrow_array::Float64Array; +use geo::{Euclidean, Length}; +use geo_traits::{ + GeometryTrait, + to_geo::{ToGeoLine, ToGeoLineString, ToGeoMultiLineString}, +}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::error::GeoArrowResult; + +/// Compute the euclidean length of linear geometries in a GeoArrowArray. +/// +/// Only LineString and MultiLineString geometries will have non-zero lengths. +/// Other geometry types (including polygons) will return a length of 0.0. +pub fn euclidean_length(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _length_impl) +} + +pub fn _length_impl<'a>(array: &'a impl GeoArrowArrayAccessor<'a>) -> GeoArrowResult { + let mut result = Float64Array::builder(array.len()); + for geom in array.iter() { + if let Some(geom) = geom { + match geom?.as_type() { + geo_traits::GeometryType::Line(l) => { + result.append_value(Euclidean.length(&l.to_line())) + } + geo_traits::GeometryType::LineString(ls) => { + result.append_value(Euclidean.length(&ls.to_line_string())) + } + geo_traits::GeometryType::MultiLineString(mls) => { + result.append_value(Euclidean.length(&mls.to_multi_line_string())) + } + _ => result.append_value(0.0), + } + } else { + result.append_null(); + } + } + Ok(result.finish()) +} + +// #[cfg(test)] +// mod test { + +// use geo::{Euclidean, Length, LineString, MultiLineString, Point}; +// use geoarrow_array::{ +// array::PointArray, +// builder::{LineStringBuilder, MultiLineStringBuilder, PointBuilder, WkbBuilder}, +// }; +// use geoarrow_schema::{CoordType, Dimension, PointType, WkbType}; + +// use super::*; + +// #[test] +// fn test_point() { +// let point_type = PointType::new(Dimension::XY, Default::default()); +// let mut builder = PointBuilder::new(point_type); + +// builder.push_point(Some(&Point::new(0., 1.))); +// builder.push_point(Some(&Point::new(2., 3.))); +// builder.push_point(Some(&Point::new(4., 5.))); + +// let point_array: PointArray = builder.finish(); +// let result = euclidean_length(&point_array).unwrap(); + +// assert_eq!(result.len(), 3); +// assert_eq!(result.value(0), 0.0); +// assert_eq!(result.value(1), 0.0); +// assert_eq!(result.value(2), 0.0); +// } + +// #[test] +// fn test_linestring() { +// let mut linestring_builder = LineStringBuilder::new( +// geoarrow_schema::LineStringType::new(Dimension::XY, Default::default()) +// .with_coord_type(CoordType::Separated), +// ); +// let linestring_1 = LineString::from(vec![(0.0, 0.0), (3.0, 9.0)]); +// let linestring_2 = LineString::from(vec![(0.0, 0.0), (4.0, 5.0)]); + +// let _ = linestring_builder.push_geometry(Some(&linestring_1)); +// let _ = linestring_builder.push_geometry(Some(&linestring_2)); +// let linestring_array = linestring_builder.finish(); + +// let result = euclidean_length(&linestring_array).unwrap(); + +// assert_eq!(result.len(), 2); +// assert_eq!(result.value(0), Euclidean.length(&linestring_1)); +// assert_eq!(result.value(1), Euclidean.length(&linestring_2)); +// } + +// #[test] +// fn test_multilinestring() { +// let mut multi_linestring_builder = MultiLineStringBuilder::new( +// geoarrow_schema::MultiLineStringType::new(Dimension::XY, Default::default()) +// .with_coord_type(CoordType::Separated), +// ); +// let linestring_1 = LineString::from(vec![(0.0, 9.0), (3.0, 4.0)]); +// let linestring_2 = LineString::from(vec![(0.0, 0.0), (4.0, 3.0)]); +// let multi_linestring_1 = +// MultiLineString::new(vec![linestring_1.clone(), linestring_2.clone()]); +// let linestring_3 = LineString::from(vec![(1.0, 5.0), (5.0, 6.0)]); +// let multi_linestring_2 = MultiLineString::new(vec![linestring_3.clone()]); + +// let _ = multi_linestring_builder.push_geometry(Some(&multi_linestring_1)); +// let _ = multi_linestring_builder.push_geometry(Some(&multi_linestring_2)); + +// let multi_linestring_array = multi_linestring_builder.finish(); +// let result = euclidean_length(&multi_linestring_array).unwrap(); + +// assert_eq!(result.len(), 2); +// assert_eq!( +// result.value(0), +// Euclidean.length(&linestring_1) + Euclidean.length(&linestring_2) +// ); +// assert_eq!(result.value(1), Euclidean.length(&linestring_3)); +// } + +// #[test] +// fn test_wkb_linestring() { +// let mut wkb_builder: WkbBuilder = +// geoarrow_array::builder::WkbBuilder::new(WkbType::new(Default::default())); +// let linestring_1 = LineString::from(vec![(0.0, 0.0), (3.0, 4.0)]); +// let linestring_2 = LineString::from(vec![(0.0, 0.0), (4.0, 5.0)]); +// let _ = wkb_builder.push_geometry(Some(&linestring_1)); +// let _ = wkb_builder.push_geometry(Some(&linestring_2)); +// let wkb_array = wkb_builder.finish(); + +// let result = euclidean_length(&wkb_array).unwrap(); +// assert_eq!(2, result.len()); +// assert_eq!(result.value(0), Euclidean.length(&linestring_1)); +// assert_eq!(result.value(1), Euclidean.length(&linestring_2)); +// } + +// #[test] +// fn test_wkb_point() { +// let mut wkb_builder: WkbBuilder = +// geoarrow_array::builder::WkbBuilder::new(WkbType::new(Default::default())); +// let point_1 = Point::new(1.0, 2.0); +// let point_2 = Point::new(3.0, 4.0); +// let _ = wkb_builder.push_geometry(Some(&point_1)); +// let _ = wkb_builder.push_geometry(Some(&point_2)); +// let wkb_array = wkb_builder.finish(); + +// let result = euclidean_length(&wkb_array).unwrap(); +// assert_eq!(2, result.len()); +// assert_eq!(result.value(0), 0.0); +// assert_eq!(result.value(1), 0.0); +// } +// } diff --git a/src/geoarrow/geoarrow-expr-geo/src/lib.rs b/src/geoarrow/geoarrow-expr-geo/src/lib.rs new file mode 100644 index 0000000000..5ed9680ebf --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/lib.rs @@ -0,0 +1,80 @@ +#![cfg_attr(docsrs, feature(doc_cfg))] +#![cfg_attr(not(test), warn(unused_crate_dependencies))] +#![doc( + html_logo_url = "https://github.com/geoarrow.png", + html_favicon_url = "https://github.com/geoarrow.png?size=32" +)] + +mod affine_ops; +mod area; +mod bounding_rect; +mod center; +mod centroid; +mod chaikin_smoothing; +mod chamberlain_duquette_area; +mod concave_hull; +mod contains; +mod convex_hull; +mod densify; +mod dimensions; +mod distance; +mod frechet_distance; +mod geodesic_area; +mod geodesic_length; +mod haversine_length; +mod interior_point; +mod intersects; +mod length; +mod line_interpolate_point; +mod line_locate_point; +mod minimum_rotated_rect; +mod relate; +mod remove_repeated_points; +mod rotate; +mod scale; +mod simplify; +mod simplify_vw; +mod simplify_vw_preserve; +mod skew; +mod translate; +pub mod util; +pub mod validation; +mod vincenty_length; +mod within; + +pub use affine_ops::affine_transform; +pub use area::{signed_area, unsigned_area}; +pub use bounding_rect::bounding_rect; +pub use center::center; +pub use centroid::centroid; +pub use chaikin_smoothing::chaikin_smoothing; +pub use chamberlain_duquette_area::{ + chamberlain_duquette_signed_area, chamberlain_duquette_unsigned_area, +}; +pub use concave_hull::concave_hull; +pub use contains::contains; +pub use convex_hull::convex_hull; +pub use densify::densify; +pub use dimensions::is_empty; +pub use distance::euclidean_distance; +pub use frechet_distance::frechet_distance; +pub use geodesic_area::{geodesic_area_signed, geodesic_area_unsigned, geodesic_perimeter}; +pub use geodesic_length::geodesic_length; +pub use haversine_length::haversine_length; +pub use interior_point::interior_point; +pub use intersects::intersects; +pub use length::euclidean_length; +pub use line_interpolate_point::line_interpolate_point; +pub use line_locate_point::line_locate_point; +pub use minimum_rotated_rect::minimum_rotated_rect; +pub use relate::relate_boolean; +pub use remove_repeated_points::remove_repeated_points; +pub use rotate::{rotate_around_center, rotate_around_centroid, rotate_around_point}; +pub use scale::{scale, scale_around_point}; +pub use simplify::simplify; +pub use simplify_vw::simplify_vw; +pub use simplify_vw_preserve::simplify_vw_preserve; +pub use skew::{skew, skew_around_point}; +pub use translate::translate; +pub use vincenty_length::vincenty_length; +pub use within::within; diff --git a/src/geoarrow/geoarrow-expr-geo/src/line_interpolate_point.rs b/src/geoarrow/geoarrow-expr-geo/src/line_interpolate_point.rs new file mode 100644 index 0000000000..8042a0d6b7 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/line_interpolate_point.rs @@ -0,0 +1,44 @@ +use geo::{Euclidean, InterpolateLine}; +use geo_traits::{GeometryTrait, to_geo::ToGeoLineString}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PointArray, builder::PointBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{CoordType, Dimension, PointType, error::GeoArrowResult}; + +pub fn line_interpolate_point( + array: &dyn GeoArrowArray, + fraction: f64, + coord_type: CoordType, +) -> GeoArrowResult { + downcast_geoarrow_array!(array, _line_interpolate_point_impl, fraction, coord_type) +} + +fn _line_interpolate_point_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + fraction: f64, + coord_type: CoordType, +) -> GeoArrowResult { + let typ = PointType::new(Dimension::XY, array.data_type().metadata().clone()) + .with_coord_type(coord_type); + let mut builder = PointBuilder::with_capacity(typ, array.len()); + + for item in array.iter() { + if let Some(geom) = item { + match geom?.as_type() { + geo_traits::GeometryType::LineString(ls) => { + let geo_ls = ls.to_line_string(); + let point = Euclidean.point_at_ratio_from_start(&geo_ls, fraction); + builder.push_point(point.as_ref()); + } + _ => { + builder.push_null(); + } + } + } else { + builder.push_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/line_locate_point.rs b/src/geoarrow/geoarrow-expr-geo/src/line_locate_point.rs new file mode 100644 index 0000000000..4cdad9b8c5 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/line_locate_point.rs @@ -0,0 +1,57 @@ +use arrow_array::Float64Array; +use geo::LineLocatePoint; +use geo_traits::{GeometryTrait, to_geo::ToGeoLineString}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +use crate::util::{downcast::downcast_geoarrow_array_two_args, to_geo::geometry_to_geo}; + +pub fn line_locate_point( + line_array: &dyn GeoArrowArray, + point_array: &dyn GeoArrowArray, +) -> GeoArrowResult { + if line_array.len() != point_array.len() { + Err(GeoArrowError::InvalidGeoArrow( + "Arrays must have the same length".to_string(), + )) + } else { + downcast_geoarrow_array_two_args!(line_array, point_array, _line_locate_point_impl) + } +} + +fn _line_locate_point_impl<'a>( + line_array: &'a impl GeoArrowArrayAccessor<'a>, + point_array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut builder = Float64Array::builder(line_array.len()); + + for (maybe_line, maybe_point) in line_array.iter().zip(point_array.iter()) { + match (maybe_line, maybe_point) { + (Some(line), Some(point)) => { + let line_geom = line?; + let point_geom = geometry_to_geo(&point?)?; + + match line_geom.as_type() { + geo_traits::GeometryType::LineString(ls) => { + let geo_ls = ls.to_line_string(); + if let geo::Geometry::Point(geo_pt) = point_geom { + match geo_ls.line_locate_point(&geo_pt) { + Some(fraction) => builder.append_value(fraction), + None => builder.append_null(), + } + } else { + builder.append_null(); + } + } + _ => { + builder.append_null(); + } + } + } + _ => { + builder.append_null(); + } + } + } + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/minimum_rotated_rect.rs b/src/geoarrow/geoarrow-expr-geo/src/minimum_rotated_rect.rs new file mode 100644 index 0000000000..bef6a12d48 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/minimum_rotated_rect.rs @@ -0,0 +1,36 @@ +use geo::MinimumRotatedRect; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::PolygonArray, builder::PolygonBuilder, + downcast_geoarrow_array, +}; +use geoarrow_schema::{CoordType, Dimension, PolygonType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +pub fn minimum_rotated_rect( + array: &dyn GeoArrowArray, + coord_type: CoordType, +) -> GeoArrowResult { + downcast_geoarrow_array!(array, minimum_rotated_rect_impl, coord_type) +} + +fn minimum_rotated_rect_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + coord_type: CoordType, +) -> GeoArrowResult { + let typ = PolygonType::new(Dimension::XY, array.data_type().metadata().clone()) + .with_coord_type(coord_type); + let mut builder = PolygonBuilder::new(typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let poly = geo_geom.minimum_rotated_rect(); + builder.push_polygon(poly.as_ref())?; + } else { + builder.push_polygon(None::.as_ref())?; + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/relate.rs b/src/geoarrow/geoarrow-expr-geo/src/relate.rs new file mode 100644 index 0000000000..f9eabb9e83 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/relate.rs @@ -0,0 +1,45 @@ +use arrow_array::BooleanArray; +use geo::{Relate, relate::IntersectionMatrix}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +use crate::util::{downcast::downcast_geoarrow_array_two_args, to_geo::geometry_to_geo}; + +pub fn relate_boolean( + left_array: &dyn GeoArrowArray, + right_array: &dyn GeoArrowArray, + relate_cb: impl Fn(IntersectionMatrix) -> bool, +) -> GeoArrowResult { + if left_array.len() != right_array.len() { + Err(GeoArrowError::InvalidGeoArrow( + "Input arrays must have the same length".to_string(), + )) + } else { + downcast_geoarrow_array_two_args!(left_array, right_array, _relate_impl, relate_cb) + } +} + +fn _relate_impl<'a>( + left_array: &'a impl GeoArrowArrayAccessor<'a>, + right_array: &'a impl GeoArrowArrayAccessor<'a>, + relate_cb: impl Fn(IntersectionMatrix) -> bool, +) -> GeoArrowResult { + let mut builder = BooleanArray::builder(left_array.len()); + + for (maybe_left, maybe_right) in left_array.iter().zip(right_array.iter()) { + match (maybe_left, maybe_right) { + (Some(left), Some(right)) => { + let left_geom = geometry_to_geo(&left?)?; + let right_geom = geometry_to_geo(&right?)?; + let matrix = left_geom.relate(&right_geom); + builder.append_value(relate_cb(matrix)); + } + _ => { + // If either is null, the result is null + builder.append_null(); + } + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/remove_repeated_points.rs b/src/geoarrow/geoarrow-expr-geo/src/remove_repeated_points.rs new file mode 100644 index 0000000000..9966c01650 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/remove_repeated_points.rs @@ -0,0 +1,122 @@ +use std::sync::Arc; + +use geo::RemoveRepeatedPoints; +use geo_traits::to_geo::{ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPolygon, ToGeoPolygon}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, + array::{LineStringArray, MultiLineStringArray, MultiPolygonArray, PolygonArray}, + builder::{ + GeometryBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPolygonBuilder, + PolygonBuilder, + }, + cast::AsGeoArrowArray, + downcast_geoarrow_array, +}; +use geoarrow_schema::{GeoArrowType, GeometryType, error::GeoArrowResult}; + +use crate::util::{copy_geoarrow_array_ref, to_geo::geometry_to_geo}; + +pub fn remove_repeated_points(array: &dyn GeoArrowArray) -> GeoArrowResult> { + use GeoArrowType::*; + match array.data_type() { + Point(_) | MultiPoint(_) | GeometryCollection(_) | Rect(_) => { + Ok(copy_geoarrow_array_ref(array)) + } + LineString(_) => _rrp_linestring(array.as_line_string()), + Polygon(_) => _rrp_polygon(array.as_polygon()), + MultiLineString(_) => _rrp_multi_linestring(array.as_multi_line_string()), + MultiPolygon(_) => _rrp_multi_polygon(array.as_multi_polygon()), + _ => downcast_geoarrow_array!(array, _rrp_geometry_impl), + } +} + +fn _rrp_linestring(array: &LineStringArray) -> GeoArrowResult> { + let mut builder = LineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_line_string(); + builder.push_line_string(Some(&geo_geom.remove_repeated_points()))?; + } else { + builder.push_line_string(None::<&geo::LineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _rrp_polygon(array: &PolygonArray) -> GeoArrowResult> { + let mut builder = PolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_polygon(); + builder.push_polygon(Some(&geo_geom.remove_repeated_points()))?; + } else { + builder.push_polygon(None::<&geo::Polygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _rrp_multi_linestring(array: &MultiLineStringArray) -> GeoArrowResult> { + let mut builder = MultiLineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_line_string(); + builder.push_multi_line_string(Some(&geo_geom.remove_repeated_points()))?; + } else { + builder.push_multi_line_string(None::<&geo::MultiLineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _rrp_multi_polygon(array: &MultiPolygonArray) -> GeoArrowResult> { + let mut builder = MultiPolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_polygon(); + builder.push_multi_polygon(Some(&geo_geom.remove_repeated_points()))?; + } else { + builder.push_multi_polygon(None::<&geo::MultiPolygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _rrp_geometry_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let cleaned = _rrp_geometry(&geo_geom); + builder.push_geometry(Some(&cleaned))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _rrp_geometry(geom: &geo::Geometry) -> geo::Geometry { + match geom { + geo::Geometry::LineString(g) => geo::Geometry::LineString(g.remove_repeated_points()), + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(g.remove_repeated_points()), + geo::Geometry::MultiLineString(g) => { + geo::Geometry::MultiLineString(g.remove_repeated_points()) + } + geo::Geometry::MultiPolygon(g) => geo::Geometry::MultiPolygon(g.remove_repeated_points()), + _ => geom.clone(), + } +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/rotate.rs b/src/geoarrow/geoarrow-expr-geo/src/rotate.rs new file mode 100644 index 0000000000..9d2d633ad8 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/rotate.rs @@ -0,0 +1,95 @@ +use std::sync::Arc; + +use geo::Rotate; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, builder::GeometryBuilder, downcast_geoarrow_array, +}; +use geoarrow_schema::{GeometryType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +/// Rotate each geometry around its centroid by the given angle in degrees. +pub fn rotate_around_centroid( + array: &dyn GeoArrowArray, + degrees: f64, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _rotate_centroid_impl, degrees) +} + +/// Rotate each geometry around its bounding box center by the given angle in degrees. +pub fn rotate_around_center( + array: &dyn GeoArrowArray, + degrees: f64, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _rotate_center_impl, degrees) +} + +/// Rotate each geometry around a given point by the given angle in degrees. +pub fn rotate_around_point( + array: &dyn GeoArrowArray, + degrees: f64, + point: geo::Point, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _rotate_point_impl, degrees, point) +} + +fn _rotate_centroid_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + degrees: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let rotated = geo_geom.rotate_around_centroid(degrees); + builder.push_geometry(Some(&rotated))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _rotate_center_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + degrees: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let rotated = geo_geom.rotate_around_center(degrees); + builder.push_geometry(Some(&rotated))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _rotate_point_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + degrees: f64, + point: geo::Point, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let rotated = geo_geom.rotate_around_point(degrees, point); + builder.push_geometry(Some(&rotated))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/scale.rs b/src/geoarrow/geoarrow-expr-geo/src/scale.rs new file mode 100644 index 0000000000..b0e5cb0b40 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/scale.rs @@ -0,0 +1,71 @@ +use std::sync::Arc; + +use geo::Scale; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, builder::GeometryBuilder, downcast_geoarrow_array, +}; +use geoarrow_schema::{GeometryType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +/// Scale each geometry by the given x and y factors, around each geometry's centroid. +pub fn scale( + array: &dyn GeoArrowArray, + x_factor: f64, + y_factor: f64, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _scale_impl, x_factor, y_factor) +} + +/// Scale each geometry by the given x and y factors, around the given origin point. +pub fn scale_around_point( + array: &dyn GeoArrowArray, + x_factor: f64, + y_factor: f64, + origin: geo::Point, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _scale_around_point_impl, x_factor, y_factor, origin) +} + +fn _scale_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + x_factor: f64, + y_factor: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let scaled = geo_geom.scale_xy(x_factor, y_factor); + builder.push_geometry(Some(&scaled))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _scale_around_point_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + x_factor: f64, + y_factor: f64, + origin: geo::Point, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let scaled = geo_geom.scale_around_point(x_factor, y_factor, origin); + builder.push_geometry(Some(&scaled))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/simplify.rs b/src/geoarrow/geoarrow-expr-geo/src/simplify.rs new file mode 100644 index 0000000000..3b395e171c --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/simplify.rs @@ -0,0 +1,130 @@ +use std::sync::Arc; + +use geo::Simplify; +use geo_traits::to_geo::{ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPolygon, ToGeoPolygon}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, + array::{LineStringArray, MultiLineStringArray, MultiPolygonArray, PolygonArray}, + builder::{ + GeometryBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPolygonBuilder, + PolygonBuilder, + }, + cast::AsGeoArrowArray, + downcast_geoarrow_array, +}; +use geoarrow_schema::{GeoArrowType, GeometryType, error::GeoArrowResult}; + +use crate::util::{copy_geoarrow_array_ref, to_geo::geometry_to_geo}; + +pub fn simplify(array: &dyn GeoArrowArray, epsilon: f64) -> GeoArrowResult> { + use GeoArrowType::*; + match array.data_type() { + Point(_) | MultiPoint(_) | GeometryCollection(_) | Rect(_) => { + Ok(copy_geoarrow_array_ref(array)) + } + LineString(_) => simplify_linestring(array.as_line_string(), epsilon), + Polygon(_) => simplify_polygon(array.as_polygon(), epsilon), + MultiLineString(_) => simplify_multi_linestring(array.as_multi_line_string(), epsilon), + MultiPolygon(_) => simplify_multi_polygon(array.as_multi_polygon(), epsilon), + _ => downcast_geoarrow_array!(array, simplify_geometry_impl, epsilon), + } +} + +fn simplify_linestring( + array: &LineStringArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = LineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_line_string(); + builder.push_line_string(Some(&geo_geom.simplify(epsilon)))?; + } else { + builder.push_line_string(None::<&geo::LineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_polygon(array: &PolygonArray, epsilon: f64) -> GeoArrowResult> { + let mut builder = PolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_polygon(); + builder.push_polygon(Some(&geo_geom.simplify(epsilon)))?; + } else { + builder.push_polygon(None::<&geo::Polygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_multi_linestring( + array: &MultiLineStringArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = MultiLineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_line_string(); + builder.push_multi_line_string(Some(&geo_geom.simplify(epsilon)))?; + } else { + builder.push_multi_line_string(None::<&geo::MultiLineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_multi_polygon( + array: &MultiPolygonArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = MultiPolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_polygon(); + builder.push_multi_polygon(Some(&geo_geom.simplify(epsilon)))?; + } else { + builder.push_multi_polygon(None::<&geo::MultiPolygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_geometry_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + epsilon: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let simplified_geom = simplify_geometry(&geo_geom, epsilon); + builder.push_geometry(Some(&simplified_geom))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_geometry(geom: &geo::Geometry, epsilon: f64) -> geo::Geometry { + match geom { + geo::Geometry::LineString(g) => geo::Geometry::LineString(g.simplify(epsilon)), + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(g.simplify(epsilon)), + geo::Geometry::MultiLineString(g) => geo::Geometry::MultiLineString(g.simplify(epsilon)), + geo::Geometry::MultiPolygon(g) => geo::Geometry::MultiPolygon(g.simplify(epsilon)), + _ => geom.clone(), + } +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/simplify_vw.rs b/src/geoarrow/geoarrow-expr-geo/src/simplify_vw.rs new file mode 100644 index 0000000000..32233ea5b0 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/simplify_vw.rs @@ -0,0 +1,136 @@ +use std::sync::Arc; + +use geo::SimplifyVw; +use geo_traits::to_geo::{ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPolygon, ToGeoPolygon}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, + array::{LineStringArray, MultiLineStringArray, MultiPolygonArray, PolygonArray}, + builder::{ + GeometryBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPolygonBuilder, + PolygonBuilder, + }, + cast::AsGeoArrowArray, + downcast_geoarrow_array, +}; +use geoarrow_schema::{GeoArrowType, GeometryType, error::GeoArrowResult}; + +use crate::util::{copy_geoarrow_array_ref, to_geo::geometry_to_geo}; + +pub fn simplify_vw( + array: &dyn GeoArrowArray, + epsilon: f64, +) -> GeoArrowResult> { + use GeoArrowType::*; + match array.data_type() { + Point(_) | MultiPoint(_) | GeometryCollection(_) | Rect(_) => { + Ok(copy_geoarrow_array_ref(array)) + } + LineString(_) => simplify_vw_linestring(array.as_line_string(), epsilon), + Polygon(_) => simplify_vw_polygon(array.as_polygon(), epsilon), + MultiLineString(_) => simplify_vw_multi_linestring(array.as_multi_line_string(), epsilon), + MultiPolygon(_) => simplify_vw_multi_polygon(array.as_multi_polygon(), epsilon), + _ => downcast_geoarrow_array!(array, simplify_vw_geometry_impl, epsilon), + } +} + +fn simplify_vw_linestring( + array: &LineStringArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = LineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_line_string(); + builder.push_line_string(Some(&geo_geom.simplify_vw(epsilon)))?; + } else { + builder.push_line_string(None::<&geo::LineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_polygon( + array: &PolygonArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = PolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_polygon(); + builder.push_polygon(Some(&geo_geom.simplify_vw(epsilon)))?; + } else { + builder.push_polygon(None::<&geo::Polygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_multi_linestring( + array: &MultiLineStringArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = MultiLineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_line_string(); + builder.push_multi_line_string(Some(&geo_geom.simplify_vw(epsilon)))?; + } else { + builder.push_multi_line_string(None::<&geo::MultiLineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_multi_polygon( + array: &MultiPolygonArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = MultiPolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_polygon(); + builder.push_multi_polygon(Some(&geo_geom.simplify_vw(epsilon)))?; + } else { + builder.push_multi_polygon(None::<&geo::MultiPolygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_geometry_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + epsilon: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let simplified_geom = simplify_vw_geometry(&geo_geom, epsilon); + builder.push_geometry(Some(&simplified_geom))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_geometry(geom: &geo::Geometry, epsilon: f64) -> geo::Geometry { + match geom { + geo::Geometry::LineString(g) => geo::Geometry::LineString(g.simplify_vw(epsilon)), + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(g.simplify_vw(epsilon)), + geo::Geometry::MultiLineString(g) => geo::Geometry::MultiLineString(g.simplify_vw(epsilon)), + geo::Geometry::MultiPolygon(g) => geo::Geometry::MultiPolygon(g.simplify_vw(epsilon)), + _ => geom.clone(), + } +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/simplify_vw_preserve.rs b/src/geoarrow/geoarrow-expr-geo/src/simplify_vw_preserve.rs new file mode 100644 index 0000000000..07d1cbb0d2 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/simplify_vw_preserve.rs @@ -0,0 +1,142 @@ +use std::sync::Arc; + +use geo::SimplifyVwPreserve; +use geo_traits::to_geo::{ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPolygon, ToGeoPolygon}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, + array::{LineStringArray, MultiLineStringArray, MultiPolygonArray, PolygonArray}, + builder::{ + GeometryBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPolygonBuilder, + PolygonBuilder, + }, + cast::AsGeoArrowArray, + downcast_geoarrow_array, +}; +use geoarrow_schema::{GeoArrowType, GeometryType, error::GeoArrowResult}; + +use crate::util::{copy_geoarrow_array_ref, to_geo::geometry_to_geo}; + +pub fn simplify_vw_preserve( + array: &dyn GeoArrowArray, + epsilon: f64, +) -> GeoArrowResult> { + use GeoArrowType::*; + match array.data_type() { + Point(_) | MultiPoint(_) | GeometryCollection(_) | Rect(_) => { + Ok(copy_geoarrow_array_ref(array)) + } + LineString(_) => simplify_vw_preserve_linestring(array.as_line_string(), epsilon), + Polygon(_) => simplify_vw_preserve_polygon(array.as_polygon(), epsilon), + MultiLineString(_) => { + simplify_vw_preserve_multi_linestring(array.as_multi_line_string(), epsilon) + } + MultiPolygon(_) => simplify_vw_preserve_multi_polygon(array.as_multi_polygon(), epsilon), + _ => downcast_geoarrow_array!(array, simplify_vw_preserve_geometry_impl, epsilon), + } +} + +fn simplify_vw_preserve_linestring( + array: &LineStringArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = LineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_line_string(); + builder.push_line_string(Some(&geo_geom.simplify_vw_preserve(epsilon)))?; + } else { + builder.push_line_string(None::<&geo::LineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_preserve_polygon( + array: &PolygonArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = PolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_polygon(); + builder.push_polygon(Some(&geo_geom.simplify_vw_preserve(epsilon)))?; + } else { + builder.push_polygon(None::<&geo::Polygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_preserve_multi_linestring( + array: &MultiLineStringArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = MultiLineStringBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_line_string(); + builder.push_multi_line_string(Some(&geo_geom.simplify_vw_preserve(epsilon)))?; + } else { + builder.push_multi_line_string(None::<&geo::MultiLineString>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_preserve_multi_polygon( + array: &MultiPolygonArray, + epsilon: f64, +) -> GeoArrowResult> { + let mut builder = MultiPolygonBuilder::new(array.extension_type().clone()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geom?.to_multi_polygon(); + builder.push_multi_polygon(Some(&geo_geom.simplify_vw_preserve(epsilon)))?; + } else { + builder.push_multi_polygon(None::<&geo::MultiPolygon>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_preserve_geometry_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + epsilon: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let simplified_geom = simplify_vw_preserve_geometry(&geo_geom, &epsilon); + builder.push_geometry(Some(&simplified_geom))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn simplify_vw_preserve_geometry(geom: &geo::Geometry, epsilon: &f64) -> geo::Geometry { + match geom { + geo::Geometry::LineString(g) => geo::Geometry::LineString(g.simplify_vw_preserve(*epsilon)), + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(g.simplify_vw_preserve(*epsilon)), + geo::Geometry::MultiLineString(g) => { + geo::Geometry::MultiLineString(g.simplify_vw_preserve(*epsilon)) + } + geo::Geometry::MultiPolygon(g) => { + geo::Geometry::MultiPolygon(g.simplify_vw_preserve(*epsilon)) + } + _ => geom.clone(), + } +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/skew.rs b/src/geoarrow/geoarrow-expr-geo/src/skew.rs new file mode 100644 index 0000000000..311b2b83fe --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/skew.rs @@ -0,0 +1,71 @@ +use std::sync::Arc; + +use geo::Skew; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, builder::GeometryBuilder, downcast_geoarrow_array, +}; +use geoarrow_schema::{GeometryType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +/// Skew each geometry by the given x and y degrees, around each geometry's centroid. +pub fn skew( + array: &dyn GeoArrowArray, + x_degrees: f64, + y_degrees: f64, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _skew_impl, x_degrees, y_degrees) +} + +/// Skew each geometry by the given x and y degrees, around the given origin point. +pub fn skew_around_point( + array: &dyn GeoArrowArray, + x_degrees: f64, + y_degrees: f64, + origin: geo::Point, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _skew_around_point_impl, x_degrees, y_degrees, origin) +} + +fn _skew_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + x_degrees: f64, + y_degrees: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let skewed = geo_geom.skew_xy(x_degrees, y_degrees); + builder.push_geometry(Some(&skewed))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} + +fn _skew_around_point_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + x_degrees: f64, + y_degrees: f64, + origin: geo::Point, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let skewed = geo_geom.skew_around_point(x_degrees, y_degrees, origin); + builder.push_geometry(Some(&skewed))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/translate.rs b/src/geoarrow/geoarrow-expr-geo/src/translate.rs new file mode 100644 index 0000000000..0671f389a3 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/translate.rs @@ -0,0 +1,39 @@ +use std::sync::Arc; + +use geo::Translate; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, builder::GeometryBuilder, downcast_geoarrow_array, +}; +use geoarrow_schema::{GeometryType, error::GeoArrowResult}; + +use crate::util::to_geo::geometry_to_geo; + +/// Translate each geometry by the given x and y offsets. +pub fn translate( + array: &dyn GeoArrowArray, + x_offset: f64, + y_offset: f64, +) -> GeoArrowResult> { + downcast_geoarrow_array!(array, _translate_impl, x_offset, y_offset) +} + +fn _translate_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, + x_offset: f64, + y_offset: f64, +) -> GeoArrowResult> { + let geom_typ = GeometryType::new(array.data_type().metadata().clone()); + let mut builder = GeometryBuilder::new(geom_typ); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + let translated = geo_geom.translate(x_offset, y_offset); + builder.push_geometry(Some(&translated))?; + } else { + builder.push_geometry(None::<&geo::Geometry>.as_ref())?; + } + } + + Ok(Arc::new(builder.finish())) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/util/downcast.rs b/src/geoarrow/geoarrow-expr-geo/src/util/downcast.rs new file mode 100644 index 0000000000..66a8aa15cd --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/util/downcast.rs @@ -0,0 +1,1182 @@ +/// Two argument version of the `downcast_geoarrow_array!` macro. +/// Downcast any combination of two [GeoArrowArray][geoarrow_array::GeoArrowArray] to a concrete-typed array based on its [`GeoArrowType`][geoarrow_schema::GeoArrowType]. +/// +/// This is in private utils in geoarrow-geo because we don't yet have a stable API for this macro. +macro_rules! downcast_geoarrow_array_two_args { + ($array1:ident, $array2:ident, $fn:expr $(, $args:expr )* $(,)?) => { + match $array1.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::LineString(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::Polygon(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::MultiPoint(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::MultiLineString(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::MultiPolygon(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::Geometry(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::GeometryCollection(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::Rect(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_rect($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::Wkb(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::LargeWkb(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::WkbView(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::Wkt(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::LargeWkt(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + geoarrow_schema::GeoArrowType::WktView(_) => match $array2.data_type() { + geoarrow_schema::GeoArrowType::Point(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Polygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPoint(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_point($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiLineString(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_line_string($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::MultiPolygon(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_multi_polygon($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Geometry(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::GeometryCollection(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_geometry_collection($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Rect(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_rect($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkb(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WkbView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkb_view($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::Wkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::LargeWkt(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt::($array2) + $(, $args )* + ), + geoarrow_schema::GeoArrowType::WktView(_) => $fn( + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array1), + geoarrow_array::cast::AsGeoArrowArray::as_wkt_view($array2) + $(, $args )* + ), + }, + } + }; +} + +pub(crate) use downcast_geoarrow_array_two_args; + +// #[cfg(test)] +// mod tests { +// use geoarrow_array::GeoArrowArray; +// use geoarrow_schema::error::GeoArrowResult; + +// // Ensure macro gets called, so an error will appear to ensure exhaustiveness +// #[allow(dead_code)] +// fn _test_two_args_macro_exhaustiveness( +// arr1: &dyn GeoArrowArray, +// arr2: &dyn GeoArrowArray, +// ) -> GeoArrowResult<()> { +// downcast_geoarrow_array_two_args!(arr1, arr2, |_a1, _a2| Ok(())) +// } +// } diff --git a/src/geoarrow/geoarrow-expr-geo/src/util/mod.rs b/src/geoarrow/geoarrow-expr-geo/src/util/mod.rs new file mode 100644 index 0000000000..0e52728fb2 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/util/mod.rs @@ -0,0 +1,10 @@ +use std::sync::Arc; + +use geoarrow_array::GeoArrowArray; + +pub(crate) mod downcast; +pub mod to_geo; + +pub(crate) fn copy_geoarrow_array_ref(array: &dyn GeoArrowArray) -> Arc { + array.slice(0, array.len()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/util/to_geo.rs b/src/geoarrow/geoarrow-expr-geo/src/util/to_geo.rs new file mode 100644 index 0000000000..1db9ef1497 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/util/to_geo.rs @@ -0,0 +1,68 @@ +//! Convert structs that implement [geo_traits] to [geo] objects. +//! +//! Note that this is the same underlying implementation as upstream [geo] in +//! . However, the trait-based implementation hits this +//! compiler regression , +//! , which prevents from compiling in release +//! mode on a stable Rust version. For some reason, the **function-based implementation** does not +//! hit this regression, and thus allows building geoarrow without using latest nightly and a +//! custom `RUSTFLAGS`. +//! +//! Note that it's only `GeometryTrait` and `GeometryCollectionTrait` that hit this compiler bug. +//! Other traits can use the upstream impls. + +use geo::{CoordNum, Geometry, GeometryCollection}; +use geo_traits::{ + GeometryCollectionTrait, GeometryTrait, GeometryType, + to_geo::{ + ToGeoLine, ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint, ToGeoMultiPolygon, + ToGeoPoint, ToGeoPolygon, ToGeoRect, ToGeoTriangle, + }, +}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +/// Convert any [geo_traits] Geometry to a [`geo::Geometry`]. +/// +/// Only the first two dimensions will be kept. +pub fn geometry_to_geo( + geometry: &impl GeometryTrait, +) -> GeoArrowResult> { + use GeometryType::*; + + match geometry.as_type() { + Point(geom) => Ok(Geometry::Point(geom.try_to_point().ok_or( + GeoArrowError::IncorrectGeometryType( + "geo crate does not support empty points.".to_string(), + ), + )?)), + LineString(geom) => Ok(Geometry::LineString(geom.to_line_string())), + Polygon(geom) => Ok(Geometry::Polygon(geom.to_polygon())), + MultiPoint(geom) => Ok(Geometry::MultiPoint(geom.try_to_multi_point().ok_or( + GeoArrowError::IncorrectGeometryType( + "geo crate does not support empty points.".to_string(), + ), + )?)), + MultiLineString(geom) => Ok(Geometry::MultiLineString(geom.to_multi_line_string())), + MultiPolygon(geom) => Ok(Geometry::MultiPolygon(geom.to_multi_polygon())), + GeometryCollection(geom) => Ok(Geometry::GeometryCollection(geometry_collection_to_geo( + geom, + )?)), + Rect(geom) => Ok(Geometry::Rect(geom.to_rect())), + Line(geom) => Ok(Geometry::Line(geom.to_line())), + Triangle(geom) => Ok(Geometry::Triangle(geom.to_triangle())), + } +} + +/// Convert any GeometryCollection to a [`GeometryCollection`]. +/// +/// Only the first two dimensions will be kept. +fn geometry_collection_to_geo( + geometry_collection: &impl GeometryCollectionTrait, +) -> GeoArrowResult> { + Ok(GeometryCollection::new_from( + geometry_collection + .geometries() + .map(|geometry| geometry_to_geo(&geometry)) + .collect::>()?, + )) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/validation.rs b/src/geoarrow/geoarrow-expr-geo/src/validation.rs new file mode 100644 index 0000000000..d497c13a29 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/validation.rs @@ -0,0 +1,53 @@ +use arrow_array::{ + BooleanArray, StringViewArray, + builder::{BooleanBuilder, StringViewBuilder}, +}; +use geo::Validation; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::error::GeoArrowResult; + +use crate::util::to_geo::geometry_to_geo; + +pub fn is_valid(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _is_valid_impl) +} + +fn _is_valid_impl<'a>(array: &'a impl GeoArrowArrayAccessor<'a>) -> GeoArrowResult { + let mut builder = BooleanBuilder::with_capacity(array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + builder.append_value(geo_geom.is_valid()); + } else { + builder.append_null(); + } + } + + Ok(builder.finish()) +} + +pub fn is_valid_reason(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _is_valid_reason_impl) +} + +fn _is_valid_reason_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut builder = StringViewBuilder::with_capacity(array.len()); + + for item in array.iter() { + if let Some(geom) = item { + let geo_geom = geometry_to_geo(&geom?)?; + if let Err(err) = geo_geom.check_validation() { + builder.append_value(err.to_string()); + } else { + builder.append_value("Valid Geometry"); + } + } else { + builder.append_null(); + } + } + + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/vincenty_length.rs b/src/geoarrow/geoarrow-expr-geo/src/vincenty_length.rs new file mode 100644 index 0000000000..20d6029416 --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/vincenty_length.rs @@ -0,0 +1,40 @@ +use arrow_array::Float64Array; +use geo::VincentyLength; +use geo_traits::{ + GeometryTrait, + to_geo::{ToGeoLine, ToGeoLineString, ToGeoMultiLineString}, +}; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, downcast_geoarrow_array}; +use geoarrow_schema::error::GeoArrowResult; + +pub fn vincenty_length(array: &dyn GeoArrowArray) -> GeoArrowResult { + downcast_geoarrow_array!(array, _vincenty_length_impl) +} + +fn _vincenty_length_impl<'a>( + array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut result = Float64Array::builder(array.len()); + for geom in array.iter() { + if let Some(geom) = geom { + match geom?.as_type() { + geo_traits::GeometryType::Line(l) => { + let length = l.to_line().vincenty_length().unwrap_or(0.0); + result.append_value(length); + } + geo_traits::GeometryType::LineString(ls) => { + let length = ls.to_line_string().vincenty_length().unwrap_or(0.0); + result.append_value(length); + } + geo_traits::GeometryType::MultiLineString(mls) => { + let length = mls.to_multi_line_string().vincenty_length().unwrap_or(0.0); + result.append_value(length); + } + _ => result.append_value(0.0), + } + } else { + result.append_null(); + } + } + Ok(result.finish()) +} diff --git a/src/geoarrow/geoarrow-expr-geo/src/within.rs b/src/geoarrow/geoarrow-expr-geo/src/within.rs new file mode 100644 index 0000000000..800270c82d --- /dev/null +++ b/src/geoarrow/geoarrow-expr-geo/src/within.rs @@ -0,0 +1,41 @@ +use arrow_array::BooleanArray; +use geo::Within; +use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor}; +use geoarrow_schema::error::{GeoArrowError, GeoArrowResult}; + +use crate::util::{downcast::downcast_geoarrow_array_two_args, to_geo::geometry_to_geo}; + +pub fn within( + left_array: &dyn GeoArrowArray, + right_array: &dyn GeoArrowArray, +) -> GeoArrowResult { + if left_array.len() != right_array.len() { + Err(GeoArrowError::InvalidGeoArrow( + "Arrays must have the same length".to_string(), + )) + } else { + downcast_geoarrow_array_two_args!(left_array, right_array, _within_impl) + } +} + +fn _within_impl<'a>( + left_array: &'a impl GeoArrowArrayAccessor<'a>, + right_array: &'a impl GeoArrowArrayAccessor<'a>, +) -> GeoArrowResult { + let mut builder = BooleanArray::builder(left_array.len()); + + for (maybe_left, maybe_right) in left_array.iter().zip(right_array.iter()) { + match (maybe_left, maybe_right) { + (Some(left), Some(right)) => { + let left_geom = geometry_to_geo(&left?)?; + let right_geom = geometry_to_geo(&right?)?; + let result = left_geom.is_within(&right_geom); + builder.append_value(result); + } + _ => { + builder.append_null(); + } + } + } + Ok(builder.finish()) +} diff --git a/src/geoarrow/geoarrow-schema/Cargo.toml b/src/geoarrow/geoarrow-schema/Cargo.toml new file mode 100644 index 0000000000..aedc3c671c --- /dev/null +++ b/src/geoarrow/geoarrow-schema/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "geoarrow-schema" +version = "0.7.0" +authors = ["Kyle Barron "] +edition = "2024" +license = "MIT OR Apache-2.0" +repository = "https://github.com/geoarrow/geoarrow-rs" +description = "GeoArrow geometry type and metadata definitions." + +[dependencies] +arrow-schema = {workspace = true} +geo-traits = {workspace = true} +serde = {workspace = true} +serde_json = {workspace = true} +thiserror = {workspace = true} diff --git a/src/geoarrow/geoarrow-schema/src/coord_type.rs b/src/geoarrow/geoarrow-schema/src/coord_type.rs new file mode 100644 index 0000000000..06b3ccae90 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/coord_type.rs @@ -0,0 +1,30 @@ +/// The permitted GeoArrow coordinate representations. +/// +/// GeoArrow permits coordinate types to either be "Interleaved", where the X and Y coordinates are +/// in a single buffer as `XYXYXY` or "Separated", where the X and Y coordinates are in multiple +/// buffers as `XXXX` and `YYYY`. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] +pub enum CoordType { + /// Interleaved coordinates. + /// + /// This stores coordinates in an Arrow + /// [fixed-size-list-typed][arrow_schema::DataType::FixedSizeList] array. + /// + /// The size of the internal fixed-size list depends on the [dimension][crate::Dimension] of + /// the array. + /// + /// ```notest + /// FixedSizeList[n_dim] + /// ``` + Interleaved, + + /// Separated coordinates. + /// + /// This stores coordinates in an Arrow [struct-typed][arrow_schema::DataType::Struct] array: + /// + /// ```notest + /// Struct]] + /// ``` + #[default] + Separated, +} diff --git a/src/geoarrow/geoarrow-schema/src/crs.rs b/src/geoarrow/geoarrow-schema/src/crs.rs new file mode 100644 index 0000000000..2d8b869268 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/crs.rs @@ -0,0 +1,310 @@ +//! Defines GeoArrow CRS metadata and CRS transforms used for writing GeoArrow data to file formats +//! that require different CRS representations. + +use std::fmt::Debug; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::error::{GeoArrowError, GeoArrowResult}; + +/// Coordinate Reference System information. +/// +/// As of GeoArrow version 0.2, GeoArrow supports various CRS representations: +/// +/// - A JSON object describing the coordinate reference system (CRS) +/// using [PROJJSON](https://proj.org/specifications/projjson.html). +/// - A string containing a serialized CRS representation. This option +/// is intended as a fallback for producers (e.g., database drivers or +/// file readers) that are provided a CRS in some form but do not have the +/// means to convert it to PROJJSON. +/// - Omitted, indicating that the producer does not have any information about +/// the CRS. +/// +/// For maximum compatibility, producers should write PROJJSON. +/// +/// Note that regardless of the axis order specified by the CRS, axis order will be interpreted +/// according to the wording in the [GeoPackage WKB binary +/// encoding](https://www.geopackage.org/spec130/index.html#gpb_format): axis order is always +/// (longitude, latitude) and (easting, northing) regardless of the the axis order encoded in the +/// CRS specification. +/// +/// Note that [`PartialEq`] and [`Eq`] currently use their default, derived implementations, so +/// only `Crs` that are structurally exactly equal will compare as equal. Two different +/// representations of the same logical CRS will not compare as equal. +#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct Crs { + /// One of: + /// + /// - A JSON object describing the coordinate reference system (CRS) + /// using [PROJJSON](https://proj.org/specifications/projjson.html). + /// - A string containing a serialized CRS representation. This option + /// is intended as a fallback for producers (e.g., database drivers or + /// file readers) that are provided a CRS in some form but do not have the + /// means to convert it to PROJJSON. + /// - Omitted, indicating that the producer does not have any information about + /// the CRS. + /// + /// For maximum compatibility, producers should write PROJJSON where possible. + /// Note that regardless of the axis order specified by the CRS, axis order will be interpreted + /// [GeoPackage WKB binary encoding](https://www.geopackage.org/spec130/index.html#gpb_format): + /// axis order is always (longitude, latitude) and (easting, northing) + /// regardless of the the axis order encoded in the CRS specification. + crs: Option, + + /// An optional string disambiguating the value of the `crs` field. + /// + /// The `"crs_type"` should be omitted if the producer cannot guarantee the validity + /// of any of the above values (e.g., if it just serialized a CRS object + /// specifically into one of these representations). + #[serde(skip_serializing_if = "Option::is_none")] + crs_type: Option, +} + +impl Crs { + /// Construct from a PROJJSON object. + /// + /// Note that `value` should be a _parsed_ JSON object; this should not contain + /// `Value::String`. + pub fn from_projjson(value: Value) -> Self { + Self { + crs: Some(value), + crs_type: Some(CrsType::Projjson), + } + } + + /// Construct from a WKT:2019 string. + pub fn from_wkt2_2019(value: String) -> Self { + Self { + crs: Some(Value::String(value)), + crs_type: Some(CrsType::Wkt2_2019), + } + } + + /// Construct from an opaque string. + pub fn from_unknown_crs_type(value: String) -> Self { + Self { + crs: Some(Value::String(value)), + crs_type: None, + } + } + + /// Construct from an authority:code string. + pub fn from_authority_code(value: String) -> Self { + assert!(value.contains(':'), "':' should be authority:code CRS"); + Self { + crs: Some(Value::String(value)), + crs_type: Some(CrsType::AuthorityCode), + } + } + + /// Construct from an opaque string identifier + pub fn from_srid(value: String) -> Self { + Self { + crs: Some(Value::String(value)), + crs_type: Some(CrsType::Srid), + } + } + + /// Access the underlying [CrsType]. + pub fn crs_type(&self) -> Option { + self.crs_type + } + + /// Access the underlying CRS value. + /// + /// The return value is one of: + /// + /// - A JSON object ([`Value::Object`]) describing the coordinate reference system (CRS) + /// using [PROJJSON](https://proj.org/specifications/projjson.html). + /// - A string ([`Value::String`]) containing a serialized CRS representation. This option + /// is intended as a fallback for producers (e.g., database drivers or + /// file readers) that are provided a CRS in some form but do not have the + /// means to convert it to PROJJSON. + /// - Omitted, indicating that the producer does not have any information about + /// the CRS. + /// + /// Consult [`crs_type`][Self::crs_type] to accurately determine the CRS type. + pub fn crs_value(&self) -> Option<&Value> { + self.crs.as_ref() + } + + /// Return `true` if we should include a CRS key in the GeoArrow metadata + pub(crate) fn should_serialize(&self) -> bool { + self.crs.is_some() + } +} + +/// An optional string disambiguating the value of the `crs` field. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum CrsType { + /// Indicates that the `"crs"` field was written as + /// [PROJJSON](https://proj.org/specifications/projjson.html). + #[serde(rename = "projjson")] + Projjson, + + /// Indicates that the `"crs"` field was written as + /// [WKT2:2019](https://www.ogc.org/publications/standard/wkt-crs/). + #[serde(rename = "wkt2:2019")] + Wkt2_2019, + + /// Indicates that the `"crs"` field contains an identifier + /// in the form `AUTHORITY:CODE`. This should only be used as a last resort + /// (i.e., producers should prefer writing a complete description of the CRS). + #[serde(rename = "authority_code")] + AuthorityCode, + + /// Indicates that the `"crs"` field contains an opaque identifier + /// that requires the consumer to communicate with the producer outside of + /// this metadata. This should only be used as a last resort for database + /// drivers or readers that have no other option. + #[serde(rename = "srid")] + Srid, +} + +/// CRS transforms used for writing GeoArrow data to file formats that require different CRS +/// representations. +pub trait CrsTransform: Debug { + /// Convert the CRS contained in this Metadata to a PROJJSON object. + /// + /// Users should prefer calling `extract_projjson`, which will first unwrap the underlying + /// array metadata if it's already PROJJSON. + fn _convert_to_projjson(&self, crs: &Crs) -> GeoArrowResult>; + + /// Convert the CRS contained in this Metadata to a WKT string. + /// + /// Users should prefer calling `extract_wkt`, which will first unwrap the underlying + /// array metadata if it's already PROJJSON. + fn _convert_to_wkt(&self, crs: &Crs) -> GeoArrowResult>; + + /// Extract PROJJSON from the provided metadata. + /// + /// If the CRS is already stored as PROJJSON, this will return that. Otherwise it will call + /// [`Self::_convert_to_projjson`]. + fn extract_projjson(&self, crs: &Crs) -> GeoArrowResult> { + match crs.crs_type() { + Some(CrsType::Projjson) => Ok(crs.crs_value().cloned()), + _ => self._convert_to_projjson(crs), + } + } + + /// Extract WKT from the provided metadata. + /// + /// If the CRS is already stored as WKT, this will return that. Otherwise it will call + /// [`Self::_convert_to_wkt`]. + #[allow(clippy::collapsible_if)] + fn extract_wkt(&self, crs: &Crs) -> GeoArrowResult> { + if let (Some(crs), Some(crs_type)) = (crs.crs_value(), crs.crs_type()) { + if crs_type == CrsType::Wkt2_2019 { + if let Value::String(inner) = crs { + return Ok::<_, GeoArrowError>(Some(inner.clone())); + } + } + } + + self._convert_to_wkt(crs) + } +} + +/// A default implementation for [CrsTransform] which does not do any CRS conversion. +/// +/// Instead of raising an error, this will **silently drop any CRS information when writing data**. +#[derive(Debug, Clone, Default)] +pub struct DefaultCrsTransform {} + +impl CrsTransform for DefaultCrsTransform { + fn _convert_to_projjson(&self, _crs: &Crs) -> GeoArrowResult> { + // Unable to convert CRS to PROJJSON + // So we proceed with missing CRS + // TODO: we should probably log this. + Ok(None) + } + + fn _convert_to_wkt(&self, _crs: &Crs) -> GeoArrowResult> { + // Unable to convert CRS to WKT + // So we proceed with missing CRS + // TODO: we should probably log this. + Ok(None) + } +} + +// #[cfg(test)] +// mod test { +// use serde_json::json; + +// use super::*; + +// #[test] +// fn crs_omitted() { +// let crs = Crs::default(); +// assert!(crs.crs_value().is_none()); +// assert!(crs.crs_type().is_none()); +// assert!(!crs.should_serialize()); +// } + +// #[test] +// fn crs_projjson() { +// let crs = Crs::from_projjson(json!({})); +// assert!(crs.crs_value().is_some_and(|x| x.is_object())); +// assert!( +// crs.crs_type() +// .is_some_and(|x| matches!(x, CrsType::Projjson)) +// ); +// assert!(crs.should_serialize()); +// assert_eq!( +// serde_json::to_string(&crs).unwrap(), +// r#"{"crs":{},"crs_type":"projjson"}"# +// ); +// } + +// #[test] +// fn crs_wkt2() { +// let crs = Crs::from_wkt2_2019("TESTCRS[]".to_string()); +// assert_eq!( +// crs.crs_value(), +// Some(&Value::String("TESTCRS[]".to_string())) +// ); +// assert!(matches!(crs.crs_type(), Some(CrsType::Wkt2_2019))); +// assert!(crs.should_serialize()); +// assert_eq!( +// serde_json::to_string(&crs).unwrap(), +// r#"{"crs":"TESTCRS[]","crs_type":"wkt2:2019"}"# +// ); +// } + +// #[test] +// fn crs_authority_code() { +// let crs = Crs::from_authority_code("GEOARROW:1234".to_string()); +// assert_eq!( +// crs.crs_value(), +// Some(&Value::String("GEOARROW:1234".to_string())) +// ); +// assert!(matches!(crs.crs_type(), Some(CrsType::AuthorityCode))); +// assert!(crs.should_serialize()); +// assert_eq!( +// serde_json::to_string(&crs).unwrap(), +// r#"{"crs":"GEOARROW:1234","crs_type":"authority_code"}"# +// ); +// } + +// #[test] +// fn crs_srid() { +// let crs = Crs::from_srid("1234".to_string()); +// assert_eq!(crs.crs_value(), Some(&Value::String("1234".to_string())),); +// assert!(matches!(crs.crs_type(), Some(CrsType::Srid))); +// assert!(crs.should_serialize()); +// assert_eq!( +// serde_json::to_string(&crs).unwrap(), +// r#"{"crs":"1234","crs_type":"srid"}"# +// ); +// } + +// #[test] +// fn crs_unknown() { +// let crs = Crs::from_unknown_crs_type("1234".to_string()); +// assert_eq!(crs.crs_value(), Some(&Value::String("1234".to_string())),); +// assert!(crs.crs_type().is_none()); +// assert!(crs.should_serialize()); +// assert_eq!(serde_json::to_string(&crs).unwrap(), r#"{"crs":"1234"}"#); +// } +// } diff --git a/src/geoarrow/geoarrow-schema/src/datatype.rs b/src/geoarrow/geoarrow-schema/src/datatype.rs new file mode 100644 index 0000000000..d3fc713e4d --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/datatype.rs @@ -0,0 +1,433 @@ +//! Contains the implementation of [`GeoArrowType`], which defines all geometry arrays in this +//! crate. + +use std::sync::Arc; + +use arrow_schema::{DataType, Field, extension::ExtensionType}; + +use crate::{ + BoxType, CoordType, Dimension, GeometryCollectionType, GeometryType, LineStringType, Metadata, + MultiLineStringType, MultiPointType, MultiPolygonType, PointType, PolygonType, WkbType, + WktType, + error::{GeoArrowError, GeoArrowResult}, +}; + +/// Geospatial data types supported by GeoArrow. +/// +/// The variants of this enum include all possible GeoArrow geometry types, including both "native" +/// and "serialized" encodings. +/// +/// Each variant uniquely identifies the physical buffer layout for the respective array type. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum GeoArrowType { + /// A Point. + Point(PointType), + + /// A LineString. + LineString(LineStringType), + + /// A Polygon. + Polygon(PolygonType), + + /// A MultiPoint. + MultiPoint(MultiPointType), + + /// A MultiLineString. + MultiLineString(MultiLineStringType), + + /// A MultiPolygon. + MultiPolygon(MultiPolygonType), + + /// A GeometryCollection. + GeometryCollection(GeometryCollectionType), + + /// A Rect. + Rect(BoxType), + + /// A Geometry with unknown types or dimensions. + Geometry(GeometryType), + + /// A WKB stored in a `BinaryArray` with `i32` offsets. + Wkb(WkbType), + + /// A WKB stored in a `LargeBinaryArray` with `i64` offsets. + LargeWkb(WkbType), + + /// A WKB stored in a `BinaryViewArray`. + WkbView(WkbType), + + /// A WKT stored in a `StringArray` with `i32` offsets. + Wkt(WktType), + + /// A WKT stored in a `LargeStringArray` with `i64` offsets. + LargeWkt(WktType), + + /// A WKT stored in a `StringViewArray`. + WktView(WktType), +} + +impl From for DataType { + fn from(value: GeoArrowType) -> Self { + value.to_data_type() + } +} + +impl GeoArrowType { + /// Get the [`CoordType`] of this data type. + /// + /// WKB and WKT variants will return `None`. + pub fn coord_type(&self) -> Option { + use GeoArrowType::*; + match self { + Point(t) => Some(t.coord_type()), + LineString(t) => Some(t.coord_type()), + Polygon(t) => Some(t.coord_type()), + MultiPoint(t) => Some(t.coord_type()), + MultiLineString(t) => Some(t.coord_type()), + MultiPolygon(t) => Some(t.coord_type()), + GeometryCollection(t) => Some(t.coord_type()), + Rect(_) => Some(CoordType::Separated), + Geometry(t) => Some(t.coord_type()), + Wkb(_) | LargeWkb(_) | WkbView(_) | Wkt(_) | LargeWkt(_) | WktView(_) => None, + } + } + + /// Get the [`Dimension`] of this data type, if it has one. + /// + /// [`Geometry`][Self::Geometry] and WKB and WKT variants will return `None`. + pub fn dimension(&self) -> Option { + use GeoArrowType::*; + match self { + Point(t) => Some(t.dimension()), + LineString(t) => Some(t.dimension()), + Polygon(t) => Some(t.dimension()), + MultiPoint(t) => Some(t.dimension()), + MultiLineString(t) => Some(t.dimension()), + MultiPolygon(t) => Some(t.dimension()), + GeometryCollection(t) => Some(t.dimension()), + Rect(t) => Some(t.dimension()), + Geometry(_) | Wkb(_) | LargeWkb(_) | WkbView(_) | Wkt(_) | LargeWkt(_) | WktView(_) => { + None + } + } + } + + /// Returns the [Metadata] contained within this type. + pub fn metadata(&self) -> &Arc { + use GeoArrowType::*; + match self { + Point(t) => t.metadata(), + LineString(t) => t.metadata(), + Polygon(t) => t.metadata(), + MultiPoint(t) => t.metadata(), + MultiLineString(t) => t.metadata(), + MultiPolygon(t) => t.metadata(), + GeometryCollection(t) => t.metadata(), + Rect(t) => t.metadata(), + Geometry(t) => t.metadata(), + Wkb(t) | LargeWkb(t) | WkbView(t) => t.metadata(), + Wkt(t) | LargeWkt(t) | WktView(t) => t.metadata(), + } + } + /// Converts a [`GeoArrowType`] into the relevant arrow [`DataType`]. + /// + /// Note that an arrow [`DataType`] will lose the accompanying GeoArrow metadata if it is not + /// part of a [`Field`] with GeoArrow extension metadata in its field metadata. + /// + /// # Examples + /// + /// ``` + /// # use arrow_schema::DataType; + /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType}; + /// # + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let data_type = GeoArrowType::Point(point_type).to_data_type(); + /// assert!(matches!(data_type, DataType::Struct(_))); + /// ``` + pub fn to_data_type(&self) -> DataType { + use GeoArrowType::*; + match self { + Point(t) => t.data_type(), + LineString(t) => t.data_type(), + Polygon(t) => t.data_type(), + MultiPoint(t) => t.data_type(), + MultiLineString(t) => t.data_type(), + MultiPolygon(t) => t.data_type(), + GeometryCollection(t) => t.data_type(), + Rect(t) => t.data_type(), + Geometry(t) => t.data_type(), + Wkb(_) => DataType::Binary, + LargeWkb(_) => DataType::LargeBinary, + WkbView(_) => DataType::BinaryView, + Wkt(_) => DataType::Utf8, + LargeWkt(_) => DataType::LargeUtf8, + WktView(_) => DataType::Utf8View, + } + } + + /// Converts this [`GeoArrowType`] into an arrow [`Field`], maintaining GeoArrow extension + /// metadata. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType}; + /// # + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let geoarrow_type = GeoArrowType::Point(point_type); + /// let field = geoarrow_type.to_field("geometry", true); + /// assert_eq!(field.name(), "geometry"); + /// assert!(field.is_nullable()); + /// assert_eq!(field.metadata()["ARROW:extension:name"], "geoarrow.point"); + /// ``` + pub fn to_field>(&self, name: N, nullable: bool) -> Field { + use GeoArrowType::*; + match self { + Point(t) => t.to_field(name, nullable), + LineString(t) => t.to_field(name, nullable), + Polygon(t) => t.to_field(name, nullable), + MultiPoint(t) => t.to_field(name, nullable), + MultiLineString(t) => t.to_field(name, nullable), + MultiPolygon(t) => t.to_field(name, nullable), + GeometryCollection(t) => t.to_field(name, nullable), + Rect(t) => t.to_field(name, nullable), + Geometry(t) => t.to_field(name, nullable), + Wkb(t) | LargeWkb(t) | WkbView(t) => { + Field::new(name, self.to_data_type(), nullable).with_extension_type(t.clone()) + } + Wkt(t) | LargeWkt(t) | WktView(t) => { + Field::new(name, self.to_data_type(), nullable).with_extension_type(t.clone()) + } + } + } + + /// Applies the provided [CoordType] onto self. + /// + /// [`Rect`][Self::Rect] and WKB and WKT variants will return the same type as they do not have + /// a parameterized coordinate types. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_schema::{CoordType, Dimension, GeoArrowType, PointType}; + /// # + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let geoarrow_type = GeoArrowType::Point(point_type); + /// let new_type = geoarrow_type.with_coord_type(CoordType::Separated); + /// + /// assert_eq!(new_type.coord_type(), Some(CoordType::Separated)); + /// ``` + pub fn with_coord_type(self, coord_type: CoordType) -> GeoArrowType { + use GeoArrowType::*; + match self { + Point(t) => Point(t.with_coord_type(coord_type)), + LineString(t) => LineString(t.with_coord_type(coord_type)), + Polygon(t) => Polygon(t.with_coord_type(coord_type)), + MultiPoint(t) => MultiPoint(t.with_coord_type(coord_type)), + MultiLineString(t) => MultiLineString(t.with_coord_type(coord_type)), + MultiPolygon(t) => MultiPolygon(t.with_coord_type(coord_type)), + GeometryCollection(t) => GeometryCollection(t.with_coord_type(coord_type)), + Rect(t) => Rect(t), + Geometry(t) => Geometry(t.with_coord_type(coord_type)), + _ => self, + } + } + + /// Applies the provided [Dimension] onto self. + /// + /// [`Geometry`][Self::Geometry] and WKB and WKT variants will return the same type as they do + /// not have a parameterized dimension. + /// + /// # Examples + /// + /// ``` + /// # use geoarrow_schema::{Dimension, GeoArrowType, PointType}; + /// # + /// let point_type = PointType::new(Dimension::XY, Default::default()); + /// let geoarrow_type = GeoArrowType::Point(point_type); + /// let new_type = geoarrow_type.with_dimension(Dimension::XYZ); + /// + /// assert_eq!(new_type.dimension(), Some(Dimension::XYZ)); + /// ``` + pub fn with_dimension(self, dim: Dimension) -> GeoArrowType { + use GeoArrowType::*; + match self { + Point(t) => Point(t.with_dimension(dim)), + LineString(t) => LineString(t.with_dimension(dim)), + Polygon(t) => Polygon(t.with_dimension(dim)), + MultiPoint(t) => MultiPoint(t.with_dimension(dim)), + MultiLineString(t) => MultiLineString(t.with_dimension(dim)), + MultiPolygon(t) => MultiPolygon(t.with_dimension(dim)), + GeometryCollection(t) => GeometryCollection(t.with_dimension(dim)), + Rect(t) => Rect(t.with_dimension(dim)), + Geometry(t) => Geometry(t), + _ => self, + } + } + + /// Applies the provided [Metadata] onto self. + pub fn with_metadata(self, meta: Arc) -> GeoArrowType { + use GeoArrowType::*; + match self { + Point(t) => Point(t.with_metadata(meta)), + LineString(t) => LineString(t.with_metadata(meta)), + Polygon(t) => Polygon(t.with_metadata(meta)), + MultiPoint(t) => MultiPoint(t.with_metadata(meta)), + MultiLineString(t) => MultiLineString(t.with_metadata(meta)), + MultiPolygon(t) => MultiPolygon(t.with_metadata(meta)), + GeometryCollection(t) => GeometryCollection(t.with_metadata(meta)), + Rect(t) => Rect(t.with_metadata(meta)), + Geometry(t) => Geometry(t.with_metadata(meta)), + Wkb(t) => Wkb(t.with_metadata(meta)), + LargeWkb(t) => LargeWkb(t.with_metadata(meta)), + WkbView(t) => WkbView(t.with_metadata(meta)), + Wkt(t) => Wkt(t.with_metadata(meta)), + LargeWkt(t) => LargeWkt(t.with_metadata(meta)), + WktView(t) => WktView(t.with_metadata(meta)), + } + } + + /// Create a new [`GeoArrowType`] from an Arrow [`Field`], requiring GeoArrow metadata to be + /// set. + /// + /// If the field does not have at least a GeoArrow extension name, an error will be returned. + /// + /// Create a new [`GeoArrowType`] from an Arrow [`Field`]. + /// + /// This method requires GeoArrow metadata to be correctly set. If you wish to allow data type + /// coercion without GeoArrow metadata, use [`GeoArrowType::from_arrow_field`] instead. + /// + /// - An `Ok(Some(_))` return value indicates that the field has valid GeoArrow extension metadata, and thus was able to match to a specific GeoArrow type. + /// - An `Ok(None)` return value indicates that the field either does not have any Arrow extension name or the extension name is not a GeoArrow extension name. + /// - An `Err` return value indicates that the field has a GeoArrow extension name, but it is + /// invalid. This can happen if the field's [`DataType`] is not compatible with the allowed + /// types for the given GeoArrow type, or if the GeoArrow metadata is malformed. + pub fn from_extension_field(field: &Field) -> GeoArrowResult> { + if let Some(extension_name) = field.extension_type_name() { + use GeoArrowType::*; + let data_type = match extension_name { + PointType::NAME => Point(field.try_extension_type()?), + LineStringType::NAME => LineString(field.try_extension_type()?), + PolygonType::NAME => Polygon(field.try_extension_type()?), + MultiPointType::NAME => MultiPoint(field.try_extension_type()?), + MultiLineStringType::NAME => MultiLineString(field.try_extension_type()?), + MultiPolygonType::NAME => MultiPolygon(field.try_extension_type()?), + GeometryCollectionType::NAME => GeometryCollection(field.try_extension_type()?), + BoxType::NAME => Rect(field.try_extension_type()?), + GeometryType::NAME => Geometry(field.try_extension_type()?), + WkbType::NAME => match field.data_type() { + DataType::Binary => Wkb(field.try_extension_type()?), + DataType::LargeBinary => LargeWkb(field.try_extension_type()?), + DataType::BinaryView => WkbView(field.try_extension_type()?), + _ => { + return Err(GeoArrowError::InvalidGeoArrow(format!( + "Expected binary type for a field with extension name 'geoarrow.wkb', got '{}'", + field.data_type() + ))); + } + }, + WktType::NAME => match field.data_type() { + DataType::Utf8 => Wkt(field.try_extension_type()?), + DataType::LargeUtf8 => LargeWkt(field.try_extension_type()?), + DataType::Utf8View => WktView(field.try_extension_type()?), + _ => { + return Err(GeoArrowError::InvalidGeoArrow(format!( + "Expected string type for a field with extension name 'geoarrow.wkt', got '{}'", + field.data_type() + ))); + } + }, + _ => return Ok(None), + }; + Ok(Some(data_type)) + } else { + Ok(None) + } + } + + /// Create a new [`GeoArrowType`] from an Arrow [`Field`], inferring the GeoArrow type if + /// GeoArrow metadata is not present. + /// + /// This will first try [`GeoArrowType::from_extension_field`], and if that fails, will try to + /// infer the GeoArrow type from the field's [DataType]. This only works for Point, WKB, and + /// WKT types, as those are the only types that can be unambiguously inferred from an Arrow + /// [DataType]. + pub fn from_arrow_field(field: &Field) -> GeoArrowResult { + use GeoArrowType::*; + if let Some(geo_type) = Self::from_extension_field(field)? { + Ok(geo_type) + } else { + let metadata = Arc::new(Metadata::try_from(field)?); + let data_type = match field.data_type() { + DataType::Struct(struct_fields) => { + if !struct_fields.iter().all(|f| matches!(f.data_type(), DataType::Float64) ) { + return Err(GeoArrowError::InvalidGeoArrow("all struct fields must be Float64 when inferring point type.".to_string())); + } + + match struct_fields.len() { + 2 => GeoArrowType::Point(PointType::new( Dimension::XY, metadata).with_coord_type(CoordType::Separated)), + 3 => GeoArrowType::Point(PointType::new( Dimension::XYZ, metadata).with_coord_type(CoordType::Separated)), + 4 => GeoArrowType::Point(PointType::new( Dimension::XYZM, metadata).with_coord_type(CoordType::Separated)), + l => return Err(GeoArrowError::InvalidGeoArrow(format!("invalid number of struct fields: {l}"))), + } + }, + DataType::FixedSizeList(inner_field, list_size) => { + if !matches!(inner_field.data_type(), DataType::Float64 ) { + return Err(GeoArrowError::InvalidGeoArrow(format!("invalid inner field type of fixed size list: {}", inner_field.data_type()))); + } + + match list_size { + 2 => GeoArrowType::Point(PointType::new(Dimension::XY, metadata).with_coord_type(CoordType::Interleaved)), + 3 => GeoArrowType::Point(PointType::new(Dimension::XYZ, metadata).with_coord_type(CoordType::Interleaved)), + 4 => GeoArrowType::Point(PointType::new(Dimension::XYZM, metadata).with_coord_type(CoordType::Interleaved)), + _ => return Err(GeoArrowError::InvalidGeoArrow(format!("invalid list_size: {list_size}"))), + } + }, + DataType::Binary => Wkb(WkbType::new(metadata)), + DataType::LargeBinary => LargeWkb(WkbType::new(metadata)), + DataType::BinaryView => WkbView(WkbType::new(metadata)), + DataType::Utf8 => Wkt(WktType::new(metadata)), + DataType::LargeUtf8 => LargeWkt(WktType::new(metadata)), + DataType::Utf8View => WktView(WktType::new(metadata)), + _ => return Err(GeoArrowError::InvalidGeoArrow("Only FixedSizeList, Struct, Binary, LargeBinary, BinaryView, String, LargeString, and StringView arrays are unambiguously typed for a GeoArrow type and can be used without extension metadata.\nEnsure your array input has GeoArrow metadata.".to_string())), + }; + + Ok(data_type) + } + } +} + +macro_rules! impl_into_geoarrowtype { + ($source_type:ident, $variant:expr) => { + impl From<$source_type> for GeoArrowType { + fn from(value: $source_type) -> Self { + $variant(value) + } + } + }; +} + +impl_into_geoarrowtype!(PointType, GeoArrowType::Point); +impl_into_geoarrowtype!(LineStringType, GeoArrowType::LineString); +impl_into_geoarrowtype!(PolygonType, GeoArrowType::Polygon); +impl_into_geoarrowtype!(MultiPointType, GeoArrowType::MultiPoint); +impl_into_geoarrowtype!(MultiLineStringType, GeoArrowType::MultiLineString); +impl_into_geoarrowtype!(MultiPolygonType, GeoArrowType::MultiPolygon); +impl_into_geoarrowtype!(GeometryCollectionType, GeoArrowType::GeometryCollection); +impl_into_geoarrowtype!(BoxType, GeoArrowType::Rect); +impl_into_geoarrowtype!(GeometryType, GeoArrowType::Geometry); + +impl TryFrom<&Field> for GeoArrowType { + type Error = GeoArrowError; + + fn try_from(field: &Field) -> GeoArrowResult { + if let Some(geo_type) = Self::from_extension_field(field)? { + Ok(geo_type) + } else { + Err(GeoArrowError::InvalidGeoArrow( + "Expected GeoArrow extension metadata, found none or unsupported extension." + .to_string(), + )) + } + } +} diff --git a/src/geoarrow/geoarrow-schema/src/dimension.rs b/src/geoarrow/geoarrow-schema/src/dimension.rs new file mode 100644 index 0000000000..89c8ab7b41 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/dimension.rs @@ -0,0 +1,247 @@ +use std::{collections::HashSet, fmt::Display}; + +use arrow_schema::{ArrowError, Field, Fields}; + +use crate::error::{GeoArrowError, GeoArrowResult}; + +/// The dimension of the geometry array. +/// +/// [Dimension] implements [TryFrom] for integers: +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Dimension { + /// Two-dimensional. + XY, + + /// Three-dimensional. + XYZ, + + /// XYM (2D with measure). + XYM, + + /// XYZM (3D with measure). + XYZM, +} + +impl Dimension { + pub(crate) fn from_interleaved_field(field: &Field) -> GeoArrowResult { + let dim = match field.name().as_str() { + "xy" => Dimension::XY, + "xyz" => Dimension::XYZ, + "xym" => Dimension::XYM, + "xyzm" => Dimension::XYZM, + _ => { + return Err(ArrowError::SchemaError(format!( + "Invalid interleaved field name: {}", + field.name() + )) + .into()); + } + }; + Ok(dim) + } + + pub(crate) fn from_separated_field(fields: &Fields) -> GeoArrowResult { + let dim = if fields.len() == 2 { + Self::XY + } else if fields.len() == 3 { + let field_names: HashSet<&str> = + HashSet::from_iter(fields.iter().map(|f| f.name().as_str())); + let xym_field_names = HashSet::<&str>::from_iter(["x", "y", "m"]); + let xyz_field_names = HashSet::<&str>::from_iter(["x", "y", "z"]); + + if field_names.eq(&xym_field_names) { + Self::XYM + } else if field_names.eq(&xyz_field_names) { + Self::XYZ + } else { + return Err(ArrowError::SchemaError(format!( + "Invalid field names for separated coordinates with 3 dimensions: {field_names:?}", + + )) + .into()); + } + } else if fields.len() == 4 { + Self::XYZM + } else { + return Err(ArrowError::SchemaError(format!( + "Invalid fields for separated coordinates: {fields:?}", + )) + .into()); + }; + Ok(dim) + } + + /// Returns the number of dimensions. + pub fn size(&self) -> usize { + match self { + Dimension::XY => 2, + Dimension::XYZ => 3, + Dimension::XYM => 3, + Dimension::XYZM => 4, + } + } +} + +impl From for geo_traits::Dimensions { + fn from(value: Dimension) -> Self { + match value { + Dimension::XY => geo_traits::Dimensions::Xy, + Dimension::XYZ => geo_traits::Dimensions::Xyz, + Dimension::XYM => geo_traits::Dimensions::Xym, + Dimension::XYZM => geo_traits::Dimensions::Xyzm, + } + } +} + +impl TryFrom for Dimension { + type Error = GeoArrowError; + + fn try_from(value: geo_traits::Dimensions) -> std::result::Result { + match value { + geo_traits::Dimensions::Xy | geo_traits::Dimensions::Unknown(2) => Ok(Dimension::XY), + geo_traits::Dimensions::Xyz | geo_traits::Dimensions::Unknown(3) => Ok(Dimension::XYZ), + geo_traits::Dimensions::Xym => Ok(Dimension::XYM), + geo_traits::Dimensions::Xyzm | geo_traits::Dimensions::Unknown(4) => { + Ok(Dimension::XYZM) + } + _ => Err(GeoArrowError::InvalidGeoArrow(format!( + "Unsupported dimension {value:?}" + ))), + } + } +} + +impl Display for Dimension { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Dimension::XY => write!(f, "XY"), + Dimension::XYZ => write!(f, "XYZ"), + Dimension::XYM => write!(f, "XYM"), + Dimension::XYZM => write!(f, "XYZM"), + } + } +} + +// #[cfg(test)] +// mod test { +// use std::iter::zip; + +// use arrow_schema::DataType; + +// use super::*; + +// #[test] +// fn from_interleaved() { +// assert!(matches!( +// Dimension::from_interleaved_field(&Field::new("xy", DataType::Null, false)).unwrap(), +// Dimension::XY +// )); + +// assert!(matches!( +// Dimension::from_interleaved_field(&Field::new("xyz", DataType::Null, false)).unwrap(), +// Dimension::XYZ +// )); + +// assert!(matches!( +// Dimension::from_interleaved_field(&Field::new("xym", DataType::Null, false)).unwrap(), +// Dimension::XYM +// )); + +// assert!(matches!( +// Dimension::from_interleaved_field(&Field::new("xyzm", DataType::Null, false)).unwrap(), +// Dimension::XYZM +// )); +// } + +// #[test] +// fn from_bad_interleaved() { +// assert!( +// Dimension::from_interleaved_field(&Field::new("banana", DataType::Null, false)) +// .is_err() +// ); +// assert!( +// Dimension::from_interleaved_field(&Field::new("x", DataType::Null, false)).is_err() +// ); +// assert!( +// Dimension::from_interleaved_field(&Field::new("xyzmt", DataType::Null, false)).is_err() +// ); +// } + +// fn test_fields(dims: &[&str]) -> Fields { +// dims.iter() +// .map(|dim| Field::new(*dim, DataType::Null, false)) +// .collect() +// } + +// #[test] +// fn from_separated() { +// assert!(matches!( +// Dimension::from_separated_field(&test_fields(&["x", "y"])).unwrap(), +// Dimension::XY +// )); + +// assert!(matches!( +// Dimension::from_separated_field(&test_fields(&["x", "y", "z"])).unwrap(), +// Dimension::XYZ +// )); + +// assert!(matches!( +// Dimension::from_separated_field(&test_fields(&["x", "y", "m"])).unwrap(), +// Dimension::XYM +// )); + +// assert!(matches!( +// Dimension::from_separated_field(&test_fields(&["x", "y", "z", "m"])).unwrap(), +// Dimension::XYZM +// )); +// } + +// #[test] +// fn from_bad_separated() { +// assert!(Dimension::from_separated_field(&test_fields(&["x"])).is_err()); +// assert!(Dimension::from_separated_field(&test_fields(&["x", "y", "a"])).is_err()); +// assert!(Dimension::from_separated_field(&test_fields(&["x", "y", "z", "m", "t"])).is_err()); +// } + +// #[test] +// fn geotraits_dimensions() { +// let geoarrow_dims = [ +// Dimension::XY, +// Dimension::XYZ, +// Dimension::XYM, +// Dimension::XYZM, +// ]; +// let geotraits_dims = [ +// geo_traits::Dimensions::Xy, +// geo_traits::Dimensions::Xyz, +// geo_traits::Dimensions::Xym, +// geo_traits::Dimensions::Xyzm, +// ]; + +// for (geoarrow_dim, geotraits_dim) in zip(geoarrow_dims, geotraits_dims) { +// let into_geotraits_dim: geo_traits::Dimensions = geoarrow_dim.into(); +// assert_eq!(into_geotraits_dim, geotraits_dim); + +// let into_geoarrow_dim: Dimension = geotraits_dim.try_into().unwrap(); +// assert_eq!(into_geoarrow_dim, geoarrow_dim); + +// assert_eq!(geoarrow_dim.size(), geotraits_dim.size()); +// } + +// let dims2: Dimension = geo_traits::Dimensions::Unknown(2).try_into().unwrap(); +// assert_eq!(dims2, Dimension::XY); + +// let dims3: Dimension = geo_traits::Dimensions::Unknown(3).try_into().unwrap(); +// assert_eq!(dims3, Dimension::XYZ); + +// let dims4: Dimension = geo_traits::Dimensions::Unknown(4).try_into().unwrap(); +// assert_eq!(dims4, Dimension::XYZM); + +// let dims_err: Result = +// geo_traits::Dimensions::Unknown(0).try_into(); +// assert_eq!( +// dims_err.unwrap_err().to_string(), +// "Data not conforming to GeoArrow specification: Unsupported dimension Unknown(0)" +// ); +// } +// } diff --git a/src/geoarrow/geoarrow-schema/src/edges.rs b/src/geoarrow/geoarrow-schema/src/edges.rs new file mode 100644 index 0000000000..054e2a5479 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/edges.rs @@ -0,0 +1,74 @@ +use serde::{Deserialize, Serialize}; + +/// The edge interpretation between explicitly defined vertices. +/// +/// This does not affect format conversions (e.g., parsing `geoarrow.wkb` as +/// `geoarrow.linestring`), but does affect distance, intersection, bounding, overlay, length, and +/// area calculations. The `edges` key must be omitted to indicate planar/linear edges or be one +/// of: +/// +/// If the `edges` key is omitted, edges will be interpreted following the language of +/// [Simple features access](https://www.opengeospatial.org/standards/sfa): +/// +/// > **simple feature** feature with all geometric attributes described piecewise +/// > by straight line or planar interpolation between sets of points (Section 4.19). +/// +/// If an implementation only has support for a single edge interpretation (e.g., +/// a library with only planar edge support), an array with a different edge type +/// may be imported without losing information if the geometries in the array +/// do not contain edges (e.g., `geoarrow.point`, `geoarrow.multipoint`, a +/// `geoarrow.wkb`/`geoarrow.wkt` that only contains points and multipoints, or any +/// array that only contains empty geometries). For arrays that contain edges, +/// the error introduced by ignoring the original edge interpretation is similar to +/// the error introduced by applying a coordinate transformation to vertices (which +/// is usually small but may be large or create invalid geometries, particularly if +/// vertices are not closely spaced). Ignoring the original edge interpretation will +/// silently introduce invalid and/or misinterpreted geometries for any edge that crosses +/// the antimeridian (i.e., longitude 180/-180) when translating from non-planar +/// to planar edges. +/// +/// Implementations may implicitly import arrays with an unsupported edge type if the +/// arrays do not contain edges. Implementations may otherwise import arrays with an +/// unsupported edge type with an explicit opt-in from a user or if accompanied +/// by a prominent warning. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum Edges { + /// Edges in the longitude-latitude dimensions follow a path calculated by + /// the formula in Thomas, Paul D. Mathematical models for navigation systems. US Naval + /// Oceanographic Office, 1965 using the ellipsoid specified by the `"crs"`. + #[serde(rename = "andoyer")] + Andoyer, + + /// Edges in the longitude-latitude dimensions follow a path calculated by the formula in + /// [Karney, Charles FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): + /// 43-55](https://link.springer.com/content/pdf/10.1007/s00190-012-0578-z.pdf) and + /// [GeographicLib](https://geographiclib.sourceforge.io/) using the ellipsoid specified by the + /// `"crs"`. GeographicLib available via modern versions of PROJ. + #[serde(rename = "karney")] + Karney, + + /// Edges in the longitude-latitude dimensions follow the + /// shortest distance between vertices approximated as the shortest distance + /// between the vertices on a perfect sphere. This edge interpretation is used by + /// [BigQuery Geography](https://cloud.google.com/bigquery/docs/geospatial-data#coordinate_systems_and_edges), + /// and [Snowflake Geography](https://docs.snowflake.com/en/sql-reference/data-types-geospatial). + /// + /// A common library for interpreting edges in this way is + /// [Google's s2geometry](https://github.com/google/s2geometry); a common formula + /// for calculating distances along this trajectory is the + /// [Haversine Formula](https://en.wikipedia.org/wiki/Haversine_formula). + #[serde(rename = "spherical")] + Spherical, + + /// Edges in the longitude-latitude dimensions follow a path calculated by + /// the formula in Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. + /// US Naval Oceanographic Office, 1970 using the ellipsoid specified by the `"crs"`. + #[serde(rename = "thomas")] + Thomas, + + /// Edges in the longitude-latitude dimensions follow a path calculated + /// using [Vincenty's formula](https://en.wikipedia.org/wiki/Vincenty%27s_formulae) and + /// the ellipsoid specified by the `"crs"`. + #[serde(rename = "vincenty")] + Vincenty, +} diff --git a/src/geoarrow/geoarrow-schema/src/error.rs b/src/geoarrow/geoarrow-schema/src/error.rs new file mode 100644 index 0000000000..1788716409 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/error.rs @@ -0,0 +1,71 @@ +//! Defines [`GeoArrowError`], representing all errors returned by this crate. + +use std::{error::Error, fmt::Debug}; + +use arrow_schema::ArrowError; +use thiserror::Error; + +/// Enum with all errors in this crate. +#[derive(Error, Debug)] +#[non_exhaustive] +pub enum GeoArrowError { + /// [ArrowError] + #[error(transparent)] + Arrow(#[from] ArrowError), + + /// CRS error + #[error("CRS related error: {0}")] + Crs(String), + + /// Wraps an external error. + #[error("External error: {0}")] + External(#[from] Box), + + /// FlatGeobuf error + #[error("FlatGeobuf error: {0}")] + FlatGeobuf(String), + + /// GeoParquet error + #[error("GeoParquet error: {0}")] + GeoParquet(String), + + /// [std::io::Error] + #[error(transparent)] + IOError(#[from] std::io::Error), + + /// Invalid data not conforming to GeoArrow specification + #[error("Data not conforming to GeoArrow specification: {0}")] + InvalidGeoArrow(String), + + /// Incorrect geometry type for operation + #[error("Incorrect geometry type for operation: {0}")] + IncorrectGeometryType(String), + + /// Whenever pushing to a container fails because it does not support more entries. + /// + /// The solution is usually to use a higher-capacity container-backing type. + #[error("Overflow: data does not fit in i32 offsets.")] + Overflow, + + /// WKB Error + #[error("WKB error: {0}")] + Wkb(String), + + /// WKT Error + #[error("WKT error: {0}")] + Wkt(String), +} + +/// Crate-specific result type. +pub type GeoArrowResult = std::result::Result; + +impl From for ArrowError { + /// Many APIs where we pass in a callback into the Arrow crate require the returned error type + /// to be ArrowError, so implementing this `From` makes the conversion less verbose there. + fn from(err: GeoArrowError) -> Self { + match err { + GeoArrowError::Arrow(err) => err, + _ => ArrowError::ExternalError(Box::new(err)), + } + } +} diff --git a/src/geoarrow/geoarrow-schema/src/lib.rs b/src/geoarrow/geoarrow-schema/src/lib.rs new file mode 100644 index 0000000000..c917967001 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/lib.rs @@ -0,0 +1,20 @@ +mod coord_type; +pub mod crs; +mod datatype; +mod dimension; +mod edges; +pub mod error; +mod metadata; +mod r#type; +pub mod type_id; + +pub use coord_type::CoordType; +pub use crs::{Crs, CrsType}; +pub use datatype::GeoArrowType; +pub use dimension::Dimension; +pub use edges::Edges; +pub use metadata::Metadata; +pub use r#type::{ + BoxType, GeometryCollectionType, GeometryType, LineStringType, MultiLineStringType, + MultiPointType, MultiPolygonType, PointType, PolygonType, RectType, WkbType, WktType, +}; diff --git a/src/geoarrow/geoarrow-schema/src/metadata.rs b/src/geoarrow/geoarrow-schema/src/metadata.rs new file mode 100644 index 0000000000..dfb6435466 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/metadata.rs @@ -0,0 +1,202 @@ +use arrow_schema::{ArrowError, Field}; +use serde::{Deserialize, Serialize}; + +use crate::{Edges, crs::Crs}; + +/// GeoArrow extension metadata. +/// +/// This follows the extension metadata [defined by the GeoArrow +/// specification](https://geoarrow.org/extension-types). +/// +/// This struct is contained within all GeoArrow geometry type definitions, such as +/// [`PointType`][crate::PointType], [`GeometryType`][crate::GeometryType], or +/// [`WkbType`][crate::WkbType]. +#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct Metadata { + // Raise the underlying crs fields to this level. + // https://serde.rs/attr-flatten.html + #[serde(flatten)] + crs: Crs, + + /// If present, instructs consumers that edges follow a spherical path rather than a planar + /// one. If this value is omitted, edges will be interpreted as planar. + #[serde(skip_serializing_if = "Option::is_none")] + edges: Option, +} + +impl Metadata { + /// Creates a new [`Metadata`] object. + pub fn new(crs: Crs, edges: Option) -> Self { + Self { crs, edges } + } + + /// Expose the underlying Coordinate Reference System information. + pub fn crs(&self) -> &Crs { + &self.crs + } + + /// Expose the underlying edge interpolation + pub fn edges(&self) -> Option { + self.edges + } + + /// Serialize this metadata to a string. + /// + /// If `None`, no extension metadata should be written. + pub(crate) fn serialize(&self) -> Option { + if self.crs.should_serialize() || self.edges.is_some() { + Some(serde_json::to_string(&self).unwrap()) + } else { + None + } + } + + /// Deserialize metadata from a string. + pub(crate) fn deserialize>(metadata: Option) -> Result { + if let Some(ext_meta) = metadata { + Ok(serde_json::from_str(ext_meta.as_ref()) + .map_err(|err| ArrowError::ExternalError(Box::new(err)))?) + } else { + Ok(Default::default()) + } + } +} + +impl TryFrom<&Field> for Metadata { + type Error = ArrowError; + + fn try_from(value: &Field) -> Result { + Self::deserialize(value.extension_type_metadata()) + } +} + +// #[cfg(test)] +// mod test { +// use std::{collections::HashMap, str::FromStr}; + +// use arrow_schema::DataType; +// use serde_json::{Value, json}; + +// use super::*; + +// const EPSG_4326_WKT: &str = r#"GEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble",MEMBER["World Geodetic System 1984 (Transit)"],MEMBER["World Geodetic System 1984 (G730)"],MEMBER["World Geodetic System 1984 (G873)"],MEMBER["World Geodetic System 1984 (G1150)"],MEMBER["World Geodetic System 1984 (G1674)"],MEMBER["World Geodetic System 1984 (G1762)"],MEMBER["World Geodetic System 1984 (G2139)"],ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],AXIS["geodetic latitude (Lat)",north,ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],AXIS["geodetic longitude (Lon)",east,ORDER[2],ANGLEUNIT["degree",0.0174532925199433]],USAGE[SCOPE["Horizontal component of 3D system."],AREA["World."],BBOX[-90,-180,90,180]],ID["EPSG",4326]]"#; + +// const EPSG_4326_PROJJSON: &str = r#"{"$schema":"https://proj.org/schemas/v0.7/projjson.schema.json","type":"GeographicCRS","name":"WGS 84","datum_ensemble":{"name":"World Geodetic System 1984 ensemble","members":[{"name":"World Geodetic System 1984 (Transit)","id":{"authority":"EPSG","code":1166}},{"name":"World Geodetic System 1984 (G730)","id":{"authority":"EPSG","code":1152}},{"name":"World Geodetic System 1984 (G873)","id":{"authority":"EPSG","code":1153}},{"name":"World Geodetic System 1984 (G1150)","id":{"authority":"EPSG","code":1154}},{"name":"World Geodetic System 1984 (G1674)","id":{"authority":"EPSG","code":1155}},{"name":"World Geodetic System 1984 (G1762)","id":{"authority":"EPSG","code":1156}},{"name":"World Geodetic System 1984 (G2139)","id":{"authority":"EPSG","code":1309}}],"ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563},"accuracy":"2.0","id":{"authority":"EPSG","code":6326}},"coordinate_system":{"subtype":"ellipsoidal","axis":[{"name":"Geodetic latitude","abbreviation":"Lat","direction":"north","unit":"degree"},{"name":"Geodetic longitude","abbreviation":"Lon","direction":"east","unit":"degree"}]},"scope":"Horizontal component of 3D system.","area":"World.","bbox":{"south_latitude":-90,"west_longitude":-180,"north_latitude":90,"east_longitude":180},"id":{"authority":"EPSG","code":4326}}"#; + +// #[test] +// fn test_crs_authority_code() { +// let crs = Crs::from_authority_code("EPSG:4326".to_string()); +// let metadata = Metadata::new(crs, Some(Edges::Spherical)); + +// let expected = r#"{"crs":"EPSG:4326","crs_type":"authority_code","edges":"spherical"}"#; +// let serialized = metadata.serialize(); +// assert_eq!(serialized.as_deref(), Some(expected)); + +// assert_eq!( +// metadata, +// Metadata::deserialize(serialized.as_deref()).unwrap() +// ); +// } + +// #[test] +// fn test_crs_authority_code_no_edges() { +// let crs = Crs::from_authority_code("EPSG:4326".to_string()); +// let metadata = Metadata::new(crs, None); + +// let expected = r#"{"crs":"EPSG:4326","crs_type":"authority_code"}"#; + +// let serialized = metadata.serialize(); +// assert_eq!(serialized.as_deref(), Some(expected)); + +// assert_eq!( +// metadata, +// Metadata::deserialize(serialized.as_deref()).unwrap() +// ); +// } + +// #[test] +// fn test_crs_wkt() { +// let crs = Crs::from_wkt2_2019(EPSG_4326_WKT.to_string()); +// let metadata = Metadata::new(crs, None); + +// let expected = r#"{"crs":"GEOGCRS[\"WGS 84\",ENSEMBLE[\"World Geodetic System 1984 ensemble\",MEMBER[\"World Geodetic System 1984 (Transit)\"],MEMBER[\"World Geodetic System 1984 (G730)\"],MEMBER[\"World Geodetic System 1984 (G873)\"],MEMBER[\"World Geodetic System 1984 (G1150)\"],MEMBER[\"World Geodetic System 1984 (G1674)\"],MEMBER[\"World Geodetic System 1984 (G1762)\"],MEMBER[\"World Geodetic System 1984 (G2139)\"],ELLIPSOID[\"WGS 84\",6378137,298.257223563,LENGTHUNIT[\"metre\",1]],ENSEMBLEACCURACY[2.0]],PRIMEM[\"Greenwich\",0,ANGLEUNIT[\"degree\",0.0174532925199433]],CS[ellipsoidal,2],AXIS[\"geodetic latitude (Lat)\",north,ORDER[1],ANGLEUNIT[\"degree\",0.0174532925199433]],AXIS[\"geodetic longitude (Lon)\",east,ORDER[2],ANGLEUNIT[\"degree\",0.0174532925199433]],USAGE[SCOPE[\"Horizontal component of 3D system.\"],AREA[\"World.\"],BBOX[-90,-180,90,180]],ID[\"EPSG\",4326]]","crs_type":"wkt2:2019"}"#; + +// let serialized = metadata.serialize(); +// assert_eq!(serialized.as_deref(), Some(expected)); + +// assert_eq!( +// metadata, +// Metadata::deserialize(serialized.as_deref()).unwrap() +// ); +// } + +// #[test] +// fn test_projjson() { +// let crs = Crs::from_projjson(Value::from_str(EPSG_4326_PROJJSON).unwrap()); +// let metadata = Metadata::new(crs, None); + +// let expected = r#"{"crs":{"$schema":"https://proj.org/schemas/v0.7/projjson.schema.json","type":"GeographicCRS","name":"WGS 84","datum_ensemble":{"name":"World Geodetic System 1984 ensemble","members":[{"name":"World Geodetic System 1984 (Transit)","id":{"authority":"EPSG","code":1166}},{"name":"World Geodetic System 1984 (G730)","id":{"authority":"EPSG","code":1152}},{"name":"World Geodetic System 1984 (G873)","id":{"authority":"EPSG","code":1153}},{"name":"World Geodetic System 1984 (G1150)","id":{"authority":"EPSG","code":1154}},{"name":"World Geodetic System 1984 (G1674)","id":{"authority":"EPSG","code":1155}},{"name":"World Geodetic System 1984 (G1762)","id":{"authority":"EPSG","code":1156}},{"name":"World Geodetic System 1984 (G2139)","id":{"authority":"EPSG","code":1309}}],"ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563},"accuracy":"2.0","id":{"authority":"EPSG","code":6326}},"coordinate_system":{"subtype":"ellipsoidal","axis":[{"name":"Geodetic latitude","abbreviation":"Lat","direction":"north","unit":"degree"},{"name":"Geodetic longitude","abbreviation":"Lon","direction":"east","unit":"degree"}]},"scope":"Horizontal component of 3D system.","area":"World.","bbox":{"south_latitude":-90,"west_longitude":-180,"north_latitude":90,"east_longitude":180},"id":{"authority":"EPSG","code":4326}},"crs_type":"projjson"}"#; + +// let serialized = metadata.serialize(); + +// // We use Value for equality checking because JSON string formatting is different +// assert_eq!( +// Value::from_str(serialized.as_deref().unwrap()).unwrap(), +// Value::from_str(expected).unwrap() +// ); + +// assert_eq!( +// metadata, +// Metadata::deserialize(serialized.as_deref()).unwrap() +// ); +// } + +// #[test] +// fn test_unknown_crs() { +// let crs = Crs::from_unknown_crs_type("CRS".to_string()); +// let metadata = Metadata::new(crs, None); + +// let expected = r#"{"crs":"CRS"}"#; + +// let serialized = metadata.serialize(); +// assert_eq!(serialized.as_deref(), Some(expected)); + +// assert_eq!( +// metadata, +// Metadata::deserialize(serialized.as_deref()).unwrap() +// ); +// } + +// #[test] +// fn test_empty_metadata() { +// let metadata = Metadata::default(); +// let serialized = metadata.serialize(); +// assert_eq!(serialized.as_deref(), None); + +// assert_eq!( +// metadata, +// Metadata::deserialize(serialized.as_deref()).unwrap() +// ); +// } + +// #[test] +// fn from_field() { +// let field = Field::new("", DataType::Null, false).with_metadata(HashMap::from([( +// "ARROW:extension:metadata".to_string(), +// r#"{"crs": {}, "crs_type": "projjson", "edges": "spherical"}"#.to_string(), +// )])); + +// let metadata = Metadata::try_from(&field).unwrap(); +// assert_eq!(metadata.crs(), &Crs::from_projjson(json!({}))); +// assert_eq!(metadata.edges(), Some(Edges::Spherical)); + +// let bad_field = Field::new("", DataType::Null, false).with_metadata(HashMap::from([( +// "ARROW:extension:metadata".to_string(), +// "not valid json".to_string(), +// )])); +// assert_eq!( +// Metadata::try_from(&bad_field).unwrap_err().to_string(), +// "External error: expected ident at line 1 column 2" +// ); +// } +// } diff --git a/src/geoarrow/geoarrow-schema/src/type.rs b/src/geoarrow/geoarrow-schema/src/type.rs new file mode 100644 index 0000000000..217aed1046 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/type.rs @@ -0,0 +1,1587 @@ +use std::{ + collections::HashSet, + sync::{Arc, LazyLock}, +}; + +use arrow_schema::{ArrowError, DataType, Field, UnionFields, UnionMode, extension::ExtensionType}; + +use crate::{CoordType, Dimension, error::GeoArrowError, metadata::Metadata}; + +macro_rules! define_basic_type { + ( + $(#[$($attrss:meta)*])* + $struct_name:ident + ) => { + $(#[$($attrss)*])* + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub struct $struct_name { + coord_type: CoordType, + dim: Dimension, + metadata: Arc, + } + + impl $struct_name { + /// Construct a new type from parts. + pub fn new(dim: Dimension, metadata: Arc) -> Self { + Self { + coord_type: Default::default(), + dim, + metadata, + } + } + + /// Change the underlying [`CoordType`] + pub fn with_coord_type(self, coord_type: CoordType) -> Self { + Self { coord_type, ..self } + } + + /// Change the underlying [`Dimension`] + pub fn with_dimension(self, dim: Dimension) -> Self { + Self { dim, ..self } + } + + /// Change the underlying [`Metadata`] + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { metadata, ..self } + } + + /// Retrieve the underlying [`CoordType`] + pub fn coord_type(&self) -> CoordType { + self.coord_type + } + + /// Retrieve the underlying [`Dimension`] + pub fn dimension(&self) -> Dimension { + self.dim + } + + /// Retrieve the underlying [`Metadata`] + pub fn metadata(&self) -> &Arc { + &self.metadata + } + + /// Convert this type to a [`Field`], retaining extension metadata. + pub fn to_field>(&self, name: N, nullable: bool) -> Field { + Field::new(name, self.data_type(), nullable).with_extension_type(self.clone()) + } + + /// Extract into components + pub fn into_inner(self) -> (CoordType, Dimension, Arc) { + (self.coord_type, self.dim, self.metadata) + } + } + }; +} + +define_basic_type!( + /// A GeoArrow Point type. + /// + /// Refer to the [GeoArrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#point). + PointType +); +define_basic_type!( + /// A GeoArrow LineString type. + /// + /// Refer to the [GeoArrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#linestring). + LineStringType +); +define_basic_type!( + /// A GeoArrow Polygon type. + /// + /// Refer to the [GeoArrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#polygon). + PolygonType +); +define_basic_type!( + /// A GeoArrow MultiPoint type. + /// + /// Refer to the [GeoArrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#multipoint). + MultiPointType +); +define_basic_type!( + /// A GeoArrow MultiLineString type. + /// + /// Refer to the [GeoArrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#multilinestring). + MultiLineStringType +); +define_basic_type!( + /// A GeoArrow MultiPolygon type. + /// + /// Refer to the [GeoArrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#multipolygon). + MultiPolygonType +); +define_basic_type!( + /// A GeoArrow GeometryCollection type. + /// + /// Refer to the [GeoArrow + /// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#geometrycollection). + GeometryCollectionType +); + +impl PointType { + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use arrow_schema::{DataType, Field}; + /// use geoarrow_schema::{CoordType, Dimension, PointType}; + /// + /// let geom_type = PointType::new(Dimension::XY, Default::default()).with_coord_type(CoordType::Interleaved); + /// let expected_type = + /// DataType::FixedSizeList(Field::new("xy", DataType::Float64, false).into(), 2); + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + coord_type_to_data_type(self.coord_type, self.dim) + } +} + +impl ExtensionType for PointType { + const NAME: &'static str = "geoarrow.point"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let (coord_type, dim) = parse_point(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let (coord_type, dim) = parse_point(data_type)?; + Ok(Self { + coord_type, + dim, + metadata, + }) + } +} + +fn parse_point(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + match data_type { + DataType::FixedSizeList(inner_field, list_size) => { + let dim_parsed_from_field = Dimension::from_interleaved_field(inner_field)?; + if dim_parsed_from_field.size() != *list_size as usize { + Err(GeoArrowError::InvalidGeoArrow(format!( + "Field metadata suggests list of size {}, but list size is {}", + dim_parsed_from_field.size(), + list_size + )) + .into()) + } else { + Ok((CoordType::Interleaved, dim_parsed_from_field)) + } + } + DataType::Struct(struct_fields) => Ok(( + CoordType::Separated, + Dimension::from_separated_field(struct_fields)?, + )), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected data type {dt}" + ))), + } +} + +impl LineStringType { + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use arrow_schema::{DataType, Field}; + /// use geoarrow_schema::{Dimension, LineStringType}; + /// + /// let geom_type = LineStringType::new(Dimension::XY, Default::default()); + /// let expected_coord_type = DataType::Struct( + /// vec![ + /// Field::new("x", DataType::Float64, false), + /// Field::new("y", DataType::Float64, false), + /// ] + /// .into(), + /// ); + /// let expected_type = DataType::List(Field::new("vertices", expected_coord_type, false).into()); + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + let coords_type = coord_type_to_data_type(self.coord_type, self.dim); + let vertices_field = Field::new("vertices", coords_type, false).into(); + DataType::LargeList(vertices_field) + } +} + +impl ExtensionType for LineStringType { + const NAME: &'static str = "geoarrow.linestring"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let (coord_type, dim) = parse_linestring(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let (coord_type, dim) = parse_linestring(data_type)?; + Ok(Self { + coord_type, + dim, + metadata, + }) + } +} + +fn parse_linestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + match data_type { + DataType::List(inner_field) | DataType::LargeList(inner_field) => { + parse_point(inner_field.data_type()) + } + dt => Err(ArrowError::SchemaError(format!( + "Unexpected data type {dt}" + ))), + } +} + +impl PolygonType { + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use arrow_schema::{DataType, Field}; + /// use geoarrow_schema::{Dimension, PolygonType}; + /// + /// let geom_type = PolygonType::new(Dimension::XYZ, Default::default()); + /// + /// let expected_coord_type = DataType::Struct( + /// vec![ + /// Field::new("x", DataType::Float64, false), + /// Field::new("y", DataType::Float64, false), + /// Field::new("z", DataType::Float64, false), + /// ] + /// .into(), + /// ); + /// let vertices_field = Field::new("vertices", expected_coord_type, false); + /// let rings_field = Field::new_list("rings", vertices_field, false); + /// let expected_type = DataType::List(rings_field.into()); + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + let coords_type = coord_type_to_data_type(self.coord_type, self.dim); + let vertices_field = Field::new("vertices", coords_type, false); + let rings_field = Field::new_large_list("rings", vertices_field, false).into(); + DataType::LargeList(rings_field) + } +} + +impl ExtensionType for PolygonType { + const NAME: &'static str = "geoarrow.polygon"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let (coord_type, dim) = parse_polygon(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let (coord_type, dim) = parse_polygon(data_type)?; + Ok(Self { + coord_type, + dim, + metadata, + }) + } +} + +fn parse_polygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + match data_type { + DataType::List(inner1) => match inner1.data_type() { + DataType::List(inner2) => parse_point(inner2.data_type()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner polygon data type: {dt}" + ))), + }, + DataType::LargeList(inner1) => match inner1.data_type() { + DataType::LargeList(inner2) => parse_point(inner2.data_type()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner polygon data type: {dt}" + ))), + }, + dt => Err(ArrowError::SchemaError(format!( + "Unexpected root data type parsing polygon {dt}" + ))), + } +} + +impl MultiPointType { + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use arrow_schema::{DataType, Field}; + /// use geoarrow_schema::{Dimension, MultiPointType}; + /// + /// let geom_type = MultiPointType::new(Dimension::XYZ, Default::default()); + /// + /// let expected_coord_type = DataType::Struct( + /// vec![ + /// Field::new("x", DataType::Float64, false), + /// Field::new("y", DataType::Float64, false), + /// Field::new("z", DataType::Float64, false), + /// ] + /// .into(), + /// ); + /// let vertices_field = Field::new("points", expected_coord_type, false); + /// let expected_type = DataType::List(vertices_field.into()); + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + let coords_type = coord_type_to_data_type(self.coord_type, self.dim); + let vertices_field = Field::new("points", coords_type, false).into(); + DataType::LargeList(vertices_field) + } +} + +impl ExtensionType for MultiPointType { + const NAME: &'static str = "geoarrow.multipoint"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let (coord_type, dim) = parse_multipoint(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let (coord_type, dim) = parse_multipoint(data_type)?; + Ok(Self { + coord_type, + dim, + metadata, + }) + } +} + +fn parse_multipoint(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + match data_type { + DataType::List(inner_field) => parse_point(inner_field.data_type()), + DataType::LargeList(inner_field) => parse_point(inner_field.data_type()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected data type {dt}" + ))), + } +} + +impl MultiLineStringType { + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use arrow_schema::{DataType, Field}; + /// use geoarrow_schema::{Dimension, MultiLineStringType}; + /// + /// let geom_type = + /// MultiLineStringType::new(Dimension::XYZ, Default::default()); + /// + /// let expected_coord_type = DataType::Struct( + /// vec![ + /// Field::new("x", DataType::Float64, false), + /// Field::new("y", DataType::Float64, false), + /// Field::new("z", DataType::Float64, false), + /// ] + /// .into(), + /// ); + /// let vertices_field = Field::new("vertices", expected_coord_type, false); + /// let linestrings_field = Field::new_list("linestrings", vertices_field, false); + /// let expected_type = DataType::List(linestrings_field.into()); + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + let coords_type = coord_type_to_data_type(self.coord_type, self.dim); + let vertices_field = Field::new("vertices", coords_type, false); + let linestrings_field = Field::new_large_list("linestrings", vertices_field, false).into(); + DataType::LargeList(linestrings_field) + } +} + +impl ExtensionType for MultiLineStringType { + const NAME: &'static str = "geoarrow.multilinestring"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let (coord_type, dim) = parse_multilinestring(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let (coord_type, dim) = parse_multilinestring(data_type)?; + Ok(Self { + coord_type, + dim, + metadata, + }) + } +} + +fn parse_multilinestring(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + match data_type { + DataType::List(inner1) => match inner1.data_type() { + DataType::List(inner2) => parse_point(inner2.data_type()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner multilinestring data type: {dt}" + ))), + }, + DataType::LargeList(inner1) => match inner1.data_type() { + DataType::LargeList(inner2) => parse_point(inner2.data_type()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner multilinestring data type: {dt}" + ))), + }, + dt => Err(ArrowError::SchemaError(format!( + "Unexpected data type parsing multilinestring: {dt}" + ))), + } +} + +impl MultiPolygonType { + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use arrow_schema::{DataType, Field}; + /// use geoarrow_schema::{Dimension, MultiPolygonType}; + /// + /// let geom_type = MultiPolygonType::new(Dimension::XYM, Default::default()); + /// + /// let expected_coord_type = DataType::Struct( + /// vec![ + /// Field::new("x", DataType::Float64, false), + /// Field::new("y", DataType::Float64, false), + /// Field::new("m", DataType::Float64, false), + /// ] + /// .into(), + /// ); + /// let vertices_field = Field::new("vertices", expected_coord_type, false); + /// let rings_field = Field::new_list("rings", vertices_field, false); + /// let polygons_field = Field::new_list("polygons", rings_field, false); + /// let expected_type = DataType::List(polygons_field.into()); + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + let coords_type = coord_type_to_data_type(self.coord_type, self.dim); + let vertices_field = Field::new("vertices", coords_type, false); + let rings_field = Field::new_large_list("rings", vertices_field, false); + let polygons_field = Field::new_large_list("polygons", rings_field, false).into(); + DataType::LargeList(polygons_field) + } +} + +impl ExtensionType for MultiPolygonType { + const NAME: &'static str = "geoarrow.multipolygon"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let (coord_type, dim) = parse_multipolygon(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let (coord_type, dim) = parse_multipolygon(data_type)?; + Ok(Self { + coord_type, + dim, + metadata, + }) + } +} + +fn parse_multipolygon(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + match data_type { + DataType::List(inner1) => match inner1.data_type() { + DataType::List(inner2) => match inner2.data_type() { + DataType::List(inner3) => parse_point(inner3.data_type()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner2 multipolygon data type: {dt}" + ))), + }, + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner1 multipolygon data type: {dt}" + ))), + }, + DataType::LargeList(inner1) => match inner1.data_type() { + DataType::LargeList(inner2) => match inner2.data_type() { + DataType::LargeList(inner3) => parse_point(inner3.data_type()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner2 multipolygon data type: {dt}" + ))), + }, + dt => Err(ArrowError::SchemaError(format!( + "Unexpected inner1 multipolygon data type: {dt}" + ))), + }, + dt => Err(ArrowError::SchemaError(format!( + "Unexpected data type {dt}" + ))), + } +} + +impl GeometryCollectionType { + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use std::sync::Arc; + /// + /// use arrow_schema::{DataType, Field, UnionFields, UnionMode}; + /// use geoarrow_schema::{ + /// Dimension, GeometryCollectionType, LineStringType, Metadata, MultiLineStringType, + /// MultiPointType, MultiPolygonType, PointType, PolygonType, + /// }; + /// + /// let dim = Dimension::XY; + /// let metadata = Arc::new(Metadata::default()); + /// let geom_type = GeometryCollectionType::new(dim, metadata.clone()); + /// + /// let fields = vec![ + /// Field::new( + /// "Point", + /// PointType::new(dim, metadata.clone()).data_type(), + /// true, + /// ), + /// Field::new( + /// "LineString", + /// LineStringType::new(dim, metadata.clone()).data_type(), + /// true, + /// ), + /// Field::new( + /// "Polygon", + /// PolygonType::new(dim, metadata.clone()).data_type(), + /// true, + /// ), + /// Field::new( + /// "MultiPoint", + /// MultiPointType::new(dim, metadata.clone()).data_type(), + /// true, + /// ), + /// Field::new( + /// "MultiLineString", + /// MultiLineStringType::new(dim, metadata.clone()).data_type(), + /// true, + /// ), + /// Field::new( + /// "MultiPolygon", + /// MultiPolygonType::new(dim, metadata.clone()).data_type(), + /// true, + /// ), + /// ]; + /// let type_ids = vec![1, 2, 3, 4, 5, 6]; + /// + /// let union_fields = UnionFields::new(type_ids, fields); + /// let union_data_type = DataType::Union(union_fields, UnionMode::Dense); + /// + /// let geometries_field = Field::new("geometries", union_data_type, false).into(); + /// let expected_type = DataType::List(geometries_field); + /// + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + let geometries_field = Field::new( + "geometries", + mixed_data_type(self.coord_type, self.dim), + false, + ) + .into(); + DataType::LargeList(geometries_field) + } +} + +fn mixed_data_type(coord_type: CoordType, dim: Dimension) -> DataType { + let mut fields = vec![]; + let mut type_ids = vec![]; + + match dim { + Dimension::XY => type_ids.extend([1, 2, 3, 4, 5, 6]), + Dimension::XYZ => type_ids.extend([11, 12, 13, 14, 15, 16]), + Dimension::XYM => type_ids.extend([21, 22, 23, 24, 25, 26]), + Dimension::XYZM => type_ids.extend([31, 32, 33, 34, 35, 36]), + } + + // Note: we manually construct the fields because these fields shouldn't have their own + // GeoArrow extension metadata + macro_rules! push_field { + ($field_name:literal, $geom_type:ident) => {{ + fields.push(Field::new( + $field_name, + $geom_type { + coord_type, + dim, + metadata: Metadata::default().into(), + } + .data_type(), + true, + )); + }}; + } + + match dim { + Dimension::XY => { + push_field!("Point", PointType); + push_field!("LineString", LineStringType); + push_field!("Polygon", PolygonType); + push_field!("MultiPoint", MultiPointType); + push_field!("MultiLineString", MultiLineStringType); + push_field!("MultiPolygon", MultiPolygonType); + } + Dimension::XYZ => { + push_field!("Point Z", PointType); + push_field!("LineString Z", LineStringType); + push_field!("Polygon Z", PolygonType); + push_field!("MultiPoint Z", MultiPointType); + push_field!("MultiLineString Z", MultiLineStringType); + push_field!("MultiPolygon Z", MultiPolygonType); + } + Dimension::XYM => { + push_field!("Point M", PointType); + push_field!("LineString M", LineStringType); + push_field!("Polygon M", PolygonType); + push_field!("MultiPoint M", MultiPointType); + push_field!("MultiLineString M", MultiLineStringType); + push_field!("MultiPolygon M", MultiPolygonType); + } + Dimension::XYZM => { + push_field!("Point ZM", PointType); + push_field!("LineString ZM", LineStringType); + push_field!("Polygon ZM", PolygonType); + push_field!("MultiPoint ZM", MultiPointType); + push_field!("MultiLineString ZM", MultiLineStringType); + push_field!("MultiPolygon ZM", MultiPolygonType); + } + } + + let union_fields = UnionFields::try_new(type_ids, fields).unwrap(); + DataType::Union(union_fields, UnionMode::Dense) +} + +impl ExtensionType for GeometryCollectionType { + const NAME: &'static str = "geoarrow.geometrycollection"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let (coord_type, dim) = parse_geometry_collection(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let (coord_type, dim) = parse_geometry_collection(data_type)?; + Ok(Self { + coord_type, + dim, + metadata, + }) + } +} + +fn parse_mixed(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + match data_type { + DataType::Union(fields, _) => { + let mut coord_types: HashSet = HashSet::new(); + let mut dimensions: HashSet = HashSet::new(); + + // Validate that all fields of the union have the same coordinate type and dimension + fields.iter().try_for_each(|(type_id, field)| { + macro_rules! impl_type_id { + ($expected_dim:path, $parse_fn:ident) => {{ + let (ct, dim) = $parse_fn(field.data_type())?; + coord_types.insert(ct); + assert!(matches!(dim, $expected_dim)); + dimensions.insert(dim); + }}; + } + + match type_id { + 1 => impl_type_id!(Dimension::XY, parse_point), + 2 => impl_type_id!(Dimension::XY, parse_linestring), + 3 => impl_type_id!(Dimension::XY, parse_polygon), + 4 => impl_type_id!(Dimension::XY, parse_multipoint), + 5 => impl_type_id!(Dimension::XY, parse_multilinestring), + 6 => impl_type_id!(Dimension::XY, parse_multipolygon), + 11 => impl_type_id!(Dimension::XYZ, parse_point), + 12 => impl_type_id!(Dimension::XYZ, parse_linestring), + 13 => impl_type_id!(Dimension::XYZ, parse_polygon), + 14 => impl_type_id!(Dimension::XYZ, parse_multipoint), + 15 => impl_type_id!(Dimension::XYZ, parse_multilinestring), + 16 => impl_type_id!(Dimension::XYZ, parse_multipolygon), + 21 => impl_type_id!(Dimension::XYM, parse_point), + 22 => impl_type_id!(Dimension::XYM, parse_linestring), + 23 => impl_type_id!(Dimension::XYM, parse_polygon), + 24 => impl_type_id!(Dimension::XYM, parse_multipoint), + 25 => impl_type_id!(Dimension::XYM, parse_multilinestring), + 26 => impl_type_id!(Dimension::XYM, parse_multipolygon), + 31 => impl_type_id!(Dimension::XYZM, parse_point), + 32 => impl_type_id!(Dimension::XYZM, parse_linestring), + 33 => impl_type_id!(Dimension::XYZM, parse_polygon), + 34 => impl_type_id!(Dimension::XYZM, parse_multipoint), + 35 => impl_type_id!(Dimension::XYZM, parse_multilinestring), + 36 => impl_type_id!(Dimension::XYZM, parse_multipolygon), + id => { + return Err(ArrowError::SchemaError(format!( + "Unexpected type id parsing mixed: {id}" + ))); + } + }; + Ok::<_, ArrowError>(()) + })?; + + if coord_types.len() > 1 { + return Err(ArrowError::SchemaError( + "Multi coord types in union".to_string(), + )); + } + if dimensions.len() > 1 { + return Err(ArrowError::SchemaError( + "Multi dimensions types in union".to_string(), + )); + } + + let coord_type = coord_types.drain().next().unwrap(); + let dimension = dimensions.drain().next().unwrap(); + Ok((coord_type, dimension)) + } + dt => Err(ArrowError::SchemaError(format!( + "Unexpected mixed data type: {dt}" + ))), + } +} + +fn parse_geometry_collection(data_type: &DataType) -> Result<(CoordType, Dimension), ArrowError> { + // We need to parse the _inner_ type of the geometry collection as a union so that we can check + // what coordinate type it's using. + match data_type { + DataType::List(inner_field) | DataType::LargeList(inner_field) => { + parse_mixed(inner_field.data_type()) + } + dt => Err(ArrowError::SchemaError(format!( + "Unexpected geometry collection data type: {dt}" + ))), + } +} + +static INTERLEAVED_XY: LazyLock = LazyLock::new(|| { + let values_field = Field::new("xy", DataType::Float64, false); + DataType::FixedSizeList(Arc::new(values_field), 2) +}); + +static INTERLEAVED_XYZ: LazyLock = LazyLock::new(|| { + let values_field = Field::new("xyz", DataType::Float64, false); + DataType::FixedSizeList(Arc::new(values_field), 3) +}); + +static INTERLEAVED_XYM: LazyLock = LazyLock::new(|| { + let values_field = Field::new("xym", DataType::Float64, false); + DataType::FixedSizeList(Arc::new(values_field), 3) +}); + +static INTERLEAVED_XYZM: LazyLock = LazyLock::new(|| { + let values_field = Field::new("xyzm", DataType::Float64, false); + DataType::FixedSizeList(Arc::new(values_field), 4) +}); + +static SEPARATED_XY: LazyLock = LazyLock::new(|| { + DataType::Struct( + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ] + .into(), + ) +}); + +static SEPARATED_XYZ: LazyLock = LazyLock::new(|| { + DataType::Struct( + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + Field::new("z", DataType::Float64, false), + ] + .into(), + ) +}); + +static SEPARATED_XYM: LazyLock = LazyLock::new(|| { + DataType::Struct( + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + Field::new("m", DataType::Float64, false), + ] + .into(), + ) +}); + +static SEPARATED_XYZM: LazyLock = LazyLock::new(|| { + DataType::Struct( + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + Field::new("z", DataType::Float64, false), + Field::new("m", DataType::Float64, false), + ] + .into(), + ) +}); + +/// A GeoArrow Geometry type. +/// +/// Refer to the [GeoArrow +/// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#geometry). +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct GeometryType { + coord_type: CoordType, + metadata: Arc, +} + +impl GeometryType { + /// Construct a new type from parts. + pub fn new(metadata: Arc) -> Self { + Self { + coord_type: Default::default(), + metadata, + } + } + + /// Change the underlying [`CoordType`] + pub fn with_coord_type(self, coord_type: CoordType) -> Self { + Self { coord_type, ..self } + } + + /// Change the underlying [`Metadata`] + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { metadata, ..self } + } + + /// Retrieve the underlying [`CoordType`] + pub fn coord_type(&self) -> CoordType { + self.coord_type + } + + /// Retrieve the underlying [`Metadata`] + pub fn metadata(&self) -> &Arc { + &self.metadata + } + + /// Convert to the corresponding [`DataType`]. + pub fn data_type(&self) -> DataType { + let mut fields = vec![]; + let type_ids = vec![ + 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 31, 32, + 33, 34, 35, 36, 37, + ]; + + // Note: we manually construct the fields because these fields shouldn't have their own + // GeoArrow extension metadata + macro_rules! push_field { + ($field_name:literal, $geom_type:ident, $dim:path) => {{ + fields.push(Field::new( + $field_name, + $geom_type { + coord_type: self.coord_type, + dim: $dim, + metadata: Metadata::default().into(), + } + .data_type(), + true, + )); + }}; + } + + push_field!("Point", PointType, Dimension::XY); + push_field!("LineString", LineStringType, Dimension::XY); + push_field!("Polygon", PolygonType, Dimension::XY); + push_field!("MultiPoint", MultiPointType, Dimension::XY); + push_field!("MultiLineString", MultiLineStringType, Dimension::XY); + push_field!("MultiPolygon", MultiPolygonType, Dimension::XY); + push_field!("GeometryCollection", GeometryCollectionType, Dimension::XY); + + push_field!("Point Z", PointType, Dimension::XYZ); + push_field!("LineString Z", LineStringType, Dimension::XYZ); + push_field!("Polygon Z", PolygonType, Dimension::XYZ); + push_field!("MultiPoint Z", MultiPointType, Dimension::XYZ); + push_field!("MultiLineString Z", MultiLineStringType, Dimension::XYZ); + push_field!("MultiPolygon Z", MultiPolygonType, Dimension::XYZ); + push_field!( + "GeometryCollection Z", + GeometryCollectionType, + Dimension::XYZ + ); + + push_field!("Point M", PointType, Dimension::XYM); + push_field!("LineString M", LineStringType, Dimension::XYM); + push_field!("Polygon M", PolygonType, Dimension::XYM); + push_field!("MultiPoint M", MultiPointType, Dimension::XYM); + push_field!("MultiLineString M", MultiLineStringType, Dimension::XYM); + push_field!("MultiPolygon M", MultiPolygonType, Dimension::XYM); + push_field!( + "GeometryCollection M", + GeometryCollectionType, + Dimension::XYM + ); + + push_field!("Point ZM", PointType, Dimension::XYZM); + push_field!("LineString ZM", LineStringType, Dimension::XYZM); + push_field!("Polygon ZM", PolygonType, Dimension::XYZM); + push_field!("MultiPoint ZM", MultiPointType, Dimension::XYZM); + push_field!("MultiLineString ZM", MultiLineStringType, Dimension::XYZM); + push_field!("MultiPolygon ZM", MultiPolygonType, Dimension::XYZM); + push_field!( + "GeometryCollection ZM", + GeometryCollectionType, + Dimension::XYZM + ); + + let union_fields = UnionFields::try_new(type_ids, fields).unwrap(); + DataType::Union(union_fields, UnionMode::Dense) + } + + /// Convert this type to a [`Field`], retaining extension metadata. + pub fn to_field>(&self, name: N, nullable: bool) -> Field { + Field::new(name, self.data_type(), nullable).with_extension_type(self.clone()) + } +} + +impl ExtensionType for GeometryType { + const NAME: &'static str = "geoarrow.geometry"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let coord_type = parse_geometry(data_type)?; + if coord_type != self.coord_type { + return Err(ArrowError::SchemaError(format!( + "Expected coordinate type {:?}, but got {:?}", + self.coord_type, coord_type + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let coord_type = parse_geometry(data_type)?; + Ok(Self { + coord_type, + metadata, + }) + } +} + +fn parse_geometry(data_type: &DataType) -> Result { + if let DataType::Union(fields, _mode) = data_type { + let mut coord_types: HashSet = HashSet::new(); + + // Validate that all fields of the union have the same coordinate type + fields.iter().try_for_each(|(type_id, field)| { + macro_rules! impl_type_id { + ($expected_dim:path, $parse_fn:ident) => {{ + let (ct, dim) = $parse_fn(field.data_type())?; + coord_types.insert(ct); + assert!(matches!(dim, $expected_dim)); + }}; + } + + match type_id { + 1 => impl_type_id!(Dimension::XY, parse_point), + 2 => impl_type_id!(Dimension::XY, parse_linestring), + 3 => impl_type_id!(Dimension::XY, parse_polygon), + 4 => impl_type_id!(Dimension::XY, parse_multipoint), + 5 => impl_type_id!(Dimension::XY, parse_multilinestring), + 6 => impl_type_id!(Dimension::XY, parse_multipolygon), + 7 => impl_type_id!(Dimension::XY, parse_geometry_collection), + 11 => impl_type_id!(Dimension::XYZ, parse_point), + 12 => impl_type_id!(Dimension::XYZ, parse_linestring), + 13 => impl_type_id!(Dimension::XYZ, parse_polygon), + 14 => impl_type_id!(Dimension::XYZ, parse_multipoint), + 15 => impl_type_id!(Dimension::XYZ, parse_multilinestring), + 16 => impl_type_id!(Dimension::XYZ, parse_multipolygon), + 17 => impl_type_id!(Dimension::XYZ, parse_geometry_collection), + 21 => impl_type_id!(Dimension::XYM, parse_point), + 22 => impl_type_id!(Dimension::XYM, parse_linestring), + 23 => impl_type_id!(Dimension::XYM, parse_polygon), + 24 => impl_type_id!(Dimension::XYM, parse_multipoint), + 25 => impl_type_id!(Dimension::XYM, parse_multilinestring), + 26 => impl_type_id!(Dimension::XYM, parse_multipolygon), + 27 => impl_type_id!(Dimension::XYM, parse_geometry_collection), + 31 => impl_type_id!(Dimension::XYZM, parse_point), + 32 => impl_type_id!(Dimension::XYZM, parse_linestring), + 33 => impl_type_id!(Dimension::XYZM, parse_polygon), + 34 => impl_type_id!(Dimension::XYZM, parse_multipoint), + 35 => impl_type_id!(Dimension::XYZM, parse_multilinestring), + 36 => impl_type_id!(Dimension::XYZM, parse_multipolygon), + 37 => impl_type_id!(Dimension::XYZM, parse_geometry_collection), + id => { + return Err(ArrowError::SchemaError(format!( + "Unexpected type id parsing geometry: {id}" + ))); + } + }; + Ok::<_, ArrowError>(()) + })?; + + if coord_types.len() > 1 { + return Err(ArrowError::SchemaError( + "Multi coord types in union".to_string(), + )); + } + + let coord_type = coord_types.drain().next().unwrap(); + Ok(coord_type) + } else { + Err(ArrowError::SchemaError("Expected union type".to_string())) + } +} + +/// A GeoArrow "Box" or "Rect" type. +/// +/// Refer to the [GeoArrow +/// specification](https://github.com/geoarrow/geoarrow/blob/main/format.md#box). +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct BoxType { + dim: Dimension, + metadata: Arc, +} + +impl BoxType { + /// Construct a new type from parts. + pub fn new(dim: Dimension, metadata: Arc) -> Self { + Self { dim, metadata } + } + + /// Change the underlying [`Dimension`] + pub fn with_dimension(self, dim: Dimension) -> Self { + Self { dim, ..self } + } + + /// Change the underlying [`Metadata`] + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { metadata, ..self } + } + + /// Retrieve the underlying [`CoordType`] + pub fn dimension(&self) -> Dimension { + self.dim + } + + /// Retrieve the underlying [`Metadata`] + pub fn metadata(&self) -> &Arc { + &self.metadata + } + + /// Convert to the corresponding [`DataType`]. + /// + /// ``` + /// use arrow_schema::{DataType, Field}; + /// use geoarrow_schema::{BoxType, Dimension}; + /// + /// let geom_type = BoxType::new(Dimension::XYZM, Default::default()); + /// + /// let expected_type = DataType::Struct( + /// vec![ + /// Field::new("xmin", DataType::Float64, false), + /// Field::new("ymin", DataType::Float64, false), + /// Field::new("zmin", DataType::Float64, false), + /// Field::new("mmin", DataType::Float64, false), + /// Field::new("xmax", DataType::Float64, false), + /// Field::new("ymax", DataType::Float64, false), + /// Field::new("zmax", DataType::Float64, false), + /// Field::new("mmax", DataType::Float64, false), + /// ] + /// .into(), + /// ); + /// assert_eq!(geom_type.data_type(), expected_type); + /// ``` + pub fn data_type(&self) -> DataType { + let values_fields = match self.dim { + Dimension::XY => { + vec![ + Field::new("xmin", DataType::Float64, false), + Field::new("ymin", DataType::Float64, false), + Field::new("xmax", DataType::Float64, false), + Field::new("ymax", DataType::Float64, false), + ] + } + Dimension::XYZ => { + vec![ + Field::new("xmin", DataType::Float64, false), + Field::new("ymin", DataType::Float64, false), + Field::new("zmin", DataType::Float64, false), + Field::new("xmax", DataType::Float64, false), + Field::new("ymax", DataType::Float64, false), + Field::new("zmax", DataType::Float64, false), + ] + } + Dimension::XYM => { + vec![ + Field::new("xmin", DataType::Float64, false), + Field::new("ymin", DataType::Float64, false), + Field::new("mmin", DataType::Float64, false), + Field::new("xmax", DataType::Float64, false), + Field::new("ymax", DataType::Float64, false), + Field::new("mmax", DataType::Float64, false), + ] + } + Dimension::XYZM => { + vec![ + Field::new("xmin", DataType::Float64, false), + Field::new("ymin", DataType::Float64, false), + Field::new("zmin", DataType::Float64, false), + Field::new("mmin", DataType::Float64, false), + Field::new("xmax", DataType::Float64, false), + Field::new("ymax", DataType::Float64, false), + Field::new("zmax", DataType::Float64, false), + Field::new("mmax", DataType::Float64, false), + ] + } + }; + DataType::Struct(values_fields.into()) + } + + /// Convert this type to a [`Field`], retaining extension metadata. + pub fn to_field>(&self, name: N, nullable: bool) -> Field { + Field::new(name, self.data_type(), nullable).with_extension_type(self.clone()) + } +} + +impl ExtensionType for BoxType { + const NAME: &'static str = "geoarrow.box"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + let dim = parse_box(data_type)?; + if dim != self.dim { + return Err(ArrowError::SchemaError(format!( + "Expected dimension {:?}, but got {:?}", + self.dim, dim + ))); + } + Ok(()) + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let dim = parse_box(data_type)?; + Ok(Self { dim, metadata }) + } +} + +fn parse_box(data_type: &DataType) -> Result { + match data_type { + DataType::Struct(struct_fields) => match struct_fields.len() { + 4 => Ok(Dimension::XY), + 6 => { + let names: HashSet<&str> = + struct_fields.iter().map(|f| f.name().as_str()).collect(); + if names.contains("mmin") && names.contains("mmax") { + Ok(Dimension::XYM) + } else if names.contains("zmin") && names.contains("zmax") { + Ok(Dimension::XYZ) + } else { + Err(ArrowError::SchemaError(format!( + "unexpected either mmin and mmax or zmin and zmax for struct with 6 fields. Got names: {names:?}", + ))) + } + } + 8 => Ok(Dimension::XYZM), + num_fields => Err(ArrowError::SchemaError(format!( + "unexpected number of struct fields: {num_fields}", + ))), + }, + dt => Err(ArrowError::SchemaError(format!( + "unexpected data type parsing box: {dt:?}", + ))), + } +} + +/// A type alias for [`BoxType`]. +/// +/// The official GeoArrow specification refers to this type as "geoarrow.box", but `Box` is a +/// reserved keyword in Rust and has its own meaning. In line with GeoRust, GeoArrow Rust calls +/// this type `Rect`. +pub type RectType = BoxType; + +/// A GeoArrow WKB type. +/// +/// This extension type support multiple physical data types, including [`DataType::Binary`], +/// [`DataType::LargeBinary`], and [`DataType::BinaryView`]. +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct WkbType { + metadata: Arc, +} + +impl WkbType { + /// Construct a new type from parts. + pub fn new(metadata: Arc) -> Self { + Self { metadata } + } + + /// Change the underlying [`Metadata`] + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { metadata } + } + + /// Retrieve the underlying [`Metadata`] + pub fn metadata(&self) -> &Arc { + &self.metadata + } +} + +impl ExtensionType for WkbType { + const NAME: &'static str = "geoarrow.wkb"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + match data_type { + DataType::Binary | DataType::LargeBinary | DataType::BinaryView => Ok(()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected data type {dt}" + ))), + } + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let wkb = Self { metadata }; + wkb.supports_data_type(data_type)?; + Ok(wkb) + } +} + +/// A GeoArrow WKT type. +/// +/// This extension type support multiple physical data types, including [`DataType::Utf8`], +/// [`DataType::LargeUtf8`], and [`DataType::Utf8View`]. +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct WktType { + metadata: Arc, +} + +impl WktType { + /// Construct a new type from parts. + pub fn new(metadata: Arc) -> Self { + Self { metadata } + } + + /// Change the underlying [`Metadata`] + pub fn with_metadata(self, metadata: Arc) -> Self { + Self { metadata } + } + + /// Retrieve the underlying [`Metadata`] + pub fn metadata(&self) -> &Arc { + &self.metadata + } +} + +impl ExtensionType for WktType { + const NAME: &'static str = "geoarrow.wkt"; + + type Metadata = Arc; + + fn metadata(&self) -> &Self::Metadata { + self.metadata() + } + + fn serialize_metadata(&self) -> Option { + self.metadata.serialize() + } + + fn deserialize_metadata(metadata: Option<&str>) -> Result { + Ok(Arc::new(Metadata::deserialize(metadata)?)) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + match data_type { + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(()), + dt => Err(ArrowError::SchemaError(format!( + "Unexpected data type {dt}" + ))), + } + } + + fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result { + let wkb = Self { metadata }; + wkb.supports_data_type(data_type)?; + Ok(wkb) + } +} + +fn coord_type_to_data_type(coord_type: CoordType, dim: Dimension) -> DataType { + match (coord_type, dim) { + (CoordType::Interleaved, Dimension::XY) => INTERLEAVED_XY.clone(), + + (CoordType::Interleaved, Dimension::XYZ) => INTERLEAVED_XYZ.clone(), + + (CoordType::Interleaved, Dimension::XYM) => INTERLEAVED_XYM.clone(), + (CoordType::Interleaved, Dimension::XYZM) => INTERLEAVED_XYZM.clone(), + (CoordType::Separated, Dimension::XY) => SEPARATED_XY.clone(), + (CoordType::Separated, Dimension::XYZ) => SEPARATED_XYZ.clone(), + (CoordType::Separated, Dimension::XYM) => SEPARATED_XYM.clone(), + (CoordType::Separated, Dimension::XYZM) => SEPARATED_XYZM.clone(), + } +} + +// #[cfg(test)] +// mod test { +// use std::sync::Arc; + +// use arrow_schema::{DataType, Field}; + +// use super::*; +// use crate::{crs::Crs, edges::Edges}; + +// #[test] +// fn test_point_interleaved_xy() { +// let data_type = +// DataType::FixedSizeList(Arc::new(Field::new("xy", DataType::Float64, false)), 2); +// let metadata = Arc::new(Metadata::default()); +// let type_ = PointType::try_new(&data_type, metadata).unwrap(); + +// assert_eq!(type_.coord_type, CoordType::Interleaved); +// assert_eq!(type_.dim, Dimension::XY); +// assert_eq!(type_.serialize_metadata(), None); +// } + +// #[test] +// fn test_point_separated_xyz() { +// let data_type = DataType::Struct( +// vec![ +// Field::new("x", DataType::Float64, false), +// Field::new("y", DataType::Float64, false), +// Field::new("z", DataType::Float64, false), +// ] +// .into(), +// ); +// let metadata = Arc::new(Metadata::default()); +// let type_ = PointType::try_new(&data_type, metadata).unwrap(); + +// assert_eq!(type_.coord_type, CoordType::Separated); +// assert_eq!(type_.dim, Dimension::XYZ); +// assert_eq!(type_.serialize_metadata(), None); +// } + +// #[test] +// fn test_point_metadata() { +// let data_type = +// DataType::FixedSizeList(Arc::new(Field::new("xy", DataType::Float64, false)), 2); +// let crs = Crs::from_authority_code("EPSG:4326".to_string()); +// let metadata = Arc::new(Metadata::new(crs, Some(Edges::Spherical))); +// let type_ = PointType::try_new(&data_type, metadata).unwrap(); + +// let expected = r#"{"crs":"EPSG:4326","crs_type":"authority_code","edges":"spherical"}"#; +// assert_eq!(type_.serialize_metadata().as_deref(), Some(expected)); +// } + +// #[test] +// fn geometry_data_type() { +// let typ = GeometryCollectionType::new(Dimension::XY, Default::default()); +// dbg!(typ.data_type()); +// } +// } diff --git a/src/geoarrow/geoarrow-schema/src/type_id.rs b/src/geoarrow/geoarrow-schema/src/type_id.rs new file mode 100644 index 0000000000..4be1380b41 --- /dev/null +++ b/src/geoarrow/geoarrow-schema/src/type_id.rs @@ -0,0 +1,91 @@ +//! Contains helpers for working with GeoArrow Type IDs. + +use crate::{ + Dimension, GeometryCollectionType, LineStringType, MultiLineStringType, MultiPointType, + MultiPolygonType, PointType, PolygonType, +}; + +/// Compute the Type ID for an array type-dimension combination. +/// +/// The GeoArrow specification defines a Type ID for each geometry type and dimension combination. +/// +pub trait GeometryTypeId { + /// The integer offset for this geometry type. + /// + /// This matches the 2D geometry type IDs defined in the GeoArrow specification. For example, + /// Point is 1, LineString is 2, etc. + const GEOMETRY_TYPE_OFFSET: i8; + + /// The dimension of this geometry type. + fn dimension(&self) -> Dimension; + + /// The Type ID for this geometry type and dimension. + fn geometry_type_id(&self) -> i8 { + (dimension_order(self.dimension()) * 10) + Self::GEOMETRY_TYPE_OFFSET + } +} + +fn dimension_order(dim: Dimension) -> i8 { + match dim { + Dimension::XY => 0, + Dimension::XYZ => 1, + Dimension::XYM => 2, + Dimension::XYZM => 3, + } +} + +impl GeometryTypeId for PointType { + const GEOMETRY_TYPE_OFFSET: i8 = 1; + + fn dimension(&self) -> Dimension { + self.dimension() + } +} + +impl GeometryTypeId for LineStringType { + const GEOMETRY_TYPE_OFFSET: i8 = 2; + + fn dimension(&self) -> Dimension { + self.dimension() + } +} + +impl GeometryTypeId for PolygonType { + const GEOMETRY_TYPE_OFFSET: i8 = 3; + + fn dimension(&self) -> Dimension { + self.dimension() + } +} + +impl GeometryTypeId for MultiPointType { + const GEOMETRY_TYPE_OFFSET: i8 = 4; + + fn dimension(&self) -> Dimension { + self.dimension() + } +} + +impl GeometryTypeId for MultiLineStringType { + const GEOMETRY_TYPE_OFFSET: i8 = 5; + + fn dimension(&self) -> Dimension { + self.dimension() + } +} + +impl GeometryTypeId for MultiPolygonType { + const GEOMETRY_TYPE_OFFSET: i8 = 6; + + fn dimension(&self) -> Dimension { + self.dimension() + } +} + +impl GeometryTypeId for GeometryCollectionType { + const GEOMETRY_TYPE_OFFSET: i8 = 7; + + fn dimension(&self) -> Dimension { + self.dimension() + } +} diff --git a/tests/series/test_geospatial.py b/tests/series/test_geospatial.py new file mode 100644 index 0000000000..34a7372ffe --- /dev/null +++ b/tests/series/test_geospatial.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +from daft import GeospatialMode +from daft.datatype import DataType, get_super_ext_type +from daft.series import Series + +DaftExtension = get_super_ext_type() + + +def test_point_roundtrip(): + geo_mode = GeospatialMode.from_user_defined_mode("xy", "separated") + data = ["POINT(1 2)", "POINT(3 4)", None] + string_series = Series.from_pylist(data, dtype=DataType.string()) + wkt_array = string_series.cast(DataType.wkt(geo_mode)) + casted = wkt_array.cast(DataType.point(geo_mode)) + wkt_roundtrip = casted.cast(DataType.wkt(geo_mode)) + new_string_series = wkt_roundtrip.cast(DataType.string()) + assert new_string_series.to_pylist() == data + + +def test_linestring_roundtrip(): + geo_mode = GeospatialMode.from_user_defined_mode("xy", "separated") + data = ["LINESTRING(0 0,1 1,2 2)", "LINESTRING(3 3,4 4)", None] + string_series = Series.from_pylist(data, dtype=DataType.string()) + wkt_array = string_series.cast(DataType.wkt(geo_mode)) + casted = wkt_array.cast(DataType.linestring(geo_mode)) + wkt_roundtrip = casted.cast(DataType.wkt(geo_mode)) + new_string_series = wkt_roundtrip.cast(DataType.string()) + assert new_string_series.to_pylist() == data + + +def test_polygon_roundtrip(): + geo_mode = GeospatialMode.from_user_defined_mode("xy", "separated") + data = [ + "POLYGON((0 0,1 0,1 1,0 1,0 0))", + "POLYGON((0 0,2 0,2 2,0 2,0 0),(0.5 0.5,1.5 0.5,1.5 1.5,0.5 1.5,0.5 0.5))", + None, + ] + string_series = Series.from_pylist(data, dtype=DataType.string()) + wkt_array = string_series.cast(DataType.wkt(geo_mode)) + casted = wkt_array.cast(DataType.polygon(geo_mode)) + wkt_roundtrip = casted.cast(DataType.wkt(geo_mode)) + new_string_series = wkt_roundtrip.cast(DataType.string()) + assert new_string_series.to_pylist() == data + + +def test_multipoint_roundtrip(): + geo_mode = GeospatialMode.from_user_defined_mode("xy", "separated") + data = ["MULTIPOINT((0 0),(1 1),(2 2))", "MULTIPOINT((3 3),(4 4))", None] + string_series = Series.from_pylist(data, dtype=DataType.string()) + wkt_array = string_series.cast(DataType.wkt(geo_mode)) + casted = wkt_array.cast(DataType.multipoint(geo_mode)) + wkt_roundtrip = casted.cast(DataType.wkt(geo_mode)) + new_string_series = wkt_roundtrip.cast(DataType.string()) + assert new_string_series.to_pylist() == data + + +def test_multilinestring_roundtrip(): + geo_mode = GeospatialMode.from_user_defined_mode("xy", "separated") + data = ["MULTILINESTRING((0 0,1 1),(2 2,3 3))", "MULTILINESTRING((4 4,5 5,6 6))", None] + string_series = Series.from_pylist(data, dtype=DataType.string()) + wkt_array = string_series.cast(DataType.wkt(geo_mode)) + casted = wkt_array.cast(DataType.multilinestring(geo_mode)) + wkt_roundtrip = casted.cast(DataType.wkt(geo_mode)) + new_string_series = wkt_roundtrip.cast(DataType.string()) + assert new_string_series.to_pylist() == data + + +def test_multipolygon_roundtrip(): + geo_mode = GeospatialMode.from_user_defined_mode("xy", "separated") + data = [ + "MULTIPOLYGON(((0 0,1 0,1 1,0 1,0 0)),((2 2,3 2,3 3,2 3,2 2)))", + "MULTIPOLYGON(((0 0,4 0,4 4,0 4,0 0),(1 1,2 1,2 2,1 2,1 1)))", + None, + ] + string_series = Series.from_pylist(data, dtype=DataType.string()) + wkt_array = string_series.cast(DataType.wkt(geo_mode)) + casted = wkt_array.cast(DataType.multipolygon(geo_mode)) + wkt_roundtrip = casted.cast(DataType.wkt(geo_mode)) + new_string_series = wkt_roundtrip.cast(DataType.string()) + assert new_string_series.to_pylist() == data + + +def test_geometry_collection_roundtrip(): + geo_mode = GeospatialMode.from_user_defined_mode("xy", "separated") + data = [ + "GEOMETRYCOLLECTION(POINT(1 1),LINESTRING(1 1,2 2),POINT(2 2))", + "GEOMETRYCOLLECTION(LINESTRING(1 1,2 2),POLYGON((1 1,2 2,3 3,1 1)))", + None, + ] + string_series = Series.from_pylist(data, dtype=DataType.string()) + wkt_array = string_series.cast(DataType.wkt(geo_mode)) + casted = wkt_array.cast(DataType.geometry_collection(geo_mode)) + wkt_roundtrip = casted.cast(DataType.wkt(geo_mode)) + new_string_series = wkt_roundtrip.cast(DataType.string()) + assert new_string_series.to_pylist() == data