1414// KIND, either express or implied. See the License for the
1515// specific language governing permissions and limitations
1616// under the License.
17- use std:: { collections :: HashSet , str:: FromStr } ;
17+ use std:: str:: FromStr ;
1818
1919use datafusion_common:: { stats:: Precision , ColumnStatistics , DataFusionError , Result , ScalarValue } ;
2020use sedona_geometry:: interval:: { Interval , IntervalTrait } ;
21- use sedona_geometry:: { bounding_box:: BoundingBox , types:: GeometryTypeAndDimensions } ;
21+ use sedona_geometry:: {
22+ bounding_box:: BoundingBox ,
23+ types:: { GeometryTypeAndDimensions , GeometryTypeAndDimensionsSet } ,
24+ } ;
2225use serde:: { Deserialize , Serialize } ;
2326
2427/// Statistics specific to spatial data types
@@ -33,7 +36,7 @@ use serde::{Deserialize, Serialize};
3336pub struct GeoStatistics {
3437 // Core spatial statistics for pruning
3538 bbox : Option < BoundingBox > , // The overall bounding box (min/max coordinates) containing all geometries
36- geometry_types : Option < HashSet < GeometryTypeAndDimensions > > , // Set of all geometry types and dimensions present
39+ geometry_types : Option < GeometryTypeAndDimensionsSet > , // Set of all geometry types and dimensions present
3740
3841 // Extended statistics for analysis
3942 total_geometries : Option < i64 > , // Total count of all geometries
@@ -73,16 +76,16 @@ impl GeoStatistics {
7376 pub fn empty ( ) -> Self {
7477 Self {
7578 bbox : Some ( BoundingBox :: xy ( Interval :: empty ( ) , Interval :: empty ( ) ) ) ,
76- geometry_types : Some ( HashSet :: new ( ) ) , // Empty set of geometry types
77- total_geometries : Some ( 0 ) , // Zero geometries
78- total_size_bytes : Some ( 0 ) , // Zero bytes
79- total_points : Some ( 0 ) , // Zero points
80- puntal_count : Some ( 0 ) , // Zero point geometries
81- lineal_count : Some ( 0 ) , // Zero line geometries
82- polygonal_count : Some ( 0 ) , // Zero polygon geometries
83- collection_count : Some ( 0 ) , // Zero collection geometries
84- total_envelope_width : Some ( 0.0 ) , // Zero width
85- total_envelope_height : Some ( 0.0 ) , // Zero height
79+ geometry_types : Some ( GeometryTypeAndDimensionsSet :: new ( ) ) , // Empty set of geometry types
80+ total_geometries : Some ( 0 ) , // Zero geometries
81+ total_size_bytes : Some ( 0 ) , // Zero bytes
82+ total_points : Some ( 0 ) , // Zero points
83+ puntal_count : Some ( 0 ) , // Zero point geometries
84+ lineal_count : Some ( 0 ) , // Zero line geometries
85+ polygonal_count : Some ( 0 ) , // Zero polygon geometries
86+ collection_count : Some ( 0 ) , // Zero collection geometries
87+ total_envelope_width : Some ( 0.0 ) , // Zero width
88+ total_envelope_height : Some ( 0.0 ) , // Zero height
8689 }
8790 }
8891
@@ -92,20 +95,10 @@ impl GeoStatistics {
9295 }
9396
9497 /// Update the geometry types and return self
95- pub fn with_geometry_types ( self , types : Option < & [ GeometryTypeAndDimensions ] > ) -> Self {
96- match types {
97- Some ( type_slice) => {
98- let type_set: HashSet < GeometryTypeAndDimensions > =
99- type_slice. iter ( ) . cloned ( ) . collect ( ) ;
100- Self {
101- geometry_types : Some ( type_set) ,
102- ..self
103- }
104- }
105- None => Self {
106- geometry_types : None ,
107- ..self
108- } ,
98+ pub fn with_geometry_types ( self , types : Option < GeometryTypeAndDimensionsSet > ) -> Self {
99+ Self {
100+ geometry_types : types,
101+ ..self
109102 }
110103 }
111104
@@ -115,7 +108,7 @@ impl GeoStatistics {
115108 }
116109
117110 /// Get the geometry types if available
118- pub fn geometry_types ( & self ) -> Option < & HashSet < GeometryTypeAndDimensions > > {
111+ pub fn geometry_types ( & self ) -> Option < & GeometryTypeAndDimensionsSet > {
119112 self . geometry_types . as_ref ( )
120113 }
121114
@@ -290,9 +283,7 @@ impl GeoStatistics {
290283 if let Some ( other_types) = & other. geometry_types {
291284 match & mut self . geometry_types {
292285 Some ( types) => {
293- let mut new_types = types. clone ( ) ;
294- new_types. extend ( other_types. iter ( ) . cloned ( ) ) ;
295- self . geometry_types = Some ( new_types) ;
286+ types. merge ( other_types) ;
296287 }
297288 None => self . geometry_types = Some ( other_types. clone ( ) ) ,
298289 }
@@ -374,13 +365,12 @@ impl GeoStatistics {
374365 pub fn try_with_str_geometry_types ( self , geometry_types : Option < & [ & str ] > ) -> Result < Self > {
375366 match geometry_types {
376367 Some ( strings) => {
377- let new_geometry_types = strings
378- . iter ( )
379- . map ( |string| {
380- GeometryTypeAndDimensions :: from_str ( string)
381- . map_err ( |e| DataFusionError :: External ( Box :: new ( e) ) )
382- } )
383- . collect :: < Result < HashSet < GeometryTypeAndDimensions > > > ( ) ?;
368+ let mut new_geometry_types = GeometryTypeAndDimensionsSet :: new ( ) ;
369+ for string in strings {
370+ let type_and_dim = GeometryTypeAndDimensions :: from_str ( string)
371+ . map_err ( |e| DataFusionError :: External ( Box :: new ( e) ) ) ?;
372+ new_geometry_types. insert_or_ignore ( & type_and_dim) ;
373+ }
384374
385375 Ok ( Self {
386376 geometry_types : Some ( new_geometry_types) ,
@@ -442,7 +432,10 @@ mod test {
442432 // Test with_bbox
443433 let stats = GeoStatistics :: empty ( ) . with_bbox ( Some ( bbox. clone ( ) ) ) ;
444434 assert_eq ! ( stats. bbox( ) , Some ( & bbox) ) ;
445- assert_eq ! ( stats. geometry_types( ) , Some ( HashSet :: new( ) ) . as_ref( ) ) ;
435+ assert_eq ! (
436+ stats. geometry_types( ) ,
437+ Some ( & GeometryTypeAndDimensionsSet :: new( ) )
438+ ) ;
446439
447440 let regular_stats = stats. to_column_statistics ( ) . unwrap ( ) ;
448441 assert_eq ! (
@@ -459,15 +452,17 @@ mod test {
459452
460453 #[ test]
461454 fn specified_geometry_types ( ) {
462- let type_array = [ GeometryTypeAndDimensions :: new (
463- GeometryTypeId :: Polygon ,
464- Dimensions :: Xy ,
465- ) ] ;
455+ let mut types = GeometryTypeAndDimensionsSet :: new ( ) ;
456+ types
457+ . insert ( & GeometryTypeAndDimensions :: new (
458+ GeometryTypeId :: Polygon ,
459+ Dimensions :: Xy ,
460+ ) )
461+ . unwrap ( ) ;
466462
467463 // Test with_geometry_types
468- let stats = GeoStatistics :: empty ( ) . with_geometry_types ( Some ( & type_array) ) ;
469- let expected_set: HashSet < GeometryTypeAndDimensions > = type_array. iter ( ) . cloned ( ) . collect ( ) ;
470- assert_eq ! ( stats. geometry_types( ) , Some ( & expected_set) ) ;
464+ let stats = GeoStatistics :: empty ( ) . with_geometry_types ( Some ( types. clone ( ) ) ) ;
465+ assert_eq ! ( stats. geometry_types( ) , Some ( & types) ) ;
471466 assert_eq ! (
472467 stats. bbox( ) ,
473468 Some ( & BoundingBox :: xy( Interval :: empty( ) , Interval :: empty( ) ) )
@@ -493,15 +488,19 @@ mod test {
493488 . try_with_str_geometry_types ( Some ( & [ "polygon" , "point" ] ) )
494489 . unwrap ( ) ;
495490
496- let mut expected_types = HashSet :: new ( ) ;
497- expected_types. insert ( GeometryTypeAndDimensions :: new (
498- GeometryTypeId :: Polygon ,
499- Dimensions :: Xy ,
500- ) ) ;
501- expected_types. insert ( GeometryTypeAndDimensions :: new (
502- GeometryTypeId :: Point ,
503- Dimensions :: Xy ,
504- ) ) ;
491+ let mut expected_types = GeometryTypeAndDimensionsSet :: new ( ) ;
492+ expected_types
493+ . insert ( & GeometryTypeAndDimensions :: new (
494+ GeometryTypeId :: Polygon ,
495+ Dimensions :: Xy ,
496+ ) )
497+ . unwrap ( ) ;
498+ expected_types
499+ . insert ( & GeometryTypeAndDimensions :: new (
500+ GeometryTypeId :: Point ,
501+ Dimensions :: Xy ,
502+ ) )
503+ . unwrap ( ) ;
505504
506505 assert_eq ! ( stats. geometry_types( ) , Some ( & expected_types) ) ;
507506 assert_eq ! (
0 commit comments