131131 ("Cable Route: Europe-Africa" , "LINESTRING (10 55, 5 45, 0 35, -5 25, 0 10, 5 0)" , "undersea cable from Europe through Atlantic to Africa" ),
132132]
133133
# Complex polygon fixtures: a triangle, a pentagon, a concave outline, a
# polygon with one interior ring (hole), and a twelve-vertex star.
# Each entry is a (title, WKT geometry, free-text description) tuple.
# Every ring is explicitly closed: its first coordinate is repeated last.
COMPLEX_POLYGONS = [
    (
        "Triangular Survey Area: Mediterranean",
        "POLYGON ((10 35, 20 45, 5 42, 10 35))",
        "triangular research zone in Mediterranean Sea",
    ),
    (
        "Pentagon Study Region: Central Europe",
        "POLYGON ((10 48, 15 50, 17 47, 12 44, 8 46, 10 48))",
        "five-sided ecological study area in Central Europe",
    ),
    (
        "Concave Polygon Zone: Southeast Asia",
        "POLYGON ((100 5, 105 5, 105 10, 103 8, 101 10, 100 10, 100 5))",
        "irregularly shaped coastal research area",
    ),
    (
        # Second ring is the hole cut out of the first (outer) ring.
        "Protected Area with Exclusion Zone: Amazon",
        "POLYGON ((-65 -5, -60 -5, -60 0, -65 0, -65 -5), (-63 -3, -62 -3, -62 -2, -63 -2, -63 -3))",
        "conservation area with restricted inner zone in Amazon rainforest",
    ),
    (
        "Star-shaped Survey: Arabian Peninsula",
        "POLYGON ((50 22, 51 23, 52 22, 51 21, 52 20, 51 19, 50 20, 49 19, 48 20, 49 21, 48 22, 49 23, 50 22))",
        "multi-pronged geological survey region",
    ),
]
142+
# Mixed geometry collections: selected combinations of point, line and
# polygon members wrapped in a WKT GEOMETRYCOLLECTION. Covers each type on
# its own, every pair, all three together, and repeated members of one type.
# The list is not exhaustive (e.g. there is no entry with several polygons).
# Each entry is a (title, WKT geometry, free-text description) tuple.
MIXED_GEOMETRIES = [
    # Point only (single)
    (
        "Point-only Study: Remote Island",
        "GEOMETRYCOLLECTION (POINT (-10 75))",
        "single monitoring station on remote Arctic island",
    ),
    # Line only (single)
    (
        "Line-only Survey: Shipping Route",
        "GEOMETRYCOLLECTION (LINESTRING (-15 70, 5 72, 25 75))",
        "linear shipping route survey in North Atlantic",
    ),
    # Polygon only (single)
    (
        "Polygon-only Region: Coastal Zone",
        "GEOMETRYCOLLECTION (POLYGON ((0 80, 10 80, 10 85, 0 85, 0 80)))",
        "coastal research zone in Arctic Ocean",
    ),
    # Point + Line
    (
        "Point-Line Study: River Monitoring",
        "GEOMETRYCOLLECTION (POINT (0 5), LINESTRING (-5 0, 0 5, 5 10))",
        "river monitoring with station and flow path",
    ),
    # Point + Polygon
    (
        "Point-Polygon Study: Harbor Analysis",
        "GEOMETRYCOLLECTION (POINT (100 10), POLYGON ((98 8, 102 8, 102 12, 98 12, 98 8)))",
        "harbor with central buoy and boundary zone",
    ),
    # Line + Polygon
    (
        "Line-Polygon Study: Coastal Transect",
        "GEOMETRYCOLLECTION (LINESTRING (80 27, 85 29, 90 28), POLYGON ((82 26, 88 26, 88 30, 82 30, 82 26)))",
        "coastal transect through study area",
    ),
    # Point + Line + Polygon (full combination); its three members are the
    # same geometries used by the three single-member entries above.
    (
        "Multi-site Arctic Study",
        "GEOMETRYCOLLECTION (POINT (-10 75), LINESTRING (-15 70, 5 72, 25 75), POLYGON ((0 80, 10 80, 10 85, 0 85, 0 80)))",
        "integrated Arctic research with monitoring stations, survey transects, and study areas",
    ),
    # Multiple Points + Line
    (
        "Multi-Point-Line: Island Network",
        "GEOMETRYCOLLECTION (POINT (160 -5), POINT (165 0), POINT (170 5), LINESTRING (158 -8, 172 8))",
        "island monitoring network with connection route",
    ),
    # Multiple Points + Polygon
    (
        "Multi-Point-Polygon: Lake Study",
        "GEOMETRYCOLLECTION (POINT (-75 20), POINT (-70 18), POLYGON ((-80 15, -60 15, -60 25, -80 25, -80 15)))",
        "lake study with sampling stations and boundary",
    ),
    # Multiple Lines + Polygon
    (
        "Multi-Line-Polygon: Watershed Analysis",
        "GEOMETRYCOLLECTION (LINESTRING (50 20, 52 22), LINESTRING (51 19, 52 21), POLYGON ((48 18, 54 18, 54 24, 48 24, 48 18)))",
        "watershed with multiple streams and catchment area",
    ),
]
166+
# Very small and very large geometries for edge-case testing.
# Each entry is a (title, WKT geometry, free-text description) tuple.
EXTREME_SCALE_GEOMETRIES = [
    # Ring spans 0.0001 x 0.00005 coordinate units, written with five decimals.
    # NOTE(review): the description says "sub-meter precision", but if the
    # coordinates are lon/lat degrees the ring is roughly 7 m x 6 m at this
    # latitude - confirm whether the text or the coordinates should change.
    (
        "Micro-site Study: Urban Park",
        "POLYGON ((13.40500 52.52000, 13.40510 52.52000, 13.40510 52.52005, 13.40500 52.52005, 13.40500 52.52000))",
        "very small urban ecology study (sub-meter precision)",
    ),
    # Seven vertices stepping 40 units in x from -120 to 120.
    (
        "Continental-scale Transect",
        "LINESTRING (-120 25, -80 30, -40 35, 0 40, 40 45, 80 50, 120 55)",
        "global east-west transect spanning multiple continents",
    ),
]
172+
# Homogeneous multi-part geometries: one MultiPoint, one MultiLineString and
# one MultiPolygon.
# Each entry is a (title, WKT geometry, free-text description) tuple.
MULTI_GEOMETRY_TYPES = [
    # Five points; the last one has x = 180.
    (
        "Scattered Monitoring Network: Pacific Islands",
        "MULTIPOINT ((160 -10), (165 -5), (170 0), (175 5), (180 10))",
        "distributed ocean monitoring stations across Pacific",
    ),
    # Three separate three-vertex lines.
    (
        "Multi-route Shipping Analysis",
        "MULTILINESTRING ((140 30, 150 32, 160 33), (142 28, 152 29, 162 30), (138 32, 148 34, 158 35))",
        "parallel shipping corridor analysis in Northwest Pacific",
    ),
    # Three disjoint 2 x 2 squares, each a single closed ring without holes.
    (
        "Fragmented Habitat Study: Indonesia",
        "MULTIPOLYGON (((120 -5, 122 -5, 122 -3, 120 -3, 120 -5)), ((124 -4, 126 -4, 126 -2, 124 -2, 124 -4)), ((128 -6, 130 -6, 130 -4, 128 -4, 128 -6)))",
        "island biogeography across separated land masses",
    ),
]
179+
134180def create_source (pk , name , issn_l = None , is_oa = True ):
135181 """Create a source object."""
136182 return {
@@ -402,6 +448,106 @@ def main():
402448 keyword_idx += 1
403449 topic_idx += 1
404450
# Publications for the additional geometry groups. The four groups are
# processed identically and differ only in the console heading, the list of
# fixture entries and the abstract wording, so they share one loop.
# Each row: (console heading, fixture entries, abstract template); the
# template's {description} field is filled in per entry.
extra_geometry_groups = [
    (
        "\n === Creating complex polygon geometries ===",
        COMPLEX_POLYGONS,
        "Complex polygon study focusing on {description} . This research examines irregular boundaries and geometric complexity in spatial analysis.",
    ),
    (
        "\n === Creating mixed geometry collections ===",
        MIXED_GEOMETRIES,
        "Multi-component spatial study integrating {description} . Combines point-based, linear, and areal data collection methods.",
    ),
    (
        "\n === Creating extreme scale geometries ===",
        EXTREME_SCALE_GEOMETRIES,
        "Scale-specific analysis examining {description} . Tests spatial processing at extreme precision or extent.",
    ),
    (
        "\n === Creating multi-geometry types ===",
        MULTI_GEOMETRY_TYPES,
        "Multi-feature spatial analysis documenting {description} . Studies distributed or parallel spatial phenomena.",
    ),
]

for heading, entries, abstract_template in extra_geometry_groups:
    print(heading)
    # `i` restarts at 0 for every group, so the source assignment cycles
    # through `sources` from the beginning each time.
    for i, (title, geometry, description) in enumerate(entries):
        pk = pk_counter
        pk_counter += 1
        # presumably source pks start at 2000 - confirm where `sources` is built
        source_pk_choice = 2000 + (i % len(sources))

        pub = create_publication(
            pk=pk,
            source_pk=source_pk_choice,
            title=title,
            abstract=abstract_template.format(description=description),
            geometry_wkt=geometry,
            region_desc=description,
            authors_idx=author_idx,
            keywords_idx=keyword_idx,
            topics_idx=topic_idx,
            has_openalex=True,
        )
        fixture_data.append(pub)
        fields = pub["fields"]
        print(f" [{pk} ] {title} : {len(fields['authors'])} authors, {len(fields['keywords'])} keywords, {len(fields['topics'])} topics")

        # Advance all three metadata indices by one per publication.
        author_idx += 1
        keyword_idx += 1
        topic_idx += 1

550+
405551 # Create backup of original
406552 import os
407553 import shutil
@@ -428,12 +574,18 @@ def main():
428574
# Console summary: total publication count, number of fixture entries per
# geometry group, then metadata coverage. `with_authors` and `with_keywords`
# are computed outside this span.
print("\n === Summary ===")
print(f"Total publications: {len(publications)} ")
# Section headings have no placeholders, so they are plain string literals.
print("\n Basic geometry types:")
print(f" - Continents (polygons): {len(CONTINENTS)} ")
print(f" - Oceans (polygons): {len(OCEANS)} ")
print(f" - Two-region overlaps (polygons): {len(TWO_REGION_OVERLAPS)} ")
print(f" - Multi-region spans (polygons): {len(MULTI_REGION_SPANS)} ")
print(f" - Region points (points): {len(REGION_POINTS)} ")
print(f" - Cross-region lines (linestrings): {len(CROSS_REGION_LINES)} ")
print("\n Complex geometry types:")
print(f" - Complex polygons (triangles, pentagons, concave, holes): {len(COMPLEX_POLYGONS)} ")
print(f" - Mixed geometries (point+line+polygon): {len(MIXED_GEOMETRIES)} ")
print(f" - Extreme scale geometries: {len(EXTREME_SCALE_GEOMETRIES)} ")
print(f" - Multi-geometry types (multipoint, multiline, multipoly): {len(MULTI_GEOMETRY_TYPES)} ")
print("\n Metadata coverage:")
print(f" - With authors: {with_authors} /{len(publications)} ")
print(f" - With keywords: {with_keywords} /{len(publications)} ")
0 commit comments