Skip to content

Commit 0d221df

Browse files
authored
Merge pull request #158 from GeoinformationSystems/features/export_wikidata
2 parents f3d7809 + 6b1e930 commit 0d221df

16 files changed

+4840
-468
lines changed

.claude/settings.local.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,16 @@
1616
"Bash(python manage.py:*)",
1717
"Bash(python -m py_compile:*)",
1818
"Bash(python:*)",
19-
"Bash(node --check:*)"
19+
"Bash(node --check:*)",
20+
"Bash(find:*)",
21+
"Bash(OPTIMAP_LOGGING_LEVEL=WARNING python manage.py test:*)",
22+
"Bash(export OPTIMAP_LOGGING_LEVEL=WARNING)",
23+
"Bash(awk:*)",
24+
"Bash(chmod:*)",
25+
"Bash(bash:*)",
26+
"Bash(./create_wikibase_property.sh:*)",
27+
"Bash(python3:*)",
28+
"Bash(pkill:*)"
2029
],
2130
"deny": [],
2231
"ask": []

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -284,11 +284,12 @@ python -m smtpd -c DebuggingServer -n localhost:5587
284284
OPTIMAP_EMAIL_HOST=localhost
285285
OPTIMAP_EMAIL_PORT=5587
286286
```
287-
### Accessing list of article links
288287

289-
Visit the URL - http://127.0.0.1:8000/articles/links/
288+
### Accessing list of works
290289

291-
### Harvest Publications from Real Journals
290+
Visit the URL - <http://127.0.0.1:8000/works/>
291+
292+
### Harvest Publications from real journals
292293

293294
The `harvest_journals` management command allows you to harvest publications from real OAI-PMH journal sources directly into your database. This is useful for:
294295

fixtures/create_global_feeds_fixture.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,52 @@
131131
("Cable Route: Europe-Africa", "LINESTRING (10 55, 5 45, 0 35, -5 25, 0 10, 5 0)", "undersea cable from Europe through Atlantic to Africa"),
132132
]
133133

134+
# Complex polygon geometries (triangles, pentagons, concave shapes, holes)
135+
COMPLEX_POLYGONS = [
136+
("Triangular Survey Area: Mediterranean", "POLYGON ((10 35, 20 45, 5 42, 10 35))", "triangular research zone in Mediterranean Sea"),
137+
("Pentagon Study Region: Central Europe", "POLYGON ((10 48, 15 50, 17 47, 12 44, 8 46, 10 48))", "five-sided ecological study area in Central Europe"),
138+
("Concave Polygon Zone: Southeast Asia", "POLYGON ((100 5, 105 5, 105 10, 103 8, 101 10, 100 10, 100 5))", "irregularly shaped coastal research area"),
139+
("Protected Area with Exclusion Zone: Amazon", "POLYGON ((-65 -5, -60 -5, -60 0, -65 0, -65 -5), (-63 -3, -62 -3, -62 -2, -63 -2, -63 -3))", "conservation area with restricted inner zone in Amazon rainforest"),
140+
("Star-shaped Survey: Arabian Peninsula", "POLYGON ((50 22, 51 23, 52 22, 51 21, 52 20, 51 19, 50 20, 49 19, 48 20, 49 21, 48 22, 49 23, 50 22))", "multi-pronged geological survey region"),
141+
]
142+
143+
# Mixed geometry collections - combinations of point, line, and polygon components
144+
MIXED_GEOMETRIES = [
145+
# Point only (single)
146+
("Point-only Study: Remote Island", "GEOMETRYCOLLECTION (POINT (-10 75))", "single monitoring station on remote Arctic island"),
147+
# Line only (single)
148+
("Line-only Survey: Shipping Route", "GEOMETRYCOLLECTION (LINESTRING (-15 70, 5 72, 25 75))", "linear shipping route survey in North Atlantic"),
149+
# Polygon only (single)
150+
("Polygon-only Region: Coastal Zone", "GEOMETRYCOLLECTION (POLYGON ((0 80, 10 80, 10 85, 0 85, 0 80)))", "coastal research zone in Arctic Ocean"),
151+
# Point + Line
152+
("Point-Line Study: River Monitoring", "GEOMETRYCOLLECTION (POINT (0 5), LINESTRING (-5 0, 0 5, 5 10))", "river monitoring with station and flow path"),
153+
# Point + Polygon
154+
("Point-Polygon Study: Harbor Analysis", "GEOMETRYCOLLECTION (POINT (100 10), POLYGON ((98 8, 102 8, 102 12, 98 12, 98 8)))", "harbor with central buoy and boundary zone"),
155+
# Line + Polygon
156+
("Line-Polygon Study: Coastal Transect", "GEOMETRYCOLLECTION (LINESTRING (80 27, 85 29, 90 28), POLYGON ((82 26, 88 26, 88 30, 82 30, 82 26)))", "coastal transect through study area"),
157+
# Point + Line + Polygon (full combination)
158+
("Multi-site Arctic Study", "GEOMETRYCOLLECTION (POINT (-10 75), LINESTRING (-15 70, 5 72, 25 75), POLYGON ((0 80, 10 80, 10 85, 0 85, 0 80)))", "integrated Arctic research with monitoring stations, survey transects, and study areas"),
159+
# Multiple Points + Line
160+
("Multi-Point-Line: Island Network", "GEOMETRYCOLLECTION (POINT (160 -5), POINT (165 0), POINT (170 5), LINESTRING (158 -8, 172 8))", "island monitoring network with connection route"),
161+
# Multiple Points + Polygon
162+
("Multi-Point-Polygon: Lake Study", "GEOMETRYCOLLECTION (POINT (-75 20), POINT (-70 18), POLYGON ((-80 15, -60 15, -60 25, -80 25, -80 15)))", "lake study with sampling stations and boundary"),
163+
# Multiple Lines + Polygon
164+
("Multi-Line-Polygon: Watershed Analysis", "GEOMETRYCOLLECTION (LINESTRING (50 20, 52 22), LINESTRING (51 19, 52 21), POLYGON ((48 18, 54 18, 54 24, 48 24, 48 18)))", "watershed with multiple streams and catchment area"),
165+
]
166+
167+
# Very small and very large geometries for edge case testing
168+
EXTREME_SCALE_GEOMETRIES = [
169+
("Micro-site Study: Urban Park", "POLYGON ((13.40500 52.52000, 13.40510 52.52000, 13.40510 52.52005, 13.40500 52.52005, 13.40500 52.52000))", "very small urban ecology study (sub-meter precision)"),
170+
("Continental-scale Transect", "LINESTRING (-120 25, -80 30, -40 35, 0 40, 40 45, 80 50, 120 55)", "global east-west transect spanning multiple continents"),
171+
]
172+
173+
# MultiPoint, MultiLineString, and MultiPolygon for additional complexity
174+
MULTI_GEOMETRY_TYPES = [
175+
("Scattered Monitoring Network: Pacific Islands", "MULTIPOINT ((160 -10), (165 -5), (170 0), (175 5), (180 10))", "distributed ocean monitoring stations across Pacific"),
176+
("Multi-route Shipping Analysis", "MULTILINESTRING ((140 30, 150 32, 160 33), (142 28, 152 29, 162 30), (138 32, 148 34, 158 35))", "parallel shipping corridor analysis in Northwest Pacific"),
177+
("Fragmented Habitat Study: Indonesia", "MULTIPOLYGON (((120 -5, 122 -5, 122 -3, 120 -3, 120 -5)), ((124 -4, 126 -4, 126 -2, 124 -2, 124 -4)), ((128 -6, 130 -6, 130 -4, 128 -4, 128 -6)))", "island biogeography across separated land masses"),
178+
]
179+
134180
def create_source(pk, name, issn_l=None, is_oa=True):
135181
"""Create a source object."""
136182
return {
@@ -402,6 +448,106 @@ def main():
402448
keyword_idx += 1
403449
topic_idx += 1
404450

451+
print("\n=== Creating complex polygon geometries ===")
452+
for i, (title, geometry, description) in enumerate(COMPLEX_POLYGONS):
453+
pk = pk_counter
454+
pk_counter += 1
455+
source_pk_choice = 2000 + (i % len(sources))
456+
457+
pub = create_publication(
458+
pk=pk,
459+
source_pk=source_pk_choice,
460+
title=title,
461+
abstract=f"Complex polygon study focusing on {description}. This research examines irregular boundaries and geometric complexity in spatial analysis.",
462+
geometry_wkt=geometry,
463+
region_desc=description,
464+
authors_idx=author_idx,
465+
keywords_idx=keyword_idx,
466+
topics_idx=topic_idx,
467+
has_openalex=True,
468+
)
469+
fixture_data.append(pub)
470+
print(f" [{pk}] {title}: {len(pub['fields']['authors'])} authors, {len(pub['fields']['keywords'])} keywords, {len(pub['fields']['topics'])} topics")
471+
472+
author_idx += 1
473+
keyword_idx += 1
474+
topic_idx += 1
475+
476+
print("\n=== Creating mixed geometry collections ===")
477+
for i, (title, geometry, description) in enumerate(MIXED_GEOMETRIES):
478+
pk = pk_counter
479+
pk_counter += 1
480+
source_pk_choice = 2000 + (i % len(sources))
481+
482+
pub = create_publication(
483+
pk=pk,
484+
source_pk=source_pk_choice,
485+
title=title,
486+
abstract=f"Multi-component spatial study integrating {description}. Combines point-based, linear, and areal data collection methods.",
487+
geometry_wkt=geometry,
488+
region_desc=description,
489+
authors_idx=author_idx,
490+
keywords_idx=keyword_idx,
491+
topics_idx=topic_idx,
492+
has_openalex=True,
493+
)
494+
fixture_data.append(pub)
495+
print(f" [{pk}] {title}: {len(pub['fields']['authors'])} authors, {len(pub['fields']['keywords'])} keywords, {len(pub['fields']['topics'])} topics")
496+
497+
author_idx += 1
498+
keyword_idx += 1
499+
topic_idx += 1
500+
501+
print("\n=== Creating extreme scale geometries ===")
502+
for i, (title, geometry, description) in enumerate(EXTREME_SCALE_GEOMETRIES):
503+
pk = pk_counter
504+
pk_counter += 1
505+
source_pk_choice = 2000 + (i % len(sources))
506+
507+
pub = create_publication(
508+
pk=pk,
509+
source_pk=source_pk_choice,
510+
title=title,
511+
abstract=f"Scale-specific analysis examining {description}. Tests spatial processing at extreme precision or extent.",
512+
geometry_wkt=geometry,
513+
region_desc=description,
514+
authors_idx=author_idx,
515+
keywords_idx=keyword_idx,
516+
topics_idx=topic_idx,
517+
has_openalex=True,
518+
)
519+
fixture_data.append(pub)
520+
print(f" [{pk}] {title}: {len(pub['fields']['authors'])} authors, {len(pub['fields']['keywords'])} keywords, {len(pub['fields']['topics'])} topics")
521+
522+
author_idx += 1
523+
keyword_idx += 1
524+
topic_idx += 1
525+
526+
print("\n=== Creating multi-geometry types ===")
527+
for i, (title, geometry, description) in enumerate(MULTI_GEOMETRY_TYPES):
528+
pk = pk_counter
529+
pk_counter += 1
530+
source_pk_choice = 2000 + (i % len(sources))
531+
532+
pub = create_publication(
533+
pk=pk,
534+
source_pk=source_pk_choice,
535+
title=title,
536+
abstract=f"Multi-feature spatial analysis documenting {description}. Studies distributed or parallel spatial phenomena.",
537+
geometry_wkt=geometry,
538+
region_desc=description,
539+
authors_idx=author_idx,
540+
keywords_idx=keyword_idx,
541+
topics_idx=topic_idx,
542+
has_openalex=True,
543+
)
544+
fixture_data.append(pub)
545+
print(f" [{pk}] {title}: {len(pub['fields']['authors'])} authors, {len(pub['fields']['keywords'])} keywords, {len(pub['fields']['topics'])} topics")
546+
547+
author_idx += 1
548+
keyword_idx += 1
549+
topic_idx += 1
550+
405551
# Create backup of original
406552
import os
407553
import shutil
@@ -428,12 +574,18 @@ def main():
428574

429575
print("\n=== Summary ===")
430576
print(f"Total publications: {len(publications)}")
577+
print(f"\nBasic geometry types:")
431578
print(f" - Continents (polygons): {len(CONTINENTS)}")
432579
print(f" - Oceans (polygons): {len(OCEANS)}")
433580
print(f" - Two-region overlaps (polygons): {len(TWO_REGION_OVERLAPS)}")
434581
print(f" - Multi-region spans (polygons): {len(MULTI_REGION_SPANS)}")
435582
print(f" - Region points (points): {len(REGION_POINTS)}")
436583
print(f" - Cross-region lines (linestrings): {len(CROSS_REGION_LINES)}")
584+
print(f"\nComplex geometry types:")
585+
print(f" - Complex polygons (triangles, pentagons, concave, holes): {len(COMPLEX_POLYGONS)}")
586+
print(f" - Mixed geometries (point+line+polygon): {len(MIXED_GEOMETRIES)}")
587+
print(f" - Extreme scale geometries: {len(EXTREME_SCALE_GEOMETRIES)}")
588+
print(f" - Multi-geometry types (multipoint, multiline, multipoly): {len(MULTI_GEOMETRY_TYPES)}")
437589
print(f"\nMetadata coverage:")
438590
print(f" - With authors: {with_authors}/{len(publications)}")
439591
print(f" - With keywords: {with_keywords}/{len(publications)}")

0 commit comments

Comments
 (0)