Commit 5f35c3f

uxairibrarnuest authored and committed
Export data to wikidata
1 parent f3d7809 commit 5f35c3f

3 files changed: +214 additions, −1 deletion
optimap/settings.py

Lines changed: 4 additions & 0 deletions
@@ -222,6 +222,10 @@
 EMAIL_SEND_DELAY = 2
 DATA_DUMP_INTERVAL_HOURS = 6
 OPENALEX_MAILTO = "[email protected]"
+WIKIBASE_API_URL = env("WIKIBASE_API_URL")
+WIKIBASE_USERNAME = env("WIKIBASE_USERNAME")
+WIKIBASE_PASSWORD = env("WIKIBASE_PASSWORD")
+
 
 MIDDLEWARE = [
     'django.middleware.cache.UpdateCacheMiddleware',
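
These settings are read from the environment when Django loads (the env() call suggests django-environ), so they must be defined before startup. A minimal sketch of the matching .env entries; all three values are placeholders, and the /w/api.php path is an assumption based on how publications/wikidata.py later strips it to derive the SPARQL endpoint:

WIKIBASE_API_URL=https://wikibase.example.org/w/api.php
WIKIBASE_USERNAME=OptimapBot
WIKIBASE_PASSWORD=replace-me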

publications/admin.py

Lines changed: 20 additions & 1 deletion
@@ -17,6 +17,24 @@
 from publications.tasks import regenerate_geopackage_cache
 from django.test import Client
 from django.http import HttpResponse
+from publications.wikidata import export_publications_to_wikidata
+
+@admin.action(description="Create new Wikidata items for selected publications")
+def export_to_wikidata(modeladmin, request, queryset):
+    created_count, updated_count, error_records = export_publications_to_wikidata(queryset)
+
+    # Success messages
+    if created_count:
+        messages.success(request, f"{created_count} new Wikidata item(s) created.")
+    if updated_count:
+        messages.success(request, f"{updated_count} existing Wikidata item(s) updated.")
+
+    # Warnings and errors
+    for publication, error_message in error_records:
+        if error_message == "no publicationDate":
+            messages.warning(request, f"Skipping “{publication.title}”: no publication date")
+        else:
+            messages.error(request, f"Failed to export “{publication.title}”: {error_message}")
 
 @admin.action(description="Mark selected publications as published")
 def make_public(modeladmin, request, queryset):
@@ -155,7 +173,7 @@ class PublicationAdmin(LeafletGeoAdmin, ImportExportModelAdmin):
                "openalex_ids", "openalex_open_access_status")
     readonly_fields = ("created_by", "updated_by", "openalex_link")
     actions = ["make_public", "make_draft", "regenerate_all_exports",
-               "export_permalinks_csv", "email_permalinks_preview"]
+               "export_permalinks_csv", "email_permalinks_preview", "export_to_wikidata"]
 
     @admin.display(boolean=True, description="Has DOI")
     def has_permalink(self, obj):
@@ -261,3 +279,4 @@ class UserAdmin(admin.ModelAdmin):
 
 @admin.register(GlobalRegion)
 class GlobalRegionAdmin(admin.ModelAdmin):
     """GlobalRegion Admin."""
+
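
The action reports outcomes through Django's messages framework, so admin.py must already import messages (e.g. from django.contrib import messages) somewhere above this hunk; that import is not part of the diff. To exercise the export outside the admin UI, a minimal sketch from a Django shell, assuming the Publication model lives in publications.models:

from publications.models import Publication
from publications.wikidata import export_publications_to_wikidata

# Only publications with a DOI can be matched against existing items
queryset = Publication.objects.exclude(doi__isnull=True).exclude(doi="")
created, updated, errors = export_publications_to_wikidata(queryset)
print(f"created={created}, updated={updated}, errors={len(errors)}")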

publications/wikidata.py

Lines changed: 190 additions & 0 deletions (new file)
import os
import requests
from datetime import datetime
from django.conf import settings

from wikibaseintegrator.wbi_exceptions import ModificationFailed
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator.wbi_login import Login
from wikibaseintegrator.datatypes import (
    MonolingualText,
    Time,
    String,
    ExternalID,
    GlobeCoordinate
)
# The URL datatype is spelled differently across wikibaseintegrator versions
try:
    from wikibaseintegrator.datatypes import Url
except ImportError:
    from wikibaseintegrator.datatypes import URL as Url

# Our instance’s SPARQL endpoint (for local lookups by DOI)
if "www.wikidata.org/w/api.php" in settings.WIKIBASE_API_URL:
    SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
else:
    SPARQL_ENDPOINT = settings.WIKIBASE_API_URL.replace("/w/api.php", "/query/sparql")

# Calendar model used for all dates (proleptic Gregorian calendar)
CALENDAR_MODEL = "http://www.wikidata.org/entity/Q1985727"

# Wikidata property IDs mapping
P_TITLE = "P1476"            # title (monolingual text)
P_ABSTRACT = "P1810"         # abstract
P_URL = "P856"               # official website / URL
P_PUBLICATION_DATE = "P577"  # publication date
P_PERIOD_START = "P580"      # start time
P_PERIOD_END = "P582"        # end time
P_DOI = "P356"               # DOI as External ID
P_AUTHOR_STRING = "P2093"    # author name string
P_JOURNAL_NAME = "P1448"     # journal name (monolingual text)
P_GEOMETRY = "P625"          # coordinate location

def normalize_date_and_precision(date_str):
    """Pad a partial ISO date to full YYYY-MM-DD and return it together with
    the matching Wikibase precision (9 = year, 10 = month, 11 = day)."""
    parts = date_str.split("-")
    if len(parts) == 1 and parts[0].isdigit():
        # "YYYY"
        return f"{parts[0]}-01-01", 9
    if len(parts) == 2 and all(p.isdigit() for p in parts):
        # "YYYY-MM"
        return f"{parts[0]}-{parts[1]}-01", 10
    # assume full "YYYY-MM-DD"
    return date_str, 11

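# Worked examples (hypothetical inputs):
#   normalize_date_and_precision("2003")        -> ("2003-01-01", 9)   year precision
#   normalize_date_and_precision("2003-07")     -> ("2003-07-01", 10)  month precision
#   normalize_date_and_precision("2003-07-15")  -> ("2003-07-15", 11)  day precision
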
def add_time_claims(dates, prop_nr, statements):
    """Append one Time claim per date string in dates to statements."""
    for ds in dates:
        iso, prec = normalize_date_and_precision(ds)
        timestamp = f"+{iso}T00:00:00Z"
        statements.append(Time(
            prop_nr=prop_nr,
            time=timestamp,
            timezone=0,
            before=0,
            after=0,
            precision=prec,
            calendarmodel=CALENDAR_MODEL
        ))


def find_local_item_by_doi(doi):
    """
    Return the Q-ID of an existing item in our Wikibase instance for the given DOI,
    or None if no match is found.
    """
    sparql_query = f'''
        SELECT ?item WHERE {{
            ?item wdt:{P_DOI} "{doi}" .
        }} LIMIT 1
    '''
    response = requests.get(
        SPARQL_ENDPOINT,
        params={"query": sparql_query, "format": "json"},
        headers={"Accept": "application/json"}
    )
    response.raise_for_status()

    data = response.json()
    bindings = data.get("results", {}).get("bindings", [])
    if not bindings:
        return None

    item_uri = bindings[0]["item"]["value"]
    return item_uri.rsplit("/", 1)[-1]

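# For a hypothetical DOI "10.1234/example", the rendered query is:
#   SELECT ?item WHERE { ?item wdt:P356 "10.1234/example" . } LIMIT 1
# A binding such as http://www.wikidata.org/entity/Q42 yields the Q-ID "Q42".
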
def upsert_publication(publication, wikibase_integrator):
    """
    Create or update a single Publication on Wikibase.
    Returns a tuple (action, qid):
    - action is "created", "updated", or "skipped"
    - qid is the Wikibase item ID (or None if skipped)
    """
    # Build statements
    iso_date = publication.publicationDate.isoformat()
    publication_timestamp = f"+{iso_date}T00:00:00Z"

    statements = [
        MonolingualText(prop_nr=P_TITLE, text=publication.title, language="en"),
        Time(prop_nr=P_PUBLICATION_DATE, time=publication_timestamp, timezone=0,
             before=0, after=0, precision=11, calendarmodel=CALENDAR_MODEL),
        String(prop_nr=P_AUTHOR_STRING,
               value=(publication.created_by.username if publication.created_by else "Unknown author")),
    ]

    if publication.abstract:
        statements.append(String(prop_nr=P_ABSTRACT, value=publication.abstract))

    if publication.url:
        statements.append(Url(prop_nr=P_URL, value=publication.url))

    if publication.timeperiod_startdate:
        add_time_claims(publication.timeperiod_startdate, P_PERIOD_START, statements)

    if publication.timeperiod_enddate:
        add_time_claims(publication.timeperiod_enddate, P_PERIOD_END, statements)

    if publication.source:
        statements.append(MonolingualText(prop_nr=P_JOURNAL_NAME, text=publication.source, language="en"))

    if publication.doi:
        statements.append(ExternalID(prop_nr=P_DOI, value=publication.doi))

    if publication.geometry:
        # Use points directly; reduce any other geometry to its centroid
        geometries = getattr(publication.geometry, "geoms", [publication.geometry])
        for geom in geometries:
            if getattr(geom, "geom_type", None) != "Point":
                geom = geom.centroid
            statements.append(GlobeCoordinate(prop_nr=P_GEOMETRY, latitude=geom.y,
                                              longitude=geom.x, precision=0.0001))

    # Check for an existing item by DOI
    existing_qid = find_local_item_by_doi(publication.doi) if publication.doi else None

    if existing_qid:
        # Update existing item
        entity = wikibase_integrator.item.get(entity_id=existing_qid)
        entity.claims.add(statements)
        try:
            entity.write(summary="Update publication via OptimapBot")
            return "updated", existing_qid
        except ModificationFailed as e:
            if "already has label" in str(e):
                return "skipped", existing_qid
            raise
    else:
        # Create new item
        entity = wikibase_integrator.item.new()
        entity.labels.set("en", publication.title)
        entity.descriptions.set("en", "Publication imported from Optimap")
        entity.claims.add(statements)
        try:
            # write() returns the written entity; its id carries the new Q-ID
            written_entity = entity.write(summary="Create publication via OptimapBot")
            return "created", written_entity.id
        except ModificationFailed as e:
            if "already has label" in str(e):
                return "skipped", None
            raise

def export_publications_to_wikidata(publications):
    """Export a queryset of publications and return a tuple
    (created_count, updated_count, error_records)."""
    login_session = Login(
        user=settings.WIKIBASE_USERNAME,
        password=settings.WIKIBASE_PASSWORD,
        mediawiki_api_url=settings.WIKIBASE_API_URL,
    )
    wikibase_client = WikibaseIntegrator(login=login_session)

    created_count = 0
    updated_count = 0
    error_records = []

    for publication in publications:
        if not publication.publicationDate:
            error_records.append((publication, "no publicationDate"))
            continue

        try:
            action, entity_id = upsert_publication(publication, wikibase_client)
            if action == "created":
                created_count += 1
            elif action == "updated":
                updated_count += 1
        except Exception as err:
            error_records.append((publication, str(err)))

    return created_count, updated_count, error_records
