Skip to content

Commit 3970620

Browse files
committed
Adds fixes for Wikibase export for publications
Implements functionality to export publication metadata to a Wikibase instance, including Wikidata
1 parent 5f35c3f commit 3970620

File tree

12 files changed

+2422
-148
lines changed

12 files changed

+2422
-148
lines changed

.claude/settings.local.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,16 @@
1616
"Bash(python manage.py:*)",
1717
"Bash(python -m py_compile:*)",
1818
"Bash(python:*)",
19-
"Bash(node --check:*)"
19+
"Bash(node --check:*)",
20+
"Bash(find:*)",
21+
"Bash(OPTIMAP_LOGGING_LEVEL=WARNING python manage.py test:*)",
22+
"Bash(export OPTIMAP_LOGGING_LEVEL=WARNING)",
23+
"Bash(awk:*)",
24+
"Bash(chmod:*)",
25+
"Bash(bash:*)",
26+
"Bash(./create_wikibase_property.sh:*)",
27+
"Bash(python3:*)",
28+
"Bash(pkill:*)"
2029
],
2130
"deny": [],
2231
"ask": []

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -284,11 +284,12 @@ python -m smtpd -c DebuggingServer -n localhost:5587
284284
OPTIMAP_EMAIL_HOST=localhost
285285
OPTIMAP_EMAIL_PORT=5587
286286
```
287-
### Accessing list of article links
288287

289-
Visit the URL - http://127.0.0.1:8000/articles/links/
288+
### Accessing list of works
290289

291-
### Harvest Publications from Real Journals
290+
Visit the URL - <http://127.0.0.1:8000/works/>
291+
292+
### Harvest publications from real journals
292293

293294
The `harvest_journals` management command allows you to harvest publications from real OAI-PMH journal sources directly into your database. This is useful for:
294295

optimap/.env.example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,10 @@ OPTIMAP_LOGGING_LEVEL=INFO
2727
DJANGO_LOGGING_LEVEL=ERROR
2828

2929
OPTIMAP_DATA_DUMP_RETENTION=3
30+
31+
WIKIBASE_CONSUMER_TOKEN=your_consumer_token
32+
WIKIBASE_CONSUMER_SECRET=your_consumer_secret
33+
WIKIBASE_ACCESS_TOKEN=your_access_token
34+
WIKIBASE_ACCESS_SECRET=your_access_secret
35+
WIKIBASE_API_URL=https://your-instance.wikibase.cloud/w/api.php
36+

optimap/settings.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,27 @@
217217
EMAIL_IMAP_SENT_FOLDER = env('OPTIMAP_EMAIL_IMAP_SENT_FOLDER', default='')
218218
OPTIMAP_EMAIL_SEND_DELAY = env("OPTIMAP_EMAIL_SEND_DELAY", default=2)
219219
BASE_URL = env("BASE_URL", default="http://127.0.0.1:8000")
220-
OAI_USERNAME = env("OPTIMAP_OAI_USERNAME", default="")
221-
OAI_PASSWORD = env("OPTIMAP_OAI_PASSWORD", default="")
222220
EMAIL_SEND_DELAY = 2
223221
DATA_DUMP_INTERVAL_HOURS = 6
224-
OPENALEX_MAILTO = "[email protected]"
225-
WIKIBASE_API_URL = env("WIKIBASE_API_URL")
226-
WIKIBASE_USERNAME = env("WIKIBASE_USERNAME")
227-
WIKIBASE_PASSWORD = env("WIKIBASE_PASSWORD")
222+
223+
# Contact email for API user agents (OpenAlex, Wikidata, etc.)
224+
CONTACT_EMAIL = "[email protected]"
225+
226+
# Wikibase/Wikidata configuration
227+
WIKIBASE_API_URL = env("WIKIBASE_API_URL", default="")
228+
229+
# OAuth 1.0a credentials (required for Wikibase export; each defaults to an empty string when unset)
230+
WIKIBASE_CONSUMER_TOKEN = env("WIKIBASE_CONSUMER_TOKEN", default="")
231+
WIKIBASE_CONSUMER_SECRET = env("WIKIBASE_CONSUMER_SECRET", default="")
232+
WIKIBASE_ACCESS_TOKEN = env("WIKIBASE_ACCESS_TOKEN", default="")
233+
WIKIBASE_ACCESS_SECRET = env("WIKIBASE_ACCESS_SECRET", default="")
234+
235+
# Property auto-creation setting
236+
WIKIBASE_CREATE_PROPERTIES_IF_MISSING = env("WIKIBASE_CREATE_PROPERTIES_IF_MISSING", default=True, cast=bool)
237+
238+
# Import optimap version for user agent
239+
import optimap
240+
WIKIBASE_USER_AGENT = f"OPTIMAP/{optimap.__version__} (https://optimap.science; {CONTACT_EMAIL})"
228241

229242

230243
MIDDLEWARE = [

publications/admin.py

Lines changed: 127 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from leaflet.admin import LeafletGeoAdmin
99
from publications.models import Publication, Source, HarvestingEvent, BlockedEmail, BlockedDomain, GlobalRegion
1010
from import_export.admin import ImportExportModelAdmin
11-
from publications.models import EmailLog, Subscription, UserProfile
11+
from publications.models import EmailLog, Subscription, UserProfile, WikidataExportLog
1212
from publications.tasks import harvest_oai_endpoint, schedule_subscription_email_task, send_monthly_email, schedule_monthly_email_task
1313
from django_q.models import Schedule
1414
from django.utils.timezone import now
@@ -17,24 +17,45 @@
1717
from publications.tasks import regenerate_geopackage_cache
1818
from django.test import Client
1919
from django.http import HttpResponse
20-
from publications.wikidata import export_publications_to_wikidata
20+
from publications.wikidata import export_publications_to_wikidata, export_publications_to_wikidata_dryrun
2121

22-
@admin.action(description="Create new Wikidata items for selected publications")
22+
@admin.action(description="Export selected publications to Wikidata/Wikibase")
2323
def export_to_wikidata(modeladmin, request, queryset):
24-
created_count, updated_count, error_records = export_publications_to_wikidata(queryset)
24+
stats = export_publications_to_wikidata(queryset)
2525

2626
# Success messages
27-
if created_count:
28-
messages.success(request, f"{created_count} new Wikidata item(s) created.")
29-
if updated_count:
30-
messages.success(request, f"{updated_count} existing Wikidata item(s) updated.")
31-
32-
# Warnings and errors
33-
for publication, error_message in error_records:
34-
if error_message == "no publicationDate":
35-
messages.warning(request, f"Skipping “{publication.title}”: no publication date")
36-
else:
37-
messages.error(request, f"Failed to export “{publication.title}”: {error_message}")
27+
if stats['created']:
28+
messages.success(request, f"{stats['created']} new Wikidata item(s) created.")
29+
if stats['updated']:
30+
messages.success(request, f"{stats['updated']} existing Wikidata item(s) updated.")
31+
if stats['skipped']:
32+
messages.info(request, f"{stats['skipped']} publication(s) skipped (already exist or duplicate labels).")
33+
34+
# Error messages
35+
if stats['errors']:
36+
messages.error(request, f"{stats['errors']} publication(s) failed to export. Check the Wikidata export logs for details.")
37+
38+
# Summary message
39+
messages.info(request, f"Total: {stats['total']} publication(s) processed.")
40+
41+
@admin.action(description="[DRY-RUN] Export selected publications to Wikidata/Wikibase")
42+
def export_to_wikidata_dryrun(modeladmin, request, queryset):
43+
stats = export_publications_to_wikidata_dryrun(queryset)
44+
45+
# Dry-run summary messages
46+
messages.info(request, f"[DRY-RUN] Export simulation complete:")
47+
48+
if stats['created']:
49+
messages.info(request, f" • Would create {stats['created']} new Wikidata item(s)")
50+
if stats['updated']:
51+
messages.info(request, f" • Would update {stats['updated']} existing Wikidata item(s)")
52+
if stats['skipped']:
53+
messages.info(request, f" • Would skip {stats['skipped']} publication(s)")
54+
if stats['errors']:
55+
messages.warning(request, f" • {stats['errors']} publication(s) have validation errors")
56+
57+
# Summary message
58+
messages.success(request, f"[DRY-RUN] Total: {stats['total']} publication(s) analyzed. No changes were written to Wikibase.")
3859

3960
@admin.action(description="Mark selected publications as published")
4061
def make_public(modeladmin, request, queryset):
@@ -172,8 +193,9 @@ class PublicationAdmin(LeafletGeoAdmin, ImportExportModelAdmin):
172193
"openalex_fulltext_origin", "openalex_is_retracted",
173194
"openalex_ids", "openalex_open_access_status")
174195
readonly_fields = ("created_by", "updated_by", "openalex_link")
175-
actions = ["make_public", "make_draft", "regenerate_all_exports",
176-
"export_permalinks_csv", "email_permalinks_preview", "export_to_wikidata"]
196+
actions = [make_public, make_draft, regenerate_all_exports,
197+
"export_permalinks_csv", "email_permalinks_preview",
198+
export_to_wikidata, export_to_wikidata_dryrun]
177199

178200
@admin.display(boolean=True, description="Has DOI")
179201
def has_permalink(self, obj):
@@ -243,12 +265,95 @@ class EmailLogAdmin(admin.ModelAdmin):
243265
"sent_at",
244266
"sent_by",
245267
"trigger_source",
246-
"status",
247-
"error_message",
268+
"status",
269+
"error_message",
248270
)
249-
list_filter = ("status", "trigger_source", "sent_at")
250-
search_fields = ("recipient_email", "subject", "sent_by__username")
251-
actions = [trigger_monthly_email, trigger_monthly_email_task]
271+
list_filter = ("status", "trigger_source", "sent_at")
272+
search_fields = ("recipient_email", "subject", "sent_by__username")
273+
actions = [trigger_monthly_email, trigger_monthly_email_task]
274+
275+
@admin.register(WikidataExportLog)
276+
class WikidataExportLogAdmin(admin.ModelAdmin):
277+
"""Admin interface for Wikidata export logs."""
278+
list_display = (
279+
"id",
280+
"publication_title",
281+
"action",
282+
"wikidata_link",
283+
"export_date",
284+
"fields_count",
285+
)
286+
list_filter = ("action", "export_date")
287+
search_fields = (
288+
"publication__title",
289+
"publication__doi",
290+
"wikidata_qid",
291+
"export_summary",
292+
)
293+
readonly_fields = (
294+
"publication",
295+
"export_date",
296+
"action",
297+
"wikidata_qid",
298+
"wikidata_url",
299+
"wikidata_link_display",
300+
"wikibase_endpoint",
301+
"exported_fields",
302+
"error_message_display",
303+
"export_summary",
304+
)
305+
fields = (
306+
"publication",
307+
"export_date",
308+
"action",
309+
"wikibase_endpoint",
310+
"wikidata_qid",
311+
"wikidata_link_display",
312+
"export_summary",
313+
"exported_fields",
314+
"error_message_display",
315+
)
316+
ordering = ("-export_date",)
317+
date_hierarchy = "export_date"
318+
319+
@admin.display(description="Publication")
320+
def publication_title(self, obj):
321+
return obj.publication.title[:60] if obj.publication else "—"
322+
323+
@admin.display(description="Wikidata")
324+
def wikidata_link(self, obj):
325+
if obj.wikidata_qid and obj.wikidata_url:
326+
return format_html(
327+
'<a href="{}" target="_blank" rel="noopener"><i class="fas fa-external-link-alt"></i> {}</a>',
328+
obj.wikidata_url,
329+
obj.wikidata_qid
330+
)
331+
return "—"
332+
333+
@admin.display(description="Wikidata Link")
334+
def wikidata_link_display(self, obj):
335+
if obj.wikidata_qid and obj.wikidata_url:
336+
return format_html(
337+
'<a href="{}" target="_blank" rel="noopener">{}</a>',
338+
obj.wikidata_url,
339+
obj.wikidata_url
340+
)
341+
return "—"
342+
343+
@admin.display(description="Fields")
344+
def fields_count(self, obj):
345+
if obj.exported_fields:
346+
return len(obj.exported_fields)
347+
return 0
348+
349+
@admin.display(description="Error Message (Full Traceback)")
350+
def error_message_display(self, obj):
351+
if obj.error_message:
352+
return format_html(
353+
'<pre style="white-space: pre-wrap; font-family: monospace; font-size: 12px; background: #f5f5f5; padding: 10px; border: 1px solid #ddd; border-radius: 4px; max-height: 400px; overflow-y: auto;">{}</pre>',
354+
obj.error_message
355+
)
356+
return "—"
252357

253358
@admin.register(Subscription)
254359
class SubscriptionAdmin(admin.ModelAdmin):
publications/migrations/0004_wikidata_export_log.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Generated migration for WikidataExportLog model
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
6+
7+
class Migration(migrations.Migration):
8+
9+
dependencies = [
10+
('publications', '0002_add_regions_to_subscription'),
11+
]
12+
13+
operations = [
14+
migrations.CreateModel(
15+
name='WikidataExportLog',
16+
fields=[
17+
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
18+
('publication', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='wikidata_exports', to='publications.publication')),
19+
('export_date', models.DateTimeField(auto_now_add=True, db_index=True)),
20+
('action', models.CharField(max_length=20, choices=[('created', 'Created'), ('updated', 'Updated'), ('skipped', 'Skipped'), ('error', 'Error')], db_index=True)),
21+
('wikidata_qid', models.CharField(max_length=50, blank=True, null=True, help_text='Wikidata Q-ID (e.g., Q12345)')),
22+
('wikidata_url', models.URLField(max_length=512, blank=True, null=True, help_text='Full URL to Wikidata item')),
23+
('exported_fields', models.JSONField(blank=True, null=True, help_text='List of fields that were exported')),
24+
('error_message', models.TextField(blank=True, null=True)),
25+
('export_summary', models.TextField(blank=True, null=True, help_text='Summary of what was exported')),
26+
],
27+
options={
28+
'ordering': ['-export_date'],
29+
'verbose_name': 'Wikidata Export Log',
30+
'verbose_name_plural': 'Wikidata Export Logs',
31+
},
32+
),
33+
migrations.AddIndex(
34+
model_name='wikidataexportlog',
35+
index=models.Index(fields=['wikidata_qid'], name='publications_wikidata_qid_idx'),
36+
),
37+
]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 5.1.9 on 2025-10-23 20:41
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("publications", "0004_wikidata_export_log"),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name="wikidataexportlog",
15+
name="wikibase_endpoint",
16+
field=models.URLField(
17+
blank=True,
18+
help_text="Wikibase API endpoint used for this export (e.g., https://www.wikidata.org/w/api.php)",
19+
max_length=512,
20+
null=True,
21+
),
22+
),
23+
]

publications/models.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,4 +262,66 @@ def save(self, *args, **kwargs):
262262
name=f"Harvest Source {self.id}",
263263
)
264264

265-
Journal = Source
265+
Journal = Source
266+
267+
268+
class WikidataExportLog(models.Model):
269+
"""
270+
Log of Wikidata exports for publications.
271+
Tracks when publications were exported, what action was taken,
272+
and links to the created/updated Wikidata items.
273+
"""
274+
ACTION_CHOICES = [
275+
('created', 'Created'),
276+
('updated', 'Updated'),
277+
('skipped', 'Skipped'),
278+
('error', 'Error'),
279+
]
280+
281+
publication = models.ForeignKey(
282+
'Publication',
283+
on_delete=models.CASCADE,
284+
related_name='wikidata_exports'
285+
)
286+
export_date = models.DateTimeField(auto_now_add=True, db_index=True)
287+
action = models.CharField(max_length=20, choices=ACTION_CHOICES, db_index=True)
288+
wikidata_qid = models.CharField(
289+
max_length=50,
290+
blank=True,
291+
null=True,
292+
help_text='Wikidata Q-ID (e.g., Q12345)'
293+
)
294+
wikidata_url = models.URLField(
295+
max_length=512,
296+
blank=True,
297+
null=True,
298+
help_text='Full URL to Wikidata item'
299+
)
300+
exported_fields = models.JSONField(
301+
blank=True,
302+
null=True,
303+
help_text='List of fields that were exported'
304+
)
305+
error_message = models.TextField(blank=True, null=True)
306+
export_summary = models.TextField(
307+
blank=True,
308+
null=True,
309+
help_text='Summary of what was exported'
310+
)
311+
wikibase_endpoint = models.URLField(
312+
max_length=512,
313+
blank=True,
314+
null=True,
315+
help_text='Wikibase API endpoint used for this export (e.g., https://www.wikidata.org/w/api.php)'
316+
)
317+
318+
class Meta:
319+
ordering = ['-export_date']
320+
verbose_name = 'Wikidata Export Log'
321+
verbose_name_plural = 'Wikidata Export Logs'
322+
indexes = [
323+
models.Index(fields=['wikidata_qid'], name='publications_wikidata_qid_idx'),
324+
]
325+
326+
def __str__(self):
327+
return f"{self.action.capitalize()} {self.publication.title[:50]} on {self.export_date.strftime('%Y-%m-%d')}"

0 commit comments

Comments
 (0)