Skip to content

Commit 1e7a1ee

Browse files
author
BharatVe
committed
Working implementation of journal metadata using OpenAlex. Passes tests, needs review.
1 parent 48ed82f commit 1e7a1ee

File tree

12 files changed

+674
-80
lines changed

12 files changed

+674
-80
lines changed

fixtures/test_data.json

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,31 @@
11
[
2+
{
3+
"model": "publications.journal",
4+
"pk": 1,
5+
"fields": {
6+
"name": "Nature",
7+
"issn_l": "0028-0836",
8+
"openalex_id": null,
9+
"publisher_name": null,
10+
"works_count": null,
11+
"works_api_url": null,
12+
"openalex_url": null
13+
}
14+
},
15+
{
16+
"model": "publications.journal",
17+
"pk": 2,
18+
"fields": {
19+
"name": "Science",
20+
"issn_l": "0036-8075",
21+
"openalex_id": null,
22+
"publisher_name": null,
23+
"works_count": null,
24+
"works_api_url": null,
25+
"openalex_url": null
26+
}
27+
},
28+
229
{
330
"model": "publications.publication",
431
"pk": 1,
@@ -12,7 +39,7 @@
1239
"geometry": "SRID=4326;GEOMETRYCOLLECTION (POINT (7.595730774920725 51.96944097112328), POLYGON ((7.599984296478425 51.984257653537384, 7.5715788777530975 51.97057414651397, 7.570122189613329 51.950602187631205, 7.580319006590855 51.93825551711683, 7.609054957094401 51.93035649564658, 7.659674869951374 51.942256350721436, 7.6833460522228165 51.968514669138415, 7.665137450475669 51.99229098076532, 7.626171042736502 51.98982421450293, 7.599984296478425 51.984257653537384)))",
1340
"creationDate": "2022-10-24T12:10:53.086Z",
1441
"lastUpdate": "2022-10-24T12:10:53.086Z",
15-
"source": "OPTIMAP Test Journal",
42+
"source": 1,
1643
"timeperiod_startdate": "[\"2020-02-02\"]",
1744
"timeperiod_enddate": "[\"2022-02-20\"]",
1845
"provenance": "Manually added from file test_data.json using the Django management script."
@@ -31,7 +58,7 @@
3158
"geometry": "SRID=4326;GEOMETRYCOLLECTION (LINESTRING (9.754609563397707 52.36630414438588, 9.813062794192035 52.41569645624003, 10.141300167111496 52.36904961184797, 10.518997966087937 52.330597538337116, 10.838242534270051 52.311358956793185, 11.058566250338231 52.220550088821824, 11.535184901427073 52.15714903642342, 12.272594889905236 52.24258143981572, 12.618817872299417 52.35532056817789, 12.911084026269464 52.2976119913985, 13.144896949445211 52.50063147184562, 13.396695482095708 52.517051586549286))",
3259
"creationDate": "2022-10-24T12:10:53.086Z",
3360
"lastUpdate": "2022-10-24T12:10:53.086Z",
34-
"source": "OPTIMAP Test Journal",
61+
"source": 1,
3562
"timeperiod_startdate": "[\"2010-01-01\"]",
3663
"timeperiod_enddate": "[\"2012-12-12\"]",
3764
"provenance": "Manually added from file test_data.json using the Django management script."
@@ -50,10 +77,49 @@
5077
"geometry": "SRID=4326;GEOMETRYCOLLECTION(POLYGON ((13.558502 50.990421, 13.558502 51.094036, 13.864746 51.094036, 13.864746 50.990421, 13.558502 50.990421)))",
5178
"creationDate": "2022-10-24T12:10:53.086Z",
5279
"lastUpdate": "2022-10-24T12:10:53.086Z",
53-
"source": "OPTIMAP Test Journal",
80+
"source": 1,
5481
"timeperiod_startdate": "[\"2023\"]",
5582
"timeperiod_enddate": "[\"2024\"]",
5683
"provenance": "Manually added from file test_data.json using the Django management script."
5784
}
85+
},
86+
87+
{
88+
"model": "publications.publication",
89+
"pk": 4,
90+
"fields": {
91+
"status": "p",
92+
"title": "Statewide Spatial Data Clearinghouses: an Oregon Case Study",
93+
"abstract": "Collaborative planning to streamline and provide citizens with easier access to geospatial data has a long history in Oregon, culminating in the launch of the Oregon Spatial Data Library in November 2009. This case study examines how Oregon’s participation in the National Spatial Data Infrastructure (NSDI) shaped statewide data sharing practices.",
94+
"publicationDate": "2012-08-01",
95+
"doi": "10.5062/F4RB72J1",
96+
"url": "http://www.istl.org/12-summer/refereed4.html",
97+
"geometry": "SRID=4326;GEOMETRYCOLLECTION(POINT(-123.035094 44.942898))",
98+
"creationDate": "2024-06-01T09:30:00.000Z",
99+
"lastUpdate": "2024-06-01T09:30:00.000Z",
100+
"source": 2,
101+
"timeperiod_startdate": "[\"2009-11-01\"]",
102+
"timeperiod_enddate": "[\"2010-11-01\"]",
103+
"provenance": "Imported from ResearchGate and Oregon State University Archive on 2024-06-01."
104+
}
105+
},
106+
{
107+
"model": "publications.publication",
108+
"pk": 5,
109+
"fields": {
110+
"status": "p",
111+
"title": "GIS at John Day Fossil Beds National Monument: Paleontological Locality Data and Inventory Practices",
112+
"abstract": "This study uses GIS to analyze long-term paleontological locality data from John Day Fossil Beds National Monument, Oregon, to inform better inventory and monitoring practices for fossil resources. We demonstrate how GPS-enabled mapping and spatial analysis can reveal sampling inconsistencies and guide future field surveys.",
113+
"publicationDate": "2020-04-15",
114+
"doi": "10.26879/1053",
115+
"url": "https://palaeo-electronica.org/content/2020/2996-gis-at-john-day-fossil-beds",
116+
"geometry": "SRID=4326;GEOMETRYCOLLECTION(POINT(-119.350000 44.300000))",
117+
"creationDate": "2024-06-01T10:15:00.000Z",
118+
"lastUpdate": "2024-06-01T10:15:00.000Z",
119+
"source": 2,
120+
"timeperiod_startdate": "[\"2019\"]",
121+
"timeperiod_enddate": "[\"2020\"]",
122+
"provenance": "Imported from Palaeontologia Electronica archive on 2024-06-01."
123+
}
58124
}
59125
]

publications/api.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
"""Publications API URL Configuration."""
22

33
from rest_framework import routers
4-
5-
from publications.viewsets import PublicationViewSet, SubscriptionViewset
4+
from publications.viewsets import ( JournalViewSet,
5+
PublicationViewSet,
6+
SubscriptionViewSet,
7+
)
68

79
router = routers.DefaultRouter()
8-
router.register(r"publications", PublicationViewSet)
9-
router.register(r"subscriptions", SubscriptionViewset, basename='subscription')
10+
router.register(r"journals", JournalViewSet, basename="journal")
11+
router.register(r"publications", PublicationViewSet, basename="publication")
12+
router.register(r"subscriptions", SubscriptionViewSet, basename="subscription")
13+
1014
urlpatterns = router.urls
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# publications/management/commands/update_openalex_journals.py
2+
3+
from django.core.management.base import BaseCommand
4+
from publications.models import Journal
5+
import requests
6+
7+
def fetch_openalex_for_issn(issn: str) -> dict | None:
8+
"""
9+
Query OpenAlex for a given ISSN-L and return the JSON dict.
10+
Follows 302 redirects if necessary.
11+
"""
12+
try:
13+
# Initial request to /sources/issn:<ISSN>
14+
resp = requests.get(f"https://api.openalex.org/sources/issn:{issn}", timeout=10)
15+
# If OpenAlex returns a 302 redirect, follow it to the canonical URL
16+
if resp.status_code == 302 and "Location" in resp.headers:
17+
resp = requests.get(resp.headers["Location"], timeout=10)
18+
if resp.status_code == 200:
19+
return resp.json()
20+
except requests.RequestException:
21+
pass
22+
return None
23+
24+
class Command(BaseCommand):
    """
    Management command that refreshes ``Journal`` rows from OpenAlex.

    For every journal with a non-empty ISSN-L, the OpenAlex source record is
    fetched via ``fetch_openalex_for_issn`` and any differing metadata fields
    are written back. Journals without data or without changes are reported
    and left untouched.
    """

    help = "Update Journal metadata (openalex_id, publisher_name, works_count, works_api_url, etc.) from OpenAlex."

    def handle(self, *args, **options):
        """Iterate over journals with an ISSN-L and persist changed metadata."""
        # issn_l is declared blank=True as well as null=True, so empty strings
        # must be excluded too; they can never resolve to a source.
        journals_qs = Journal.objects.exclude(issn_l__isnull=True).exclude(issn_l="")
        total = journals_qs.count()
        self.stdout.write(f"Found {total} journal(s) with ISSN-L.")

        for journal in journals_qs:
            data = fetch_openalex_for_issn(journal.issn_l)
            if not data:
                self.stdout.write(f"Skipped (no data): {journal.name}")
                continue

            updates = self._collect_updates(journal, data)
            if not updates:
                self.stdout.write(f"Skipped (unchanged): {journal.name}")
                continue

            for field_name, value in updates.items():
                setattr(journal, field_name, value)
            # Only write the columns that actually changed.
            journal.save(update_fields=list(updates))
            self.stdout.write(f"Updated: {journal.name} ({journal.issn_l})")

        self.stdout.write("Done updating OpenAlex metadata.")

    @classmethod
    def _collect_updates(cls, journal, data: dict) -> dict:
        """Return ``{field_name: new_value}`` for every field that differs from *data*."""
        updates = {}

        # 1. openalex_id & openalex_url (the URL mirrors the id, e.g.
        #    "https://openalex.org/S137773608"). Each is compared on its own so
        #    a row whose id is already set still gets its URL backfilled.
        openalex_id = data.get("id")
        if openalex_id:
            if journal.openalex_id != openalex_id:
                updates["openalex_id"] = openalex_id
            if journal.openalex_url != openalex_id:
                updates["openalex_url"] = openalex_id

        # 2. works_count & works_api_url (0 is a valid count, hence "is not None").
        works_count = data.get("works_count")
        if works_count is not None and journal.works_count != works_count:
            updates["works_count"] = works_count

        works_api_url = data.get("works_api_url")
        if works_api_url and journal.works_api_url != works_api_url:
            updates["works_api_url"] = works_api_url

        # 3. publisher_name
        publisher = cls._publisher_name(data)
        if publisher and journal.publisher_name != publisher:
            updates["publisher_name"] = publisher

        return updates

    @staticmethod
    def _publisher_name(data: dict) -> str | None:
        """Extract the publisher name from an OpenAlex source record."""
        # OpenAlex exposes the publisher's display name as
        # "host_organization_name"; "host_organization" itself is an id string.
        name = data.get("host_organization_name")
        if not name:
            # Tolerate a nested-object shape in case the payload carries one.
            host_org = data.get("host_organization", {})
            if isinstance(host_org, dict):
                name = host_org.get("display_name")
        # NOTE(review): last-resort fallback kept from the previous behaviour;
        # it stores the journal's own display name as the publisher. Confirm
        # whether leaving publisher_name empty would be preferable.
        return name or data.get("display_name")
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Generated by Django 5.1.9 on 2025-06-02 11:00
2+
3+
import django.db.models.deletion
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
    """Create the ``Journal`` model and point ``Publication.source`` at it."""

    dependencies = [
        ("publications", "0003_remove_customuser_deleted_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="Journal",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        serialize=False,
                        primary_key=True,
                        auto_created=True,
                    ),
                ),
                ("name", models.CharField(max_length=255)),
                ("issn_l", models.CharField(max_length=9, null=True, blank=True)),
                ("openalex_id", models.CharField(max_length=50, null=True, blank=True)),
            ],
        ),
        # NOTE(review): this alters the existing ``source`` column in place
        # into a foreign key. Presumably rows holding free-text source names
        # cannot be converted to journal ids -- confirm whether a data
        # migration is needed before running this against populated databases.
        migrations.AlterField(
            model_name="publication",
            name="source",
            field=models.ForeignKey(
                to="publications.journal",
                related_name="publications",
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
            ),
        ),
    ]
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from django.db import migrations, models
2+
3+
class Migration(migrations.Migration):
    """Add the optional OpenAlex metadata columns to ``Journal``."""

    dependencies = [("publications", "0004_journal_alter_publication_source")]

    # One AddField per (column name, field definition) pair; every new column
    # is nullable and may be left blank.
    operations = [
        migrations.AddField(model_name="journal", name=column, field=definition)
        for column, definition in (
            (
                "publisher_name",
                models.CharField(
                    help_text="Name of the publisher as returned by OpenAlex",
                    blank=True,
                    null=True,
                    max_length=255,
                ),
            ),
            (
                "works_count",
                models.IntegerField(
                    help_text="Total number of works (articles, books, etc.) from this journal",
                    blank=True,
                    null=True,
                ),
            ),
            (
                "works_api_url",
                models.URLField(
                    help_text="API endpoint to list all works from this journal",
                    blank=True,
                    null=True,
                    max_length=512,
                ),
            ),
            (
                "openalex_url",
                models.URLField(
                    help_text="Canonical OpenAlex URL for this journal (source.id)",
                    blank=True,
                    null=True,
                    max_length=512,
                ),
            ),
        )
    ]
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Generated by Django 5.1.9 on 2025-06-02 14:39
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Re-declare the four ``Journal`` metadata fields without ``help_text``."""

    dependencies = [("publications", "0005_journal_extra_fields")]

    # One AlterField per (column name, field definition) pair, in the same
    # order as the generated migration.
    operations = [
        migrations.AlterField(model_name="journal", name=column, field=definition)
        for column, definition in (
            ("openalex_url", models.URLField(max_length=512, null=True, blank=True)),
            ("publisher_name", models.CharField(max_length=255, null=True, blank=True)),
            ("works_api_url", models.URLField(max_length=512, null=True, blank=True)),
            ("works_count", models.IntegerField(null=True, blank=True)),
        )
    ]

publications/models.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class Publication(models.Model):
4545

4646
# optional fields
4747
doi = models.CharField(max_length=1024, unique=True, blank=True, null=True)
48-
source = models.CharField(max_length=4096, null=True, blank=True) # journal, conference, preprint repo, ..
48+
source = models.ForeignKey('Journal', on_delete=models.SET_NULL, null=True, related_name='publications')
4949
provenance = models.TextField(null=True, blank=True)
5050
publicationDate = models.DateField(null=True, blank=True)
5151
abstract = models.TextField(null=True, blank=True)
@@ -231,3 +231,15 @@ class BlockedDomain(models.Model):
231231

232232
def __str__(self):
233233
return self.domain
234+
235+
class Journal(models.Model):
    """A journal that publications can reference as their source.

    Only ``name`` is required; the ISSN-L and the OpenAlex-related fields are
    optional and may be null or blank.
    """

    # Required display name; also used as the string representation.
    name = models.CharField(max_length=255)

    # Optional identifiers.
    issn_l = models.CharField(null=True, blank=True, max_length=9)
    openalex_id = models.CharField(null=True, blank=True, max_length=50)

    # Optional descriptive metadata.
    publisher_name = models.CharField(null=True, blank=True, max_length=255)
    works_count = models.IntegerField(null=True, blank=True)
    works_api_url = models.URLField(null=True, blank=True, max_length=512)
    openalex_url = models.URLField(null=True, blank=True, max_length=512)

    def __str__(self):
        return self.name

0 commit comments

Comments
 (0)