Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions fixtures/test_data.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
[
{
"model": "publications.journal",
"pk": 1,
"fields": {
"name": "Nature",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand where you're coming from, but this does not actually reflect our values.

If we want to use real journals here because their ISSN actually exists, then we go with diamond open access journals (https://en.wikipedia.org/wiki/Diamond_open_access).

https://github.com/loreabad6/doaj-geo is a good starting point, so let's use some that we might also want to collaborate with. Please add the following journals to the test data:

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed in the test data; @BharatVe will double-check.

"issn_l": "0028-0836",
"openalex_id": null,
"publisher_name": null,
"works_count": null,
"works_api_url": null,
"openalex_url": null
}
},
{
"model": "publications.journal",
"pk": 2,
"fields": {
"name": "Science",
"issn_l": "0036-8075",
"openalex_id": null,
"publisher_name": null,
"works_count": null,
"works_api_url": null,
"openalex_url": null
}
},

{
"model": "publications.publication",
"pk": 1,
Expand All @@ -12,7 +39,7 @@
"geometry": "SRID=4326;GEOMETRYCOLLECTION (POINT (7.595730774920725 51.96944097112328), POLYGON ((7.599984296478425 51.984257653537384, 7.5715788777530975 51.97057414651397, 7.570122189613329 51.950602187631205, 7.580319006590855 51.93825551711683, 7.609054957094401 51.93035649564658, 7.659674869951374 51.942256350721436, 7.6833460522228165 51.968514669138415, 7.665137450475669 51.99229098076532, 7.626171042736502 51.98982421450293, 7.599984296478425 51.984257653537384)))",
"creationDate": "2022-10-24T12:10:53.086Z",
"lastUpdate": "2022-10-24T12:10:53.086Z",
"source": "OPTIMAP Test Journal",
"source": 1,
"timeperiod_startdate": "[\"2020-02-02\"]",
"timeperiod_enddate": "[\"2022-02-20\"]",
"provenance": "Manually added from file test_data.json using the Django management script."
Expand All @@ -31,7 +58,7 @@
"geometry": "SRID=4326;GEOMETRYCOLLECTION (LINESTRING (9.754609563397707 52.36630414438588, 9.813062794192035 52.41569645624003, 10.141300167111496 52.36904961184797, 10.518997966087937 52.330597538337116, 10.838242534270051 52.311358956793185, 11.058566250338231 52.220550088821824, 11.535184901427073 52.15714903642342, 12.272594889905236 52.24258143981572, 12.618817872299417 52.35532056817789, 12.911084026269464 52.2976119913985, 13.144896949445211 52.50063147184562, 13.396695482095708 52.517051586549286))",
"creationDate": "2022-10-24T12:10:53.086Z",
"lastUpdate": "2022-10-24T12:10:53.086Z",
"source": "OPTIMAP Test Journal",
"source": 1,
"timeperiod_startdate": "[\"2010-01-01\"]",
"timeperiod_enddate": "[\"2012-12-12\"]",
"provenance": "Manually added from file test_data.json using the Django management script."
Expand All @@ -50,10 +77,49 @@
"geometry": "SRID=4326;GEOMETRYCOLLECTION(POLYGON ((13.558502 50.990421, 13.558502 51.094036, 13.864746 51.094036, 13.864746 50.990421, 13.558502 50.990421)))",
"creationDate": "2022-10-24T12:10:53.086Z",
"lastUpdate": "2022-10-24T12:10:53.086Z",
"source": "OPTIMAP Test Journal",
"source": 1,
"timeperiod_startdate": "[\"2023\"]",
"timeperiod_enddate": "[\"2024\"]",
"provenance": "Manually added from file test_data.json using the Django management script."
}
},

{
"model": "publications.publication",
"pk": 4,
"fields": {
"status": "p",
"title": "Statewide Spatial Data Clearinghouses: an Oregon Case Study",
"abstract": "Collaborative planning to streamline and provide citizens with easier access to geospatial data has a long history in Oregon, culminating in the launch of the Oregon Spatial Data Library in November 2009. This case study examines how Oregon’s participation in the National Spatial Data Infrastructure (NSDI) shaped statewide data sharing practices.",
"publicationDate": "2012-08-01",
"doi": "10.5062/F4RB72J1",
"url": "http://www.istl.org/12-summer/refereed4.html",
"geometry": "SRID=4326;GEOMETRYCOLLECTION(POINT(-123.035094 44.942898))",
"creationDate": "2024-06-01T09:30:00.000Z",
"lastUpdate": "2024-06-01T09:30:00.000Z",
"source": 2,
"timeperiod_startdate": "[\"2009-11-01\"]",
"timeperiod_enddate": "[\"2010-11-01\"]",
"provenance": "Imported from ResearchGate and Oregon State University Archive on 2024-06-01."
}
},
{
"model": "publications.publication",
"pk": 5,
"fields": {
"status": "p",
"title": "GIS at John Day Fossil Beds National Monument: Paleontological Locality Data and Inventory Practices",
"abstract": "This study uses GIS to analyze long-term paleontological locality data from John Day Fossil Beds National Monument, Oregon, to inform better inventory and monitoring practices for fossil resources. We demonstrate how GPS-enabled mapping and spatial analysis can reveal sampling inconsistencies and guide future field surveys.",
"publicationDate": "2020-04-15",
"doi": "10.26879/1053",
"url": "https://palaeo-electronica.org/content/2020/2996-gis-at-john-day-fossil-beds",
"geometry": "SRID=4326;GEOMETRYCOLLECTION(POINT(-119.350000 44.300000))",
"creationDate": "2024-06-01T10:15:00.000Z",
"lastUpdate": "2024-06-01T10:15:00.000Z",
"source": 2,
"timeperiod_startdate": "[\"2019\"]",
"timeperiod_enddate": "[\"2020\"]",
"provenance": "Imported from Palaeontologia Electronica archive on 2024-06-01."
}
}
]
12 changes: 8 additions & 4 deletions publications/api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""Publications API URL Configuration."""

from rest_framework import routers

from publications.viewsets import PublicationViewSet, SubscriptionViewset
from publications.viewsets import ( JournalViewSet,
PublicationViewSet,
SubscriptionViewSet,
)

# Build the REST API routes: each viewset is registered on a DRF DefaultRouter
# under its URL prefix, and the router's generated routes become `urlpatterns`.
router = routers.DefaultRouter()
# NOTE(review): "publications" and "subscriptions" are each registered twice
# below (once per viewset-name spelling / basename style). This looks like the
# removed and added lines of a diff shown together; confirm only one set remains.
router.register(r"publications", PublicationViewSet)
router.register(r"subscriptions", SubscriptionViewset, basename='subscription')
router.register(r"journals", JournalViewSet, basename="journal")
router.register(r"publications", PublicationViewSet, basename="publication")
router.register(r"subscriptions", SubscriptionViewSet, basename="subscription")

# Expose the router-generated URL patterns to Django's URL configuration.
urlpatterns = router.urls
76 changes: 76 additions & 0 deletions publications/management/commands/update_openalex_journals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# publications/management/commands/update_openalex_journals.py

from django.core.management.base import BaseCommand
from publications.models import Journal
import requests

def fetch_openalex_for_issn(issn: str) -> dict | None:
"""
Query OpenAlex for a given ISSN-L and return the JSON dict.
Follows 302 redirects if necessary.
"""
try:
# Initial request to /sources/issn:<ISSN>
resp = requests.get(f"https://api.openalex.org/sources/issn:{issn}", timeout=10)
# If OpenAlex returns a 302 redirect, follow it to the canonical URL
if resp.status_code == 302 and "Location" in resp.headers:
resp = requests.get(resp.headers["Location"], timeout=10)
if resp.status_code == 200:
return resp.json()
except requests.RequestException:
pass
return None

class Command(BaseCommand):
    """Management command that refreshes stored Journal metadata from OpenAlex."""

    help = "Update Journal metadata (openalex_id, publisher_name, works_count, works_api_url, etc.) from OpenAlex."

    def handle(self, *args, **options):
        """
        Look up every journal that has an ISSN-L in OpenAlex and store changes.

        For each journal the OpenAlex record is fetched via
        ``fetch_openalex_for_issn``; fields are only overwritten when OpenAlex
        supplies a value that differs from the stored one, and the journal is
        saved only if at least one field changed. Progress is written to
        ``self.stdout``.
        """
        # issn_l allows both NULL and "" (null=True, blank=True); neither can
        # be looked up, so skip both.
        journals_qs = Journal.objects.exclude(issn_l__isnull=True).exclude(issn_l="")
        total = journals_qs.count()
        self.stdout.write(f"Found {total} journal(s) with ISSN-L.")

        for journal in journals_qs:
            data = fetch_openalex_for_issn(journal.issn_l)
            if not data:
                self.stdout.write(f"Skipped (no data): {journal.name}")
                continue

            changed = False

            # 1. openalex_id & openalex_url
            new_openalex = data.get("id")  # e.g., "https://openalex.org/S137773608"
            if new_openalex and journal.openalex_id != new_openalex:
                journal.openalex_id = new_openalex
                journal.openalex_url = new_openalex  # mirror the same URL
                changed = True

            # 2. works_count & works_api_url
            new_works_count = data.get("works_count")
            if new_works_count is not None and journal.works_count != new_works_count:
                journal.works_count = new_works_count
                changed = True

            api_url = data.get("works_api_url")
            if api_url and journal.works_api_url != api_url:
                journal.works_api_url = api_url
                changed = True

            # 3. publisher_name: OpenAlex sources carry the publisher's name in
            #    "host_organization_name"; "host_organization" itself is an ID
            #    string, not a nested object.
            new_publisher = data.get("host_organization_name")
            host_org = data.get("host_organization")
            if not new_publisher and isinstance(host_org, dict):
                # Kept for payloads that embed the organization as an object.
                new_publisher = host_org.get("display_name")
            if not new_publisher:
                # Last-resort proxy kept from the original implementation.
                # NOTE(review): this is the journal's own display name, not a
                # publisher; confirm this fallback is still wanted.
                new_publisher = data.get("display_name")
            if new_publisher and journal.publisher_name != new_publisher:
                journal.publisher_name = new_publisher
                changed = True

            if changed:
                journal.save()
                self.stdout.write(f"Updated: {journal.name} ({journal.issn_l})")
            else:
                self.stdout.write(f"Skipped (unchanged): {journal.name}")

        self.stdout.write("Done updating OpenAlex metadata.")
28 changes: 28 additions & 0 deletions publications/migrations/0004_journal_alter_publication_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 5.1.9 on 2025-06-02 11:00

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    # Creates the Journal table, then turns Publication.source into a nullable
    # foreign key that points at it.

    dependencies = [
        ('publications', '0003_remove_customuser_deleted_and_more'),
    ]

    operations = [
        # Journal starts with a name plus optional ISSN-L and OpenAlex ID.
        migrations.CreateModel(
            name='Journal',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255)),
                ('issn_l', models.CharField(blank=True, max_length=9, null=True)),
                ('openalex_id', models.CharField(blank=True, max_length=50, null=True)),
            ],
        ),
        # SET_NULL: deleting a Journal clears `source` on its publications
        # instead of deleting them. The reverse accessor is `journal.publications`.
        # NOTE(review): this alters the existing `source` column in place. If
        # existing rows hold free-text source names, they presumably cannot be
        # converted to Journal ids; confirm whether a data migration is needed.
        migrations.AlterField(
            model_name='publication',
            name='source',
            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='publications', to='publications.journal'),
        ),
    ]
49 changes: 49 additions & 0 deletions publications/migrations/0005_journal_extra_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from django.db import migrations, models

class Migration(migrations.Migration):
    # Adds four optional (null=True, blank=True) metadata columns to Journal:
    # publisher_name, works_count, works_api_url and openalex_url.

    dependencies = [
        ('publications', '0004_journal_alter_publication_source'),
    ]

    operations = [
        migrations.AddField(
            model_name='journal',
            name='publisher_name',
            field=models.CharField(
                max_length=255,
                null=True,
                blank=True,
                help_text='Name of the publisher as returned by OpenAlex'
            ),
        ),
        migrations.AddField(
            model_name='journal',
            name='works_count',
            field=models.IntegerField(
                null=True,
                blank=True,
                help_text='Total number of works (articles, books, etc.) from this journal'
            ),
        ),
        migrations.AddField(
            model_name='journal',
            name='works_api_url',
            field=models.URLField(
                max_length=512,
                null=True,
                blank=True,
                help_text='API endpoint to list all works from this journal'
            ),
        ),
        migrations.AddField(
            model_name='journal',
            name='openalex_url',
            field=models.URLField(
                max_length=512,
                null=True,
                blank=True,
                help_text='Canonical OpenAlex URL for this journal (source.id)'
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 5.1.9 on 2025-06-02 14:39

from django.db import migrations, models


class Migration(migrations.Migration):
    # Re-declares the four Journal fields added in 0005_journal_extra_fields
    # without their help_text; field types, max_length, null and blank are the
    # same as in that migration.

    dependencies = [
        ('publications', '0005_journal_extra_fields'),
    ]

    operations = [
        migrations.AlterField(
            model_name='journal',
            name='openalex_url',
            field=models.URLField(blank=True, max_length=512, null=True),
        ),
        migrations.AlterField(
            model_name='journal',
            name='publisher_name',
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        migrations.AlterField(
            model_name='journal',
            name='works_api_url',
            field=models.URLField(blank=True, max_length=512, null=True),
        ),
        migrations.AlterField(
            model_name='journal',
            name='works_count',
            field=models.IntegerField(blank=True, null=True),
        ),
    ]
14 changes: 13 additions & 1 deletion publications/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Publication(models.Model):

# optional fields
doi = models.CharField(max_length=1024, unique=True, blank=True, null=True)
source = models.CharField(max_length=4096, null=True, blank=True) # journal, conference, preprint repo, ..
source = models.ForeignKey('Journal', on_delete=models.SET_NULL, null=True, related_name='publications')
provenance = models.TextField(null=True, blank=True)
publicationDate = models.DateField(null=True, blank=True)
abstract = models.TextField(null=True, blank=True)
Expand Down Expand Up @@ -231,3 +231,15 @@ class BlockedDomain(models.Model):

def __str__(self):
    """Return the stored domain as this object's string representation."""
    return self.domain

class Journal(models.Model):
    """A journal that publications can reference as their source.

    Only ``name`` is required; the identifier and OpenAlex-related fields are
    all optional (nullable and allowed to be blank).
    """

    # Required display name; also used as the string representation.
    name = models.CharField(max_length=255)

    # Optional identifiers.
    issn_l = models.CharField(
        max_length=9,
        null=True,
        blank=True,
    )
    openalex_id = models.CharField(
        max_length=50,
        null=True,
        blank=True,
    )

    # Optional descriptive metadata.
    publisher_name = models.CharField(
        max_length=255,
        null=True,
        blank=True,
    )
    works_count = models.IntegerField(
        null=True,
        blank=True,
    )
    works_api_url = models.URLField(
        max_length=512,
        null=True,
        blank=True,
    )
    openalex_url = models.URLField(
        max_length=512,
        null=True,
        blank=True,
    )

    def __str__(self):
        """Return the journal's name."""
        return self.name
Loading
Loading