Skip to content

Commit cefcd4b

Browse files
Resolved reference columns (#36)
* add resolved_reference columns
* alembic upgrade
* refereed_status default to None
1 parent dea4fae commit cefcd4b

File tree

5 files changed

+175
-6
lines changed

5 files changed

+175
-6
lines changed

adsrefpipe/app.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,9 @@ def update_resolved_reference_records(self, session: object, resolved_list: List
433433
"score": r.score,
434434
"reference_raw": r.reference_raw,
435435
"external_identifier": _ensure_list(getattr(r, "external_identifier", None)) or [],
436+
"scix_id": getattr(r, "scix_id", None),
437+
"publication_year": getattr(r, "publication_year", None),
438+
"refereed_status": getattr(r, "refereed_status", None),
436439
})
437440

438441
session.bulk_update_mappings(ResolvedReference, mappings)
@@ -474,7 +477,9 @@ def populate_resolved_reference_records_pre_resolved(self, references: List, his
474477
scix_id = '0000',
475478
score=-1,
476479
reference_raw=ref.get('refraw', None),
477-
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [])
480+
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [],
481+
publication_year=ref.get('publication_year', None),
482+
refereed_status=ref.get('refereed_status', None))
478483
resolved_records.append(resolved_record)
479484
# add the id and remove xml_reference that is now in database
480485
ref['id'] = 'H%dI%d' % (history_id, item_num)
@@ -578,7 +583,9 @@ def populate_tables_post_resolved(self, resolved_reference: List, source_bibcode
578583
scix_id=ref.get('scix_id',None),
579584
score=ref.get('score', None),
580585
reference_raw=ref.get('refstring', None),
581-
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [])
586+
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [],
587+
publication_year=ref.get('publication_year', None),
588+
refereed_status=ref.get('refereed_status', None))
582589
resolved_records.append(resolved_record)
583590
if resolved_classic:
584591
compare_record = CompareClassic(history_id=history_id,

adsrefpipe/models.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,11 @@ class ResolvedReference(Base):
215215
reference_raw = Column(String)
216216
external_identifier = Column(ARRAY(String))
217217
scix_id = Column(String)
218+
publication_year = Column(Integer)
219+
refereed_status = Column(Integer)
218220

219-
def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str, external_identifier: list = None, scix_id: str = None):
221+
def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str,
222+
external_identifier: list = None, scix_id: str = None, publication_year: int = None, refereed_status: int = None):
220223
"""
221224
initializes a resolved reference object
222225
@@ -228,6 +231,8 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode:
228231
:param score: confidence score of the resolved reference
229232
:param reference_raw: raw reference string
230233
:param external_identifier: list of external identifiers associated with the reference, e.g. ["doi:...", "arxiv:...", "ascl:..."]
234+
:param publication_year: publication year
235+
:param refereed_status: refereed status flag (0 or 1)
231236
"""
232237
self.history_id = history_id
233238
self.item_num = item_num
@@ -237,6 +242,8 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode:
237242
self.reference_raw = reference_raw
238243
self.external_identifier = external_identifier or []
239244
self.scix_id = scix_id
245+
self.publication_year = publication_year
246+
self.refereed_status = refereed_status
240247

241248
def toJSON(self) -> dict:
242249
"""
@@ -252,7 +259,9 @@ def toJSON(self) -> dict:
252259
'item_num': self.item_num,
253260
**({'reference_raw': self.reference_raw} if self.reference_raw else {}),
254261
'external_identifier': self.external_identifier,
255-
**({'scix_id': self.scix_id} if self.scix_id else {})
262+
**({'scix_id': self.scix_id} if self.scix_id else {}),
263+
**({'publication_year': self.publication_year} if self.publication_year is not None else {}),
264+
**({'refereed_status': self.refereed_status} if self.refereed_status is not None else {}),
256265
}
257266

258267

@@ -299,4 +308,3 @@ def toJSON(self) -> dict:
299308
'score': self.score,
300309
'state': self.state,
301310
}
302-

adsrefpipe/tests/unittests/test_app.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,28 @@ def _get_scix_id(rec):
5353
return getattr(rec, "scix_id", None)
5454

5555

56+
def _get_publication_year(rec):
57+
"""
58+
Works whether rec is a dict (bulk mappings) or an ORM object.
59+
"""
60+
if rec is None:
61+
return None
62+
if isinstance(rec, dict):
63+
return rec.get("publication_year")
64+
return getattr(rec, "publication_year", None)
65+
66+
67+
def _get_refereed_status(rec):
68+
"""
69+
Works whether rec is a dict (bulk mappings) or an ORM object.
70+
"""
71+
if rec is None:
72+
return None
73+
if isinstance(rec, dict):
74+
return rec.get("refereed_status")
75+
return getattr(rec, "refereed_status", None)
76+
77+
5678
def _make_session_scope_cm(session):
5779
"""
5880
Return a context manager mock that behaves like app.session_scope()
@@ -717,6 +739,8 @@ def test_populate_tables_post_resolved_with_classic(self):
717739
'score': 1.0,
718740
'external_identifier': ['doi:10.1234/abc', 'arxiv:2301.00001'],
719741
'scix_id': 'scix:ABCD-1234-ref1',
742+
'publication_year': 2023,
743+
'refereed_status': 1,
720744
},
721745
{
722746
'id': 'H1I2',
@@ -725,6 +749,8 @@ def test_populate_tables_post_resolved_with_classic(self):
725749
'score': 0.8,
726750
'external_identifier': ['ascl:2301.001', 'doi:10.9999/xyz'],
727751
'scix_id': 'scix:ABCD-1234-ref2',
752+
'publication_year': 2021,
753+
'refereed_status': 0,
728754
}
729755
]
730756

@@ -756,6 +782,10 @@ def test_populate_tables_post_resolved_with_classic(self):
756782

757783
self.assertEqual(_get_scix_id(resolved_records[0]), 'scix:ABCD-1234-ref1')
758784
self.assertEqual(_get_scix_id(resolved_records[1]), 'scix:ABCD-1234-ref2')
785+
self.assertEqual(_get_publication_year(resolved_records[0]), 2023)
786+
self.assertEqual(_get_publication_year(resolved_records[1]), 2021)
787+
self.assertEqual(_get_refereed_status(resolved_records[0]), 1)
788+
self.assertEqual(_get_refereed_status(resolved_records[1]), 0)
759789

760790
@patch("adsrefpipe.app.ProcessedHistory")
761791
@patch("adsrefpipe.app.ResolvedReference")
@@ -1058,13 +1088,17 @@ def test_resolved_reference_toJSON_includes_scix_id(self):
10581088
reference_raw="Some ref raw",
10591089
external_identifier=["doi:10.1234/xyz"],
10601090
scix_id="scix:ABCD-1234-0004",
1091+
publication_year=2020,
1092+
refereed_status=1,
10611093
)
10621094
got = rr.toJSON()
10631095
self.assertEqual(got["history_id"], 123)
10641096
self.assertEqual(got["item_num"], 1)
10651097
self.assertEqual(got["bibcode"], "2020A&A...000A...1X")
10661098
self.assertEqual(got["external_identifier"], ["doi:10.1234/xyz"])
10671099
self.assertEqual(got["scix_id"], "scix:ABCD-1234-0004")
1100+
self.assertEqual(got["publication_year"], 2020)
1101+
self.assertEqual(got["refereed_status"], 1)
10681102

10691103
def test_resolved_reference_toJSON_omits_scix_id_when_none(self):
10701104
"""Test ResolvedReference.toJSON omits scix_id when not set"""
@@ -1077,9 +1111,13 @@ def test_resolved_reference_toJSON_omits_scix_id_when_none(self):
10771111
reference_raw="Some ref raw",
10781112
external_identifier=["doi:10.1234/xyz"],
10791113
scix_id=None,
1114+
publication_year=None,
1115+
refereed_status=0,
10801116
)
10811117
got = rr.toJSON()
10821118
self.assertTrue("scix_id" not in got)
1119+
self.assertTrue("publication_year" not in got)
1120+
self.assertEqual(got["refereed_status"], 0)
10831121

10841122

10851123
class TestDatabaseNoStubdata(unittest.TestCase):
@@ -1126,6 +1164,31 @@ def test_app(self):
11261164
assert self.app._config.get('SQLALCHEMY_URL') == 'postgresql://mock/mock'
11271165
assert self.app.conf.get('SQLALCHEMY_URL') == 'postgresql://mock/mock'
11281166

1167+
def test_update_resolved_reference_records_includes_new_columns(self):
    """ verify the bulk update mappings carry publication_year and refereed_status """
    record = ResolvedReference(
        history_id=1,
        item_num=2,
        reference_str="Some reference",
        bibcode="2023A&A...657A...1X",
        score=1.0,
        reference_raw="Some reference",
        external_identifier=["doi:10.1234/example"],
        scix_id="scix:ABCD-1234-9999",
        publication_year=2023,
        refereed_status=1,
    )

    outcome = self.app.update_resolved_reference_records(self.mock_session, [record])
    self.assertTrue(outcome)

    # exactly one bulk update is expected, called positionally as (model, mappings)
    bulk_update = self.mock_session.bulk_update_mappings
    bulk_update.assert_called_once()
    called_model, called_mappings = bulk_update.call_args[0]
    self.assertIs(called_model, ResolvedReference)
    self.assertEqual(len(called_mappings), 1)

    payload = called_mappings[0]
    self.assertEqual(payload["publication_year"], 2023)
    self.assertEqual(payload["refereed_status"], 1)
1191+
11291192
def test_query_reference_tbl_when_empty(self):
11301193
""" verify reference_source table being empty """
11311194
self.app.diagnostic_query = MagicMock(return_value=[])
@@ -1164,6 +1227,8 @@ def test_populate_tables(self):
11641227
"id": "H1I1",
11651228
"external_identifier": ["arxiv:1009.5514", "doi:10.1234/abc"],
11661229
"scix_id": "scix:ABCD-1234-0005",
1230+
"publication_year": 2011,
1231+
"refereed_status": 1,
11671232
},
11681233
{
11691234
"score": "1.0",
@@ -1173,6 +1238,8 @@ def test_populate_tables(self):
11731238
"id": "H1I2",
11741239
"external_identifier": ["arxiv:1709.02923", "ascl:2301.001"],
11751240
"scix_id": "scix:ABCD-1234-0006",
1241+
"publication_year": 2017,
1242+
"refereed_status": 0,
11761243
}
11771244
]
11781245

@@ -1221,6 +1288,10 @@ def test_populate_tables(self):
12211288
self.assertEqual(got[1]["external_identifier"], ["arxiv:1709.02923", "ascl:2301.001"])
12221289
self.assertEqual(got[0]["scix_id"], "scix:ABCD-1234-0005")
12231290
self.assertEqual(got[1]["scix_id"], "scix:ABCD-1234-0006")
1291+
self.assertEqual(got[0]["publication_year"], 2011)
1292+
self.assertEqual(got[1]["publication_year"], 2017)
1293+
self.assertEqual(got[0]["refereed_status"], 1)
1294+
self.assertEqual(got[1]["refereed_status"], 0)
12241295

12251296
def test_get_parser_error(self):
12261297
""" test get_parser when it errors for unrecognized source filename """
@@ -1242,4 +1313,3 @@ def _fake_get_parser(path):
12421313

12431314
if __name__ == '__main__':
12441315
unittest.main()
1245-
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""add scix_id
2+
3+
Revision ID: 835999dfb9e3
4+
Revises: 08ca70bd6f5f
5+
Create Date: 2026-02-11 12:45:45.441650
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '835999dfb9e3'
14+
down_revision = '08ca70bd6f5f'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """
    Add the nullable ``scix_id`` string column to ``resolved_reference``.

    The column is only added when it is not already present.

    :raises RuntimeError: if the ``resolved_reference`` table does not exist
    """
    table_name = "resolved_reference"
    inspector = sa.inspect(op.get_bind())
    if not inspector.has_table(table_name):
        raise RuntimeError(
            "Migration 835999dfb9e3 requires table `resolved_reference`, "
            "but it does not exist. Database schema and alembic_version are out of sync."
        )

    existing = {column["name"] for column in inspector.get_columns(table_name)}
    if "scix_id" in existing:
        # column already there, nothing to do
        return
    op.add_column(table_name, sa.Column("scix_id", sa.String(), nullable=True))
30+
31+
32+
def downgrade():
    """
    Drop the ``scix_id`` column from ``resolved_reference``.

    Does nothing when the table or the column is absent.
    """
    table_name = "resolved_reference"
    inspector = sa.inspect(op.get_bind())
    if inspector.has_table(table_name):
        existing = {column["name"] for column in inspector.get_columns(table_name)}
        if "scix_id" in existing:
            op.drop_column(table_name, "scix_id")
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""add publication_year and refereed_status
2+
3+
Revision ID: 9a4b1e8b6c7d
4+
Revises: 835999dfb9e3
5+
Create Date: 2026-03-11 00:00:00.000000
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "9a4b1e8b6c7d"
14+
down_revision = "835999dfb9e3"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """
    Add nullable integer columns ``publication_year`` and ``refereed_status``
    to ``resolved_reference``.

    Each column is only added when it is not already present.

    :raises RuntimeError: if the ``resolved_reference`` table does not exist
    """
    table_name = "resolved_reference"
    inspector = sa.inspect(op.get_bind())
    if not inspector.has_table(table_name):
        raise RuntimeError(
            "Migration 9a4b1e8b6c7d requires table `resolved_reference`, "
            "but it does not exist. Database schema and alembic_version are out of sync."
        )

    # snapshot of the column names taken once, before any column is added
    existing = {column["name"] for column in inspector.get_columns(table_name)}
    for column_name in ("publication_year", "refereed_status"):
        if column_name not in existing:
            op.add_column(table_name, sa.Column(column_name, sa.Integer(), nullable=True))
33+
34+
35+
def downgrade():
    """
    Drop ``refereed_status`` and ``publication_year`` from ``resolved_reference``.

    Does nothing when the table is absent; a column that is already missing
    is skipped.
    """
    table_name = "resolved_reference"
    inspector = sa.inspect(op.get_bind())
    if not inspector.has_table(table_name):
        return

    existing = {column["name"] for column in inspector.get_columns(table_name)}
    # dropped in the reverse of the order in which upgrade() adds them
    for column_name in ("refereed_status", "publication_year"):
        if column_name in existing:
            op.drop_column(table_name, column_name)

0 commit comments

Comments
 (0)