Skip to content

Commit 2fbaff7

Browse files
authored
Merge pull request #28 from HugoOnghai/new-core-to-rebase
New Core PR, after Rebased with upstream/master
2 parents 401b6b1 + 5f21e46 commit 2fbaff7

File tree

2 files changed

+246
-0
lines changed

2 files changed

+246
-0
lines changed

src/mp_cite/core.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
from typing import TypeAlias
2+
3+
from elinkapi import Elink
4+
from elinkapi.record import RecordResponse
5+
from pymongo import MongoClient
6+
7+
import requests
8+
from elinkapi.utils import Validation
9+
10+
11+
from mp_cite.doi_builder import MinimumDARecord
12+
13+
from typing import Literal
14+
15+
# Integer identifier of an OSTI / E-Link record; used below as the type of
# every ``osti_id`` argument and of the ids returned by the mongo lookup.
OstiID: TypeAlias = int
16+
17+
18+
def find_out_of_date_doi_entries(
    rc_client: MongoClient,
    doi_client: MongoClient,
    robocrys_db: str,
    robocrys_collection: str,
    doi_db: str,
    doi_collection: str,
) -> list[OstiID]:
    """
    find_out_of_date_doi_entries queries MP's mongo collections to find the doi entries whose robocrys
    documents were updated more recently than the most recently updated doi document

    :rc_client is the MongoClient used to access the robocrys collection
    :doi_client is the MongoClient used to access the doi collection (since a new doi collection is planned, the clients are passed separately, though in the future they may be the same client.)
    :robocrys_db is the name of the database the robocrys collection is in
    :robocrys_collection is the name of the robocrys collection
    :doi_db is the name of the database the doi collection is in
    :doi_collection is the name of the doi collection

    returns a list containing all OSTI IDs associated with out-of-date doi entries. The list is empty
    when the doi collection holds no documents or when no robocrys document is newer than the latest doi update.
    """
    robocrys = rc_client[robocrys_db][robocrys_collection]
    dois = doi_client[doi_db][doi_collection]

    # Fetch the single doi document with the newest ``date_metadata_updated``.
    newest_doi_doc = next(
        dois.aggregate(
            [
                {"$project": {"_id": 0, "date_metadata_updated": 1}},
                {"$sort": {"date_metadata_updated": -1}},
                {"$limit": 1},
            ]
        ),
        None,
    )
    if newest_doi_doc is None:
        # An empty doi collection has nothing that could be out of date; without
        # the default above, next() would raise StopIteration here.
        return []
    latest_doi = newest_doi_doc["date_metadata_updated"]

    # robocrys documents touched after the newest doi update.
    material_ids_to_update = [
        doc["material_id"]
        for doc in robocrys.find(
            {"last_updated": {"$gt": latest_doi}}, {"_id": 0, "material_id": 1}
        )
    ]
    if not material_ids_to_update:
        # ``$in`` with an empty list matches nothing, so skip the extra query.
        return []

    return [
        doc["osti_id"]
        for doc in dois.find(
            {"material_id": {"$in": material_ids_to_update}},
            {"_id": 0, "osti_id": 1},
        )
    ]
69+
70+
71+
def update_existing_osti_record(
    elinkapi: Elink, osti_id: OstiID, new_values: dict
) -> RecordResponse:
    """
    update_existing_osti_record fetches the record with the given osti id, overwrites the attributes named in
    new_values with the supplied values, and saves the modified record back to E-Link

    :elinkapi is the instance of the elinkapi associated with the environment in which the record is held (e.g. either production or review environment)
    :osti_id is the osti id of the record which ought to be updated
    :new_values is a dictionary of keywords (which should exist in ELink's record model) and new value pairs.

    returns the record response of the update call. The record is only saved, never submitted.

    N.B., it is currently assumed that the user will handle the "sponsor identifier bug"
    --- in which the retrieved record responses of validated records from the E-Link production environment seemingly
    lack the required Sponsor Organization identifiers which were necessary for their submission (due to rearrangement of metadata
    on E-Link's side) --- before calling this function.

    Otherwise, the following code excerpt would need to be added to retroactively fix the issue with the sponsor organization's identifiers
        for entry in record.organizations:
            if entry.type == "SPONSOR":
                entry.identifiers = [{"type": 'CN_DOE', "value": 'AC02-05CH11231'}]
                break

    Instead, we leave this for the user.
    """
    current_record = elinkapi.get_single_record(osti_id)

    # Overwrite each requested attribute on the fetched record in place.
    for field_name, replacement in new_values.items():
        setattr(current_record, field_name, replacement)

    # user should use update_state_of_osti_record to submit instead
    response = elinkapi.update_record(osti_id, current_record, state="save")
    return response
103+
104+
105+
def submit_new_osti_record(
    elinkapi: Elink,
    new_values: dict,
    state: Literal["save", "submit"] = "submit",
) -> RecordResponse:
    """
    submit_new_osti_record generates a new record based on the provided keyword-value pairs in the new_values dict and the default minimum DA Record metadata necessary for submission

    :elinkapi is the instance of the elinkapi associated with the environment the record should be posted to
    :new_values is the dictionary of keywords and values which are to be included in the submitted record (besides or in lieu of default values). The title MUST be provided.
    :state defaults to "submit" but the user can simply "save" if desired. This is done given our assumption that there is
        no need to bother with saving; rather, a new record is only sent to osti when it's ready for submission.

    returns the record response after submission
    """
    # MinimumDARecord supplies the default values, so new_values only has to
    # carry the title plus whatever should override a default.
    new_record = MinimumDARecord(**new_values)
    return elinkapi.post_new_record(new_record, state)
129+
130+
131+
def update_state_of_osti_record(
    elinkapi: Elink, osti_id: OstiID, new_state: Literal["save", "submit"]
) -> RecordResponse:
    """
    update_state_of_osti_record re-sends the record with the provided osti_id to E-Link unchanged, using new_state
    (either "save" or "submit") as the state of the update

    :elinkapi is the instance of the elinkapi associated with the environment in which the record is held
    :osti_id is the OSTI ID associated with the record of which to update state.
    :new_state is the state to request, restricted to the strings "save" or "submit".

    returns the record response after updating the state.
    """
    # The record content is fetched and passed back as-is; only the state differs.
    existing_record = elinkapi.get_single_record(osti_id)
    response = elinkapi.update_record(osti_id, existing_record, new_state)
    return response
145+
146+
147+
def delete_osti_record(elinkapi: Elink, osti_id: OstiID, reason: str) -> bool:
    """
    Delete a record by its OSTI ID.

    :elinkapi is the elinkapi instance whose ``target`` base URL and ``token`` are used for the request
    :osti_id is the osti_id associated with the record which ought to be deleted
    :reason is a str object which explains in words why the record is to be deleted (necessary for the http request)

    returns true if the response status code is 204, else false.
    NOTE(review): Validation.handle_response is called on the response first and presumably raises
    for error statuses, in which case false would rarely be returned --- confirm against elinkapi.
    """
    response = requests.delete(
        f"{elinkapi.target}records/{osti_id}",
        # Let requests build the query string so that characters such as "&",
        # "#" or "=" in the free-text reason are encoded instead of corrupting the URL.
        params={"reason": reason},
        headers={"Authorization": f"Bearer {elinkapi.token}"},
    )
    Validation.handle_response(response)
    return response.status_code == 204  # True if deleted successfully

src/mp_cite/models.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
from pydantic import BaseModel, Field, model_validator
2+
3+
from datetime import datetime
4+
from elinkapi import Record, Organization, Person
5+
6+
from typing import List, Any
7+
import pytz
8+
9+
10+
class DOIModel(BaseModel):
    """
    The model for a DOI document in a mongodb collection, which should better align with E-Link's record model.

    It is designed for easy transfer from E-Link record response to doi document. All fields can be mapped directly from a
    record response keywords of the same name, or, in the case of material_id, it is automatically filled in with site_unique_id
    with the model validator `set_material_id(...)`
    """

    # identifiers
    doi: str = Field(description="The DOI number as allocated by OSTI")
    title: str = Field(description="The title of the record")
    # NOTE(review): coerce_numbers_to_str is a string-field option; on an int
    # field it looks like it has no effect --- confirm and consider removing.
    osti_id: int = Field(
        coerce_numbers_to_str=True,
        description="The OSTI ID number allocated by OSTI to make the DOI number",
    )
    material_id: str  # always overwritten with site_unique_id by set_material_id
    site_unique_id: str

    # time stamps
    date_metadata_added: datetime | None = Field(
        description="date_record_entered_onto_ELink"
    )
    date_metadata_updated: datetime | None = Field(
        description="date_record_last_updated_on_Elink"
    )

    # status
    workflow_status: str
    date_released: datetime | None = Field(description="")
    date_submitted_to_osti_first: datetime = Field(
        description="date record was first submitted to OSTI for publication, maintained internally by E-Link"
    )
    date_submitted_to_osti_last: datetime = Field(
        description="most recent date record information was submitted to OSTI. Maintained internally by E-Link"
    )
    publication_date: datetime | None = Field(description="")

    @model_validator(mode="before")
    @classmethod
    def set_material_id(cls, values: Any) -> Any:
        """
        set_material_id takes the raw values passed into the model constructor, before pydantic parsing,
        and sets material_id to whatever was passed in for site_unique_id (any material_id supplied by the caller is replaced)

        :cls is the model class (this is a class method)
        :values are the values passed into the constructor (contain the "raw input")

        returns the values to validate. The caller's dictionary is not modified; a copy carrying material_id is returned.
        Input that is not a dict, or that lacks site_unique_id, is returned unchanged so that pydantic reports the
        problem as a validation error instead of this method raising a KeyError.
        """
        if not isinstance(values, dict) or "site_unique_id" not in values:
            return values
        return {**values, "material_id": values["site_unique_id"]}
61+
62+
63+
def _default_organizations() -> List[Organization]:
    """Build the researching and sponsoring organizations used when none are supplied."""
    researching = Organization(
        type="RESEARCHING", name="LBNL Materials Project (LBNL-MP)"
    )
    # sponsor org is necessary for submission
    sponsor = Organization(
        type="SPONSOR",
        name="TEST SPONSOR ORG",
        identifiers=[{"type": "CN_DOE", "value": "AC02-05CH11231"}],
    )
    return [researching, sponsor]


def _default_persons() -> List[Person]:
    """Build the single default author entry."""
    return [Person(type="AUTHOR", last_name="Persson")]


def _default_access_limitations() -> List[str]:
    """Build the default access limitation codes."""
    return ["UNL"]


def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(tz=pytz.UTC)


class MinimumDARecord(Record):
    """
    A Record subclass that supplies a default for every field declared here except title,
    so that a caller only has to provide the title plus any overrides.

    List and datetime defaults are produced by factory functions, so each new instance gets its own
    objects and its own publication timestamp.
    """

    product_type: str = Field(default="DA")
    title: str  # Required
    organizations: List[Organization] = Field(default_factory=_default_organizations)
    persons: List[Person] = Field(default_factory=_default_persons)
    site_ownership_code: str = Field(default="LBNL-MP")
    access_limitations: List[str] = Field(default_factory=_default_access_limitations)
    publication_date: datetime = Field(default_factory=_utc_now)
    site_url: str = Field(default="https://next-gen.materialsproject.org/materials")

0 commit comments

Comments
 (0)