Skip to content

Commit 877f0d0

Browse files
authored
Merge pull request #331 from semantic-systems/develop
add new updates to main from dev
2 parents 2c643c3 + 2054b6e commit 877f0d0

38 files changed

+1068
-218
lines changed

.env.example

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
PREFERRED_URL_SCHEME=http
22
SECRET_KEY=
33
IEEE_API_KEY=
4+
OPENCITATIONS_API_KEY=
5+
CORE_API_KEY=
46
CLIENT_ID_GOOGLE=
57
CLIENT_SECRET_GOOGLE=
68
CLIENT_ID_GITHUB=

config.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ class Config:
1313

1414
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
1515
IEEE_API_KEY = os.environ.get("IEEE_API_KEY", "")
16+
OPENCITATIONS_API_KEY= os.environ.get("OPENCITATIONS_API_KEY", "")
17+
CORE_API_KEY= os.environ.get("CORE_API_KEY", "")
1618

1719
REQUEST_HEADER_USER_AGENT = "nfdi4dsBot/1.0 (https://www.nfdi4datascience.de/nfdi4dsBot/; nfdi4dsBot@nfdi4datascience.de)"
1820
REQUEST_TIMEOUT = 100
@@ -225,6 +227,30 @@ class Config:
225227
"module": "dblp_venues",
226228
"search-endpoint": f"https://dblp.org/search/venue/api?format=json&h={NUMBER_OF_RECORDS_FOR_SEARCH_ENDPOINT}&q=",
227229
},
230+
"OpenCitations":{
231+
"logo": {
232+
"name": "OpenCitations",
233+
"link": "https://opencitations.net/",
234+
"src": "opencitations.png",
235+
"width": "w-100",
236+
"height": "h-100",
237+
},
238+
"module": "opencitations",
239+
"citations-endpoint": "https://opencitations.net/index/api/v2/citations/doi:",
240+
"get-publication-references-endpoint": "https://opencitations.net/index/api/v2/references/doi:",
241+
"metadata-endpoint": "https://opencitations.net/meta/api/v1/metadata/doi:"
242+
},
243+
"CORE":{
244+
"logo": {
245+
"name": "CORE",
246+
"link": "https://core.ac.uk/",
247+
"src": "core.png",
248+
"width": "w-100",
249+
"height": "h-100",
250+
},
251+
"module": "core",
252+
"search-endpoint": f"https://api.core.ac.uk/v3/search/works/?limit={NUMBER_OF_RECORDS_FOR_SEARCH_ENDPOINT}&q=",
253+
}
228254
# "dblp - Publications": {
229255
# "module": "dblp_publications",
230256
# "search-endpoint": f"https://dblp.org/search/publ/api?format=json&h={NUMBER_OF_RECORDS_FOR_SEARCH_ENDPOINT}&q=",

main.py

Lines changed: 212 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from config import Config
2020
from chatbot import chatbot
21+
from objects import Article
2122

2223
import utils
2324
from flask_limiter import Limiter
@@ -56,7 +57,8 @@
5657

5758
from typing import Optional
5859
from werkzeug.security import generate_password_hash, check_password_hash
59-
from dataclasses import dataclass, fields, field
60+
from pydantic.dataclasses import dataclass
61+
from dataclasses import fields, field
6062
from flask_login import UserMixin
6163
# ...
6264
@dataclass
@@ -218,7 +220,7 @@ def get_researcher_url(person, external=True) -> str:
218220
{{ person | get_researcher_url }}
219221
"""
220222

221-
if getattr(person, 'type', '').lower() != 'person':
223+
if getattr(person, 'additionalType', '').lower() != 'person':
222224
return ''
223225
if not getattr(person, 'identifier', None):
224226
return ''
@@ -683,6 +685,120 @@ def get_chatbot_answer():
683685
return answer
684686

685687

688+
@app.route('/publication-details/get-dois-references/<path:doi>', methods=['POST'])
@limiter.limit("10 per minute")
def get_publication_dois_references(doi):
    """
    Return the DOIs referenced by the publication identified by ``doi``.

    Every configured source module is asked for its reference DOIs through
    its ``get_dois_references()`` function. The DOIs are lowercased and
    merged into one duplicate-free collection.

    Response JSON: ``{"dois": [...]}``
    """

    # source name -> module under ``sources`` providing get_dois_references()
    modules_by_source = {
        "CROSSREF - Publications": "crossref_publications",
        "OpenCitations": "opencitations",
    }

    unique_dois = set()

    for source, module_name in modules_by_source.items():
        source_module = importlib.import_module(f'sources.{module_name}')
        raw_dois = source_module.get_dois_references(source=source, doi=doi)

        # lowercase so the same DOI in different casing collapses into one entry
        lowered = [entry.lower() for entry in raw_dois]

        print(f"found {len(lowered)} DOIs in {source} for {doi}")

        unique_dois.update(lowered)

    return jsonify({'dois': list(unique_dois)})
716+
717+
@app.route('/publication-details/get-dois-citations/<path:doi>', methods=['POST'])
@limiter.limit("10 per minute")
def get_publication_citations_dois(doi):
    """
    Return the DOIs of publications that cite the publication ``doi``.

    Every configured source module is asked for its citing DOIs through its
    ``get_dois_citations()`` function. The DOIs are lowercased and merged
    into one duplicate-free collection.

    Response JSON: ``{"dois": [...]}``
    """

    # source name -> module under ``sources`` providing get_dois_citations()
    modules_by_source = {
        "SEMANTIC SCHOLAR - Publications": "semanticscholar_publications",
        "OpenCitations": "opencitations",
    }

    citing_dois = set()

    for source, module_name in modules_by_source.items():
        source_module = importlib.import_module(f'sources.{module_name}')
        raw_dois = source_module.get_dois_citations(source=source, doi=doi)

        # lowercase so the same DOI in different casing collapses into one entry
        lowered = [entry.lower() for entry in raw_dois]

        print(f"found {len(lowered)} DOIs in {source} for {doi}")

        citing_dois.update(lowered)

    return jsonify({'dois': list(citing_dois)})
745+
746+
@app.route('/publication-details/get-metadata/', methods=['POST'])
@limiter.limit("10 per minute")
def get_publication_metadata():
    """
    Endpoint to get metadata for a list of DOIs.

    Expects a JSON body of the form ``{"dois": ["10.xxxx/...", ...]}``.
    Every configured source module is queried through its
    ``get_batch_articles()`` function. Articles are collected keyed by their
    lowercase DOI. Every requested DOI that no source resolved is returned as
    a stub Article carrying only the DOI and ``partiallyLoaded=True``.

    Returns:
        200 with ``{"publications": [...]}`` (serialized Article objects), or
        400 with ``{"error": "No DOIs provided"}`` when the body is missing,
        is not a JSON object, or contains no usable DOIs.
    """

    # add more metadata sources here
    # uses get_batch_articles() from their modules
    metadata_sources = {
        "OpenCitations": "opencitations",
    }

    # silent=True: a missing or malformed JSON body falls through to the 400
    # below instead of raising inside the view
    body = request.get_json(silent=True)
    requested = body.get('dois') if isinstance(body, dict) else None
    if not isinstance(requested, list):
        requested = []

    # only non-empty strings can be looked up (and lowercased) as DOIs
    dois = [doi for doi in requested if isinstance(doi, str) and doi]
    print(f"Received {len(dois)} DOIs for metadata retrieval")

    if not dois:
        return jsonify({"error": "No DOIs provided"}), 400

    # collect articles keyed by lowercase DOI
    collected: dict[str, Article] = {}

    for module_name in metadata_sources.values():
        articles = importlib.import_module(f'sources.{module_name}').get_batch_articles(dois=dois)

        # titles collected from the previous sources; articles within one
        # source are only deduplicated by DOI, never by title
        known_titles = {(article.name or "").lower() for article in collected.values()}
        known_titles.discard("")  # an untitled article is never a title duplicate

        for article in articles or []:
            title = (article.name or "").lower()
            doi = (article.identifier or "").lower()

            # skip articles an earlier source already delivered under the same title
            if title and title in known_titles:
                continue

            # skip articles without a DOI or whose DOI is already collected
            if not doi or doi in collected:
                continue

            collected[doi] = article

    # create stub for every unresolved DOI
    for doi in dois:
        key = doi.lower()
        # compare the normalised DOI: the keys of ``collected`` are lowercase,
        # so checking the raw DOI would replace resolved articles by stubs
        if key not in collected:
            # an Article with only a DOI, set flag partiallyLoaded=True
            collected[key] = Article(identifier=doi, partiallyLoaded=True)

    # serialize all Article objects to json
    payload = [
        art.model_dump(mode="python", exclude_none=True) for art in collected.values()
    ]

    return jsonify({
        'publications': payload
    })
801+
686802
@app.route('/publication-details/<string:source_name>/<string:source_id>/<string:doi>', methods=['GET'])
687803
@limiter.limit("10 per minute")
688804
@utils.timeit
@@ -737,23 +853,74 @@ def publication_details(source_name, source_id, doi):
737853

738854
return response
739855

740-
@app.route('/disabled/publication-details-references/<path:doi>', methods=['GET'])
@utils.timeit
def publication_details_references(doi):
    """
    Render the references partial for the publication identified by ``doi``.

    Every configured source module is queried through its
    ``get_publication_references()`` function. References whose identifier or
    lowercase name was already delivered by an earlier source are dropped.
    The merged list is attached to a base article, which is passed to the
    ``partials/publication-details/references.html`` template.

    The base article is the one returned by the preferred source; when that
    source returns nothing, the first article returned by any other source is
    used. If no source returns an article, an empty 404 response is sent.
    """
    print("doi:", doi)

    reference_sources = {
        "CROSSREF - Publications": "crossref_publications",
        "OpenCitations": "opencitations",
    }

    # change this to select another base article
    preferred_source = "CROSSREF - Publications"

    references = []

    # this will be the base article to which we will add references
    base_article = None

    for source, module_name in reference_sources.items():
        print(f"requesting references from {source} for DOI: {doi}")
        # request reference data from these endpoints
        article = importlib.import_module(f'sources.{module_name}').get_publication_references(source=source, doi=doi)

        # tolerate a missing article as well as a missing or empty references attribute
        found_references = getattr(article, 'references', None) or []

        # add all references whose doi or name is not already in the references list;
        # the lookup lists are snapshots taken before this source is merged
        doi_list = [ref.identifier for ref in references]
        name_list = [ref.name.lower() for ref in references]
        for ref in found_references:
            if ref.identifier not in doi_list and ref.name.lower() not in name_list:
                references.append(ref)

        if article is not None and (source == preferred_source or base_article is None):
            base_article = article

    if base_article is None:
        # no source knows this DOI; there is nothing to attach the references to
        return make_response('', 404)

    # set all references to the base article
    base_article.references = references
    response = make_response(render_template('partials/publication-details/references.html', publication=base_article))

    return response
749897

750898
@app.route('/publication-details-citations/<path:doi>', methods=['GET'])
@utils.timeit
def publication_details_citations(doi):
    """
    Render the citations partial for the publication identified by ``doi``.

    Every configured source module is queried through its
    ``get_citations_for_publication()`` function. A publication is dropped
    when its identifier or lowercase name was already delivered by an earlier
    source. The merged list is passed to the
    ``partials/publication-details/citations.html`` template.
    """
    print("for citations - DOI:", doi)

    # source name -> module under ``sources`` queried for citation data
    citation_sources = {
        "SEMANTIC SCHOLAR - Publications": "semanticscholar_publications",
        "OpenCitations": "opencitations",
    }

    merged = []

    for source, module_name in citation_sources.items():
        source_module = importlib.import_module(f'sources.{module_name}')
        candidates = source_module.get_citations_for_publication(source=source, doi=doi)

        # snapshots of what the earlier sources contributed; entries added from
        # the current source are not compared against each other
        seen_dois = [entry.identifier for entry in merged]
        seen_names = [entry.name.lower() for entry in merged]

        merged.extend(
            candidate
            for candidate in candidates
            if not (candidate.identifier in seen_dois or candidate.name.lower() in seen_names)
        )

    return make_response(
        render_template('partials/publication-details/citations.html', publications=merged)
    )
@@ -769,6 +936,35 @@ def publication_details_recommendations(doi):
769936
# print("response:", response)
770937
return response
771938

939+
@app.get("/publication-details/citation/format")
@limiter.limit("10 per minute")
def get_citation():
    """
    Format a DOI as a citation string via the DOI Citation Formatter
    (https://citation.doi.org/).

    Query parameters:
        doi:   DOI to format (required; trimmed and lowercased)
        style: citation style (optional, defaults to "ieee"; trimmed)
    The language is fixed to en-US.

    Returns:
        200 with { doi, style, citation } on success,
        400 with { error } when no DOI is given,
        502 with { error, detail } when the request to the formatter fails.
    """

    query = request.args
    doi = (query.get("doi") or "").strip().lower()
    style = (query.get("style") or "ieee").strip()

    # nothing to format without a DOI
    if not doi:
        return jsonify({"error": "missing doi"}), 400

    formatter_query = {"doi": doi, "style": style, "lang": "en-US"}
    formatter_headers = {"Accept": "text/plain; charset=utf-8"}

    try:
        # forward the request to citation.doi.org
        upstream = requests.get(
            "https://citation.doi.org/format",
            params=formatter_query,
            headers=formatter_headers,
            timeout=10,
        )
        upstream.raise_for_status()
        result = {"doi": doi, "style": style, "citation": upstream.text.strip()}
        return jsonify(result), 200
    except requests.RequestException as exc:
        failure = {"error": "citation service failed", "detail": str(exc)}
        return jsonify(failure), 502
967+
772968
@app.route('/researcher-details/<string:source_name>/<string:source_id>/<string:orcid>', methods=['GET'])
773969
@limiter.limit("10 per minute")
774970
@utils.timeit
@@ -814,7 +1010,7 @@ def researcher_details(source_name, source_id, orcid):
8141010

8151011
if (len(researchers) == 1): #forward the only publication record received from one of the sources
8161012
response = make_response(render_template('researcher-details.html', researcher=researchers[0]))
817-
session['researcher:'+orcid] = jsonify(researchers[0]).json
1013+
session['researcher:'+orcid] = researchers[0].model_dump(mode="python", exclude_none=True)
8181014
else:
8191015
#merge more than one researchers record into one researcher
8201016
merged_researcher = merge_objects(researchers, "researchers")
@@ -830,7 +1026,7 @@ def generate_researcher_about_me(orcid):
8301026
return jsonify(summary=f'{researcher_about_me}')
8311027

8321028
@app.route('/resource-details/<string:source_name>/<string:source_id>/<string:doi>', methods=['GET'])
833-
# @utils.handle_exceptions
1029+
@utils.handle_exceptions
8341030
def resource_details(source_name, source_id, doi):
8351031

8361032
source_name = unquote(source_name.split(':', 1)[1]) if ':' in source_name else unquote(source_name)
@@ -1158,18 +1354,18 @@ def get_preference_index(obj, field_name):
11581354
sources = set()
11591355

11601356
# iterate through the sorted objects and choose the first non-empty value for each field in the merged object
1161-
for field in fields(merged_object):
1357+
for field in type(merged_object).model_fields.keys():
11621358

11631359
# sort the objects by the current field
11641360
# if the field is not found, the objects are sorted with the __default__ list
1165-
sorted_objects = sorted(object_list, key=lambda obj: get_preference_index(obj, field.name))
1361+
sorted_objects = sorted(object_list, key=lambda obj: get_preference_index(obj, field))
11661362

11671363
# iterate through the sorted objects until one of them contains a non-empty value for the field
11681364
for obj in sorted_objects:
1169-
val = getattr(obj, field.name, None)
1365+
val = getattr(obj, field, None)
11701366

11711367
if val not in (None, "", [], {}): # check if the value is empty or a placeholder
1172-
setattr(merged_object, field.name, val)
1368+
setattr(merged_object, field, val)
11731369

11741370
# add all sources to the merged object
11751371
source_list = set(getattr(obj, 'source', []))

0 commit comments

Comments
 (0)