1+ import os
12from datetime import date
23from typing import Optional , Union
34
4- import metapub
5+ import eutils
56from eutils import EutilsNCBIError
7+ from eutils ._internal .xmlfacades .pubmedarticle import PubmedArticle
8+ from eutils ._internal .xmlfacades .pubmedarticleset import PubmedArticleSet
69from sqlalchemy .orm import Session
710
811from mavedb .lib .exceptions import AmbiguousIdentifierError , NonexistentIdentifierError
@@ -43,20 +46,23 @@ class ExternalPublication:
4346 identifier : str
4447 title : str
4548 abstract : str
46- authors : list [dict [str , str ]]
49+ authors : list [dict [str , Union [ str , bool ] ]]
4750 publication_year : int
48- published_doi : Optional [str ]
49- preprint_doi : Optional [str ]
51+ publication_volume : Optional [str ]
52+ publication_pages : Optional [str ]
53+ publication_doi : Optional [str ]
5054 publication_journal : Optional [str ]
55+ preprint_doi : Optional [str ]
5156 preprint_date : Optional [date ]
5257 db_name : str
53- reference_html : str
58+
59+ _article_cit_fmt = "{author}. {title}. {journal}. {year}; {volume}:{pages}. {doi}"
5460
5561 def __init__ (
5662 self ,
5763 identifier : str ,
5864 db_name : str ,
59- external_publication : Union [RxivContentDetail , metapub . PubMedArticle ],
65+ external_publication : Union [RxivContentDetail , PubmedArticle ],
6066 ) -> None :
6167 """
6268 NOTE: We assume here that the first author in each of these author lists is the primary author
@@ -71,54 +77,46 @@ def __init__(
7177 self .db_name = db_name
7278 self .title = str (external_publication .title )
7379 self .abstract = str (external_publication .abstract )
74- self .authors = self ._generate_author_list (external_publication .author_list )
80+ self .authors = self ._generate_author_list (external_publication .authors )
7581
7682 # Non-shared fields
77- if isinstance (external_publication , metapub . PubMedArticle ):
83+ if isinstance (external_publication , PubmedArticle ):
7884 self .publication_year = int (external_publication .year )
79- self .publication_journal = str ( external_publication .journal )
80- self .published_doi = str ( external_publication .doi )
81- self .preprint_doi = None
82- self .preprint_date = None
85+ self .publication_journal = external_publication .jrnl
86+ self .publication_doi = external_publication .doi
87+ self .publication_volume = external_publication . volume
88+ self .publication_pages = external_publication . pages
8389 elif isinstance (external_publication , RxivContentDetail ):
8490 self .preprint_doi = external_publication .doi
8591 self .preprint_date = external_publication .date
86- self .publication_journal = None
8792
88- self .reference_html = str (external_publication .citation_html )
89-
90- def _generate_author_list (self , authors : Union [list [str ], list [metapub .PubMedAuthor ]]) -> list [dict [str , str ]]:
93+ def _generate_author_list (self , authors : list [str ]) -> list [dict [str , Union [str , bool ]]]:
9194 """
92- Generates a tuple of author names associated with this publication.
95+ Generates a list of author names and thier authorship level associated with this publication.
9396 """
94- if not authors :
95- return []
96-
97- if isinstance (authors [0 ], metapub .PubMedAuthor ):
98- created_authors = [
99- {"name" : ", " .join ([str (authors [0 ].last_name ), str (authors [0 ].fore_name )]), "primary" : True }
100- ]
97+ return [{"name" : author , "primary" : idx == 0 } for idx , author in enumerate (authors )]
98+
99+ def _format_authors (self ) -> str :
100+ """Helper function for returning a well formatted HTML author list"""
101+ if self .authors and len (self .authors ) > 2 :
102+ author = str (self .authors [0 ]["name" ]) + ", <i>et al</i>"
103+ elif self .authors and len (self .authors ) == 2 :
104+ author = " and " .join ([str (author ["name" ]) for author in self .authors ])
105+ elif self .authors and len (self .authors ) < 2 :
106+ author = str (self .authors [0 ]["name" ])
101107 else :
102- created_authors = [{"name" : authors [0 ], "primary" : True }]
103-
104- for author in authors [1 :]:
105- if isinstance (author , metapub .PubMedAuthor ):
106- created_authors .append (
107- {"name" : ", " .join ([str (author .last_name ), str (author .fore_name )]), "primary" : False }
108- )
109- else :
110- created_authors .append ({"name" : author , "primary" : False })
108+ author = ""
111109
112- return created_authors
110+ return author
113111
114112 @property
115113 def first_author (self ) -> str :
116- return self .authors [0 ]["name" ]
114+ return str ( self .authors [0 ]["name" ])
117115
118116 @property
119117 def secondary_authors (self ) -> list [str ]:
120118 if len (self .authors ) > 1 :
121- return [author ["name" ] for author in self .authors [1 :]]
119+ return [str ( author ["name" ]) for author in self .authors [1 :]]
122120 else :
123121 return []
124122
@@ -133,6 +131,35 @@ def url(self) -> str:
133131 else :
134132 return ""
135133
134+ @property
135+ def reference_html (self ) -> str :
136+ """
137+ Return a well formatted citation HTML string based on article data.
138+ Intends to return an identical citation html string to metapub.PubMedArticle.
139+ """
140+ author = self ._format_authors ()
141+
142+ if self .db_name in ["PubMed" ]:
143+ doi_str = "" if not self .publication_doi else self .publication_doi
144+ title = "(None)" if not self .title else self .title .strip ("." )
145+ journal = "(None)" if not self .publication_journal else self .publication_journal .strip ("." )
146+ year = "(Unknown year)" if not self .publication_year else self .publication_year
147+ volume = "(Unknown volume)" if not self .publication_volume else self .publication_volume
148+ pages = "(Unknown pages)" if not self .publication_pages else self .publication_pages
149+ else :
150+ doi_str = "" if not self .preprint_doi else self .preprint_doi
151+ title = "(None)" if not self .title else self .title .strip ("." )
152+ journal = "(None)" if not self .publication_journal else self .publication_journal .strip ("." )
153+ year = "(Unknown year)" if not self .preprint_date else self .preprint_date .year
154+
155+ # We don't receive these fields from rxiv platforms
156+ volume = "(Unknown volume)"
157+ pages = "(Unknown pages)"
158+
159+ return self ._article_cit_fmt .format (
160+ author = author , volume = volume , pages = pages , year = year , title = title , journal = journal , doi = doi_str
161+ )
162+
136163
137164async def find_or_create_doi_identifier (db : Session , identifier : str ):
138165 """
@@ -152,13 +179,18 @@ async def fetch_pubmed_article(identifier: str) -> Optional[ExternalPublication]
152179 """
153180 Fetch an existing PubMed article from NCBI
154181 """
155- fetch = metapub . PubMedFetcher ( )
182+ fetch = eutils . QueryService ( api_key = os . getenv ( "NCBI_API_KEY" ) )
156183 try :
157- article = fetch .article_by_pmid (pmid = identifier )
158- if article :
159- article = ExternalPublication (identifier = identifier , db_name = "PubMed" , external_publication = article )
184+ fetched_articles = list (PubmedArticleSet (fetch .efetch ({"db" : "pubmed" , "id" : identifier })))
185+ assert len (fetched_articles ) < 2
186+ article = ExternalPublication (identifier = identifier , db_name = "PubMed" , external_publication = fetched_articles [0 ])
187+
188+ except AssertionError as exc :
189+ raise AmbiguousIdentifierError (f"Fetched more than 1 PubMed article associated with PMID { identifier } " ) from exc
160190 except EutilsNCBIError :
161191 return None
192+ except IndexError :
193+ return None
162194 else :
163195 return article
164196
@@ -280,7 +312,7 @@ def create_generic_article(article: ExternalPublication) -> PublicationIdentifie
280312 title = article .title ,
281313 abstract = article .abstract ,
282314 authors = article .authors ,
283- publication_doi = article .published_doi ,
315+ publication_doi = article .publication_doi ,
284316 publication_year = article .publication_year ,
285317 publication_journal = article .publication_journal ,
286318 reference_html = article .reference_html ,
0 commit comments