1313import org .jabref .logic .importer .IdBasedParserFetcher ;
1414import org .jabref .logic .importer .ParseException ;
1515import org .jabref .logic .importer .Parser ;
16+ import org .jabref .logic .importer .SearchBasedParserFetcher ;
17+ import org .jabref .logic .importer .fetcher .transformers .DefaultSearchQueryTransformer ;
1618import org .jabref .logic .importer .util .JsonReader ;
1719import org .jabref .model .entry .Author ;
1820import org .jabref .model .entry .AuthorList ;
2224import org .jabref .model .entry .field .UnknownField ;
2325import org .jabref .model .entry .types .EntryType ;
2426import org .jabref .model .entry .types .StandardEntryType ;
27+ import org .jabref .model .search .query .BaseQueryNode ;
2528
2629import kong .unirest .core .json .JSONArray ;
2730import kong .unirest .core .json .JSONException ;
2831import kong .unirest .core .json .JSONObject ;
32+ import org .apache .hc .core5 .net .URIBuilder ;
2933import org .slf4j .Logger ;
3034import org .slf4j .LoggerFactory ;
3135
32- public class EuropePmcFetcher implements IdBasedParserFetcher {
36+ public class EuropePmcFetcher implements IdBasedParserFetcher , SearchBasedParserFetcher {
3337 private static final Logger LOGGER = LoggerFactory .getLogger (EuropePmcFetcher .class );
3438
3539 @ Override
3640 public URL getUrlForIdentifier (String identifier ) throws URISyntaxException , MalformedURLException {
3741 return new URI ("https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=" + identifier + "&resultType=core&format=json" ).toURL ();
3842 }
3943
44+ @ Override
45+ public URL getURLForQuery (BaseQueryNode queryList ) throws URISyntaxException , MalformedURLException {
46+ DefaultSearchQueryTransformer transformer = new DefaultSearchQueryTransformer ();
47+ String query = transformer .transformSearchQuery (queryList ).orElse ("" );
48+ URIBuilder uriBuilder = new URIBuilder ("https://www.ebi.ac.uk/europepmc/webservices/rest/search" );
49+ // Europe PMC expects a Lucene-like query in the 'query' parameter
50+ uriBuilder .addParameter ("query" , query );
51+ uriBuilder .addParameter ("resultType" , "core" );
52+ uriBuilder .addParameter ("format" , "json" );
53+ return uriBuilder .build ().toURL ();
54+ }
55+
4056 @ Override
4157 public Parser getParser () {
4258 return inputStream -> {
@@ -54,40 +70,46 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException {
5470
5571 LOGGER .debug (result .toString (2 ));
5672
57- EntryType entryType = StandardEntryType .Article ;
58- if (result .has ("pubTypeList" )) {
59- for (Object o : result .getJSONObject ("pubTypeList" ).getJSONArray ("pubType" )) {
60- if ("letter" .equalsIgnoreCase (o .toString ())) {
61- entryType = StandardEntryType .Article ;
62- break ;
63- // TODO: handle other types e.g. books
64- }
65- }
66- }
73+ // Determine entry type from pubTypeList if available
74+ EntryType entryType = determineEntryType (result );
6775
6876 BibEntry entry = new BibEntry (entryType );
6977
70- entry .setField (StandardField .TITLE , result .optString ("title" ));
71- entry .setField (StandardField .ABSTRACT , result .optString ("abstractText" ));
78+ entry .withField (StandardField .TITLE , result .optString ("title" ))
79+ .withField (StandardField .ABSTRACT , result .optString ("abstractText" ))
80+ .withField (StandardField .YEAR , result .optString ("pubYear" ))
81+ .withField (StandardField .PAGES , result .optString ("pageInfo" ));
7282
73- entry . setField ( StandardField . YEAR , result .optString ("pubYear" ) );
74- entry .setField (StandardField .VOLUME , result . optString ( "journalVolume" ));
75- entry . setField (StandardField .ISSUE , result .optString ("journalIssue " ));
83+ String doi = result .optString ("doi" );
84+ entry .withField (StandardField .DOI , doi )
85+ . withField (StandardField .PMID , result .optString ("pmid " ));
7686
77- String pages = result .optString ("pageInfo" );
78- entry .setField (StandardField .PAGES , pages );
79-
80- entry .setField (StandardField .DOI , result .optString ("doi" ));
81- entry .setField (StandardField .PMID , result .optString ("pmid" ));
82-
83- // Handle URL
84- if (result .has ("pmid" )) {
85- entry .setField (StandardField .URL , "https://pubmed.ncbi.nlm.nih.gov/" + result .getString ("pmid" ) + "/" );
87+ // Prefer fulltext URLs (e.g., PDF) when available, otherwise fall back to DOI or PubMed page
88+ String bestUrl = extractBestFullTextUrl (result ).orElseGet (() -> {
89+ if (result .has ("pmid" )) {
90+ return "https://pubmed.ncbi.nlm.nih.gov/" + result .optString ("pmid" ) + "/" ;
91+ }
92+ if (doi != null && !doi .isBlank ()) {
93+ return "https://doi.org/" + doi ;
94+ }
95+ return null ;
96+ });
97+ if (bestUrl != null && !bestUrl .isBlank ()) {
98+ entry .setField (StandardField .URL , bestUrl );
8699 }
87100
88101 if (result .has ("journalInfo" ) && result .getJSONObject ("journalInfo" ).has ("issn" )) {
89102 entry .setField (StandardField .ISSN , result .getJSONObject ("journalInfo" ).getString ("issn" ));
90103 }
104+ // Prefer a full ISO date if provided
105+ final String datePattern = "\\ d{4}-\\ d{2}-\\ d{2}" ;
106+ String printPubDate = result .optString ("printPublicationDate" );
107+ String dateOfPublication = result .optString ("dateOfPublication" );
108+ if (printPubDate != null && printPubDate .matches (datePattern )) {
109+ entry .setField (StandardField .DATE , printPubDate );
110+ } else if (dateOfPublication != null && dateOfPublication .matches (datePattern )) {
111+ entry .setField (StandardField .DATE , dateOfPublication );
112+ }
91113
92114 // Handle authors
93115 if (result .has ("authorList" ) && result .getJSONObject ("authorList" ).has ("author" )) {
@@ -113,11 +135,41 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException {
113135 }
114136 }
115137
138+ if (result .has ("keywordList" ) && result .getJSONObject ("keywordList" ).has ("keyword" )) {
139+ JSONArray keywords = result .getJSONObject ("keywordList" ).getJSONArray ("keyword" );
140+ for (int i = 0 ; i < keywords .length (); i ++) {
141+ if (!keywords .isNull (i )) {
142+ String keyword = keywords .optString (i , "" ).trim ();
143+ if (!keyword .isEmpty ()) {
144+ entry .addKeyword (keyword , ',' );
145+ }
146+ }
147+ }
148+ }
149+ if (result .has ("meshHeadingList" ) && result .getJSONObject ("meshHeadingList" ).has ("meshHeading" )) {
150+ JSONArray mesh = result .getJSONObject ("meshHeadingList" ).getJSONArray ("meshHeading" );
151+ for (int i = 0 ; i < mesh .length (); i ++) {
152+ JSONObject meshHeading = mesh .optJSONObject (i );
153+ if (meshHeading != null ) {
154+ String descriptor = meshHeading .optString ("descriptorName" , "" ).trim ();
155+ if (!descriptor .isEmpty ()) {
156+ entry .addKeyword (descriptor , ',' );
157+ }
158+ } else if (!mesh .isNull (i )) {
159+ // Sometimes MeSH heading may be a plain string
160+ String meshPlain = mesh .optString (i , "" ).trim ();
161+ if (!meshPlain .isEmpty ()) {
162+ entry .addKeyword (meshPlain , ',' );
163+ }
164+ }
165+ }
166+ }
167+
116168 if (result .has ("pubModel" )) {
117- Optional .ofNullable (result .optString ("pubModel" )).ifPresent (pubModel -> entry .setField (StandardField .HOWPUBLISHED , pubModel ));
169+ Optional .ofNullable (result .optString ("pubModel" )).ifPresent (pubModel -> entry .withField (StandardField .HOWPUBLISHED , pubModel ));
118170 }
119171 if (result .has ("publicationStatus" )) {
120- Optional .ofNullable (result .optString ("publicationStatus" )).ifPresent (pubStatus -> entry .setField (StandardField .PUBSTATE , pubStatus ));
172+ Optional .ofNullable (result .optString ("publicationStatus" )).ifPresent (pubStatus -> entry .withField (StandardField .PUBSTATE , pubStatus ));
121173 }
122174
123175 if (result .has ("journalInfo" )) {
@@ -142,6 +194,78 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException {
142194 }
143195 }
144196
197+ private EntryType determineEntryType (JSONObject result ) {
198+ EntryType defaultType = StandardEntryType .Article ;
199+ if (!(result .has ("pubTypeList" ) && result .getJSONObject ("pubTypeList" ).has ("pubType" ))) {
200+ return defaultType ;
201+ }
202+ JSONArray pubTypes = result .getJSONObject ("pubTypeList" ).getJSONArray ("pubType" );
203+ List <String > types = new ArrayList <>();
204+ for (int i = 0 ; i < pubTypes .length (); i ++) {
205+ types .add (pubTypes .optString (i , "" ).toLowerCase ());
206+ }
207+ if (matchesAny (types , "book chapter" ) || matchesAny (types , "chapter" )) {
208+ return StandardEntryType .InCollection ;
209+ }
210+ if (matchesAny (types , "book" )) {
211+ return StandardEntryType .Book ;
212+ }
213+ if (matchesAny (types , "conference" ) || matchesAny (types , "proceedings" ) || matchesAny (types , "conference paper" ) || matchesAny (types , "proceedings paper" )) {
214+ return StandardEntryType .InProceedings ;
215+ }
216+ if (matchesAny (types , "phd" ) || matchesAny (types , "phd thesis" ) || matchesAny (types , "doctoral thesis" )) {
217+ return StandardEntryType .PhdThesis ;
218+ }
219+ if (matchesAny (types , "master" ) || matchesAny (types , "masters thesis" ) || matchesAny (types , "master's thesis" )) {
220+ return StandardEntryType .MastersThesis ;
221+ }
222+ // Letters, reviews, editorials are usually articles
223+ return defaultType ;
224+ }
225+
226+ // substring matches
227+ private boolean matchesAny (List <String > list , String searchString ) {
228+ return list .stream ().anyMatch (entry -> entry .contains (searchString ));
229+ }
230+
231+ private Optional <String > extractBestFullTextUrl (JSONObject result ) {
232+ try {
233+ if (!(result .has ("fullTextUrlList" ) && result .getJSONObject ("fullTextUrlList" ).has ("fullTextUrl" ))) {
234+ return Optional .empty ();
235+ }
236+ JSONArray urls = result .getJSONObject ("fullTextUrlList" ).getJSONArray ("fullTextUrl" );
237+ // First pass: prefer open/free PDF
238+ for (int i = 0 ; i < urls .length (); i ++) {
239+ JSONObject urlEntry = urls .getJSONObject (i );
240+ String style = urlEntry .optString ("documentStyle" , "" ).toLowerCase ();
241+ String availability = urlEntry .optString ("availability" , "" ).toLowerCase ();
242+ String url = urlEntry .optString ("url" , "" );
243+ if (url == null || url .isBlank ()) {
244+ continue ;
245+ }
246+ if ((availability .contains ("open" ) || availability .contains ("free" )) && style .contains ("pdf" )) {
247+ return Optional .of (url );
248+ }
249+ }
250+ // Second pass: any PDF
251+ for (int i = 0 ; i < urls .length (); i ++) {
252+ JSONObject urlEntry = urls .getJSONObject (i );
253+ String style = urlEntry .optString ("documentStyle" , "" ).toLowerCase ();
254+ String url = urlEntry .optString ("url" , "" );
255+ if (url == null || url .isBlank ()) {
256+ continue ;
257+ }
258+ if (style .contains ("pdf" )) {
259+ return Optional .of (url );
260+ }
261+ }
262+ return Optional .empty ();
263+ } catch (JSONException e ) {
264+ LOGGER .error ("Error parsing EuropePMC response for {}" , result , e );
265+ return Optional .empty ();
266+ }
267+ }
268+
145269 @ Override
146270 public void doPostCleanup (BibEntry entry ) {
147271 new FieldFormatterCleanup (StandardField .PAGES , new NormalizePagesFormatter ()).cleanup (entry );
0 commit comments