Skip to content

Commit 9bc5c5d

Browse files
authored
Add LOBID fetcher (JabRef#10135)
1 parent a7d3398 commit 9bc5c5d

File tree

6 files changed

+365
-0
lines changed

6 files changed

+365
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
1616
- We added support for customizing the citation command (e.g., `[@key1,@key2]`) when [pushing to external applications](https://docs.jabref.org/cite/pushtoapplications). [#10133](https://github.com/JabRef/jabref/issues/10133)
1717
- We added an integrity check for more special characters. [#8712](https://github.com/JabRef/jabref/issues/8712)
1818
- We added protected terms described as "Computer science". [#10222](https://github.com/JabRef/jabref/pull/10222)
19+
- We added a fetcher for [LOBID](https://lobid.org/resources/api) resources. [koppor#386](https://github.com/koppor/jabref/issues/386)
1920

2021
### Changed
2122

src/main/java/org/jabref/logic/importer/WebFetchers.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.jabref.logic.importer.fetcher.IEEE;
2929
import org.jabref.logic.importer.fetcher.INSPIREFetcher;
3030
import org.jabref.logic.importer.fetcher.IacrEprintFetcher;
31+
import org.jabref.logic.importer.fetcher.LOBIDFetcher;
3132
import org.jabref.logic.importer.fetcher.LibraryOfCongress;
3233
import org.jabref.logic.importer.fetcher.MathSciNet;
3334
import org.jabref.logic.importer.fetcher.MedlineFetcher;
@@ -119,6 +120,7 @@ public static SortedSet<SearchBasedFetcher> getSearchBasedFetchers(ImportFormatP
119120
set.add(new SemanticScholar());
120121
set.add(new ResearchGate(importFormatPreferences));
121122
set.add(new BiodiversityLibrary(importerPreferences));
123+
set.add(new LOBIDFetcher(importerPreferences));
122124
return set;
123125
}
124126

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
package org.jabref.logic.importer.fetcher;
2+
3+
import java.io.BufferedReader;
4+
import java.io.InputStreamReader;
5+
import java.net.MalformedURLException;
6+
import java.net.URISyntaxException;
7+
import java.net.URL;
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import java.util.Objects;
11+
import java.util.Optional;
12+
import java.util.stream.Collectors;
13+
import java.util.stream.IntStream;
14+
15+
import org.jabref.logic.importer.FetcherException;
16+
import org.jabref.logic.importer.ImporterPreferences;
17+
import org.jabref.logic.importer.PagedSearchBasedParserFetcher;
18+
import org.jabref.logic.importer.Parser;
19+
import org.jabref.logic.importer.fetcher.transformers.LOBIDQueryTransformer;
20+
import org.jabref.logic.util.OS;
21+
import org.jabref.model.entry.BibEntry;
22+
import org.jabref.model.entry.field.Field;
23+
import org.jabref.model.entry.field.StandardField;
24+
import org.jabref.model.entry.types.EntryType;
25+
import org.jabref.model.entry.types.StandardEntryType;
26+
27+
import kong.unirest.json.JSONArray;
28+
import kong.unirest.json.JSONObject;
29+
import org.apache.http.client.utils.URIBuilder;
30+
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
31+
import org.slf4j.Logger;
32+
import org.slf4j.LoggerFactory;
33+
34+
/**
35+
* Fetches data from the LOBID API
36+
*
37+
* @see <a href="https://lobid.org/resources/api">API documentation</a> for more details
38+
*/
39+
public class LOBIDFetcher implements PagedSearchBasedParserFetcher {
40+
41+
public static final String FETCHER_NAME = "LOBID";
42+
43+
private static final Logger LOGGER = LoggerFactory.getLogger(LOBIDFetcher.class);
44+
45+
private static final String API_URL = "https://lobid.org/resources/search";
46+
47+
private final ImporterPreferences importerPreferences;
48+
49+
public LOBIDFetcher(ImporterPreferences importerPreferences) {
50+
this.importerPreferences = importerPreferences;
51+
}
52+
53+
/**
54+
* Gets the query URL
55+
*
56+
* @param luceneQuery the search query
57+
* @param pageNumber the number of the page indexed from 0
58+
* @return URL
59+
*/
60+
@Override
61+
public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException {
62+
URIBuilder uriBuilder = new URIBuilder(API_URL);
63+
uriBuilder.addParameter("q", new LOBIDQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // search query
64+
uriBuilder.addParameter("from", String.valueOf(getPageSize() * pageNumber)); // from entry number, starts indexing at 0
65+
uriBuilder.addParameter("size", String.valueOf(getPageSize())); // page size
66+
uriBuilder.addParameter("format", "json"); // response format
67+
return uriBuilder.build().toURL();
68+
}
69+
70+
@Override
71+
public Parser getParser() {
72+
return inputStream -> {
73+
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));
74+
JSONObject jsonObject = new JSONObject(response);
75+
76+
List<BibEntry> entries = new ArrayList<>();
77+
if (jsonObject.has("member")) {
78+
JSONArray results = jsonObject.getJSONArray("member");
79+
for (int i = 0; i < results.length(); i++) {
80+
JSONObject jsonEntry = results.getJSONObject(i);
81+
BibEntry entry = parseJSONtoBibtex(jsonEntry);
82+
entries.add(entry);
83+
}
84+
}
85+
86+
return entries;
87+
};
88+
}
89+
90+
private BibEntry parseJSONtoBibtex(JSONObject jsonEntry) {
91+
BibEntry entry = new BibEntry();
92+
Field nametype = StandardField.JOURNAL;
93+
EntryType entryType = StandardEntryType.InCollection;
94+
95+
// publication type
96+
JSONArray typeArray = jsonEntry.optJSONArray("type");
97+
String types = "";
98+
if (typeArray != null) {
99+
List<String> typeList = IntStream.range(0, typeArray.length())
100+
.mapToObj(typeArray::optString)
101+
.filter(type -> !type.isEmpty())
102+
.toList();
103+
types = String.join(", ", typeList);
104+
entry.setField(StandardField.TYPE, types);
105+
}
106+
107+
if (types.toLowerCase().contains("book")) {
108+
entryType = StandardEntryType.Book;
109+
nametype = StandardField.BOOKTITLE;
110+
} else if (types.toLowerCase().contains("article")) {
111+
entryType = StandardEntryType.Article;
112+
}
113+
entry.setType(entryType);
114+
115+
// isbn
116+
String isbn = getFirstArrayElement(jsonEntry, "isbn");
117+
entry.setField(StandardField.ISBN, isbn);
118+
119+
// parent resource
120+
String bibliographicCitation = jsonEntry.optString("bibliographicCitation", "");
121+
String[] bibSplit = bibliographicCitation.split("/");
122+
String parentResource = "";
123+
if (bibSplit.length > 0) {
124+
parentResource = bibSplit[0].trim();
125+
entry.setField(nametype, parentResource);
126+
}
127+
128+
entry.setField(StandardField.ISSN, getFirstArrayElement(jsonEntry, "issn"));
129+
entry.setField(StandardField.TITLE, jsonEntry.optString("title", ""));
130+
entry.setField(StandardField.ABSTRACT, getFirstArrayElement(jsonEntry, "note"));
131+
entry.setField(StandardField.TITLEADDON, getFirstArrayElement(jsonEntry, "otherTitleInformation"));
132+
entry.setField(StandardField.EDITION, getFirstArrayElement(jsonEntry, "edition"));
133+
134+
// authors
135+
JSONArray authors = jsonEntry.optJSONArray("contribution");
136+
if (authors != null) {
137+
List<String> authorNames = getAuthorNames(authors);
138+
if (!authors.isEmpty()) {
139+
entry.setField(StandardField.AUTHOR, String.join(" and ", authorNames));
140+
}
141+
}
142+
143+
// publication
144+
Optional.ofNullable(jsonEntry.optJSONArray("publication"))
145+
.map(array -> array.getJSONObject(0))
146+
.ifPresent(publication -> {
147+
entry.setField(StandardField.PUBLISHER, getFirstArrayElement(publication, "publishedBy"));
148+
entry.setField(StandardField.LOCATION, getFirstArrayElement(publication, "location"));
149+
String date = publication.optString("startDate");
150+
entry.setField(StandardField.DATE, date);
151+
entry.setField(StandardField.YEAR, date);
152+
});
153+
154+
// url
155+
JSONObject describedBy = jsonEntry.optJSONObject("describedBy");
156+
if (describedBy != null) {
157+
entry.setField(StandardField.URL, describedBy.optString("id"));
158+
}
159+
160+
// language
161+
JSONArray languageArray = jsonEntry.optJSONArray("language");
162+
if (languageArray != null) {
163+
List<String> languageList = IntStream.range(0, languageArray.length())
164+
.mapToObj(languageArray::getJSONObject)
165+
.filter(Objects::nonNull)
166+
.map(language -> language.optString("label"))
167+
.toList();
168+
entry.setField(StandardField.LANGUAGE, String.join(" and ", languageList));
169+
}
170+
171+
// keywords
172+
JSONArray keywordArray = jsonEntry.optJSONArray("subjectslabels");
173+
if (keywordArray != null) {
174+
List<String> keywordList = IntStream.range(0, keywordArray.length())
175+
.mapToObj(keywordArray::optString)
176+
.filter(keyword -> !keyword.isEmpty())
177+
.toList();
178+
entry.setField(StandardField.KEYWORDS, String.join(", ", keywordList));
179+
}
180+
181+
return entry;
182+
}
183+
184+
private static List<String> getAuthorNames(JSONArray authors) {
185+
return IntStream.range(0, authors.length())
186+
.mapToObj(authors::getJSONObject)
187+
.map(author -> author.optJSONObject("agent"))
188+
.filter(Objects::nonNull)
189+
.map(agent -> agent.optString("label"))
190+
.toList();
191+
}
192+
193+
private static String getFirstArrayElement(JSONObject jsonEntry, String key) {
194+
return Optional.ofNullable(jsonEntry.optJSONArray(key))
195+
.map(array -> array.getString(0))
196+
.orElse("");
197+
}
198+
199+
@Override
200+
public String getName() {
201+
return FETCHER_NAME;
202+
}
203+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package org.jabref.logic.importer.fetcher.transformers;
2+
3+
public class LOBIDQueryTransformer extends AbstractQueryTransformer {
4+
5+
@Override
6+
public String getLogicalAndOperator() {
7+
return " AND ";
8+
}
9+
10+
@Override
11+
public String getLogicalOrOperator() {
12+
return " OR ";
13+
}
14+
15+
@Override
16+
protected String getLogicalNotOperator() {
17+
return "-";
18+
}
19+
20+
@Override
21+
protected String handleAuthor(String author) {
22+
return createKeyValuePair("contribution.agent.label", author);
23+
}
24+
25+
@Override
26+
protected String handleTitle(String title) {
27+
return createKeyValuePair("title", title);
28+
}
29+
30+
@Override
31+
protected String handleJournal(String journalTitle) {
32+
return createKeyValuePair("bibliographicCitation", journalTitle);
33+
}
34+
35+
@Override
36+
protected String handleYear(String year) {
37+
return "publication.startDate:[" + year + " TO " + year + "]";
38+
}
39+
40+
@Override
41+
protected String handleYearRange(String yearRange) {
42+
parseYearRange(yearRange);
43+
if (endYear == Integer.MAX_VALUE) {
44+
return yearRange;
45+
}
46+
return "publication.startDate:[" + startYear + " TO " + endYear + "]";
47+
}
48+
}

src/test/java/org/jabref/gui/slr/ManageStudyDefinitionViewModelTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ public void emptyStudyConstructorFillsDatabasesCorrectly() {
4646
new StudyCatalogItem("GVK", false),
4747
new StudyCatalogItem("IEEEXplore", true),
4848
new StudyCatalogItem("INSPIRE", false),
49+
new StudyCatalogItem("LOBID", false),
4950
new StudyCatalogItem("MathSciNet", false),
5051
new StudyCatalogItem("Medline/PubMed", false),
5152
new StudyCatalogItem("ResearchGate", false),
@@ -86,6 +87,7 @@ public void studyConstructorFillsDatabasesCorrectly(@TempDir Path tempDir) {
8687
new StudyCatalogItem("GVK", false),
8788
new StudyCatalogItem("IEEEXplore", false),
8889
new StudyCatalogItem("INSPIRE", false),
90+
new StudyCatalogItem("LOBID", false),
8991
new StudyCatalogItem("MathSciNet", false),
9092
new StudyCatalogItem("Medline/PubMed", false),
9193
new StudyCatalogItem("ResearchGate", false),

0 commit comments

Comments
 (0)