Skip to content

Commit 6b4f64c

Browse files
vins01-4scienceAdamF42
authored andcommitted
Merged in task/dspace-cris-2023_02_x/DSC-2444 (pull request DSpace#4976)
Task/dspace cris 2023 02 x/DSC-2444 Approved-by: Fapohunda, Adamo
2 parents d03b317 + ffb6105 commit 6b4f64c

File tree

10 files changed

+2036
-2174
lines changed

10 files changed

+2036
-2174
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/**
2+
* The contents of this file are subject to the license and copyright
3+
* detailed in the LICENSE and NOTICE files at the root of the source
4+
* tree and available online at
5+
*
6+
* http://www.dspace.org/license/
7+
*/
8+
package org.dspace.importer.external.metadatamapping.contributor;
9+
10+
import java.util.ArrayList;
11+
import java.util.Collection;
12+
import java.util.Iterator;
13+
14+
import com.fasterxml.jackson.core.JsonProcessingException;
15+
import com.fasterxml.jackson.databind.JsonNode;
16+
import com.fasterxml.jackson.databind.ObjectMapper;
17+
import org.apache.commons.lang3.StringUtils;
18+
import org.apache.logging.log4j.LogManager;
19+
import org.apache.logging.log4j.Logger;
20+
21+
/**
22+
* This Processor extracts values from a JSON array, but only when a condition
23+
* on another attribute is met. For example, to extract all values of
24+
* /names/value where /names/types contains "ror_display".
25+
*
26+
* Configurable via:
27+
* pathToArray: e.g., /names
28+
* elementAttribute: e.g., /value
29+
* filterAttribute: e.g., /types
30+
* requiredValueInFilter: e.g., ror_display
31+
*
32+
* Supports filtering when the filter attribute is either a JSON array or a single string.
33+
*
34+
* Example JSON:
35+
* {
36+
* "items": [{
37+
* "names": [
38+
* { "types": ["label", "ror_display"], "value": "Instituto Federal do Piauí" },
39+
* { "types": ["acronym"], "value": "IFPI" }
40+
* ]
41+
* }]
42+
* }
43+
* This processor can extract "Instituto Federal do Piauí" using proper configuration.
44+
*
45+
* Author: Jesiel (based on Mykhaylo Boychuk’s original processor)
46+
*/
47+
public class ConditionalArrayElementAttributeProcessor implements JsonPathMetadataProcessor {
48+
49+
private static final Logger log = LogManager.getLogger();
50+
51+
private String pathToArray;
52+
private String elementAttribute;
53+
private String filterAttribute;
54+
private String requiredValueInFilter;
55+
56+
@Override
57+
public Collection<String> processMetadata(String json) {
58+
JsonNode rootNode = convertStringJsonToJsonNode(json);
59+
Collection<String> results = new ArrayList<>();
60+
61+
if (rootNode == null) {
62+
return results;
63+
}
64+
65+
Iterator<JsonNode> array = rootNode.at(pathToArray).iterator();
66+
while (array.hasNext()) {
67+
JsonNode element = array.next();
68+
JsonNode filterNode = element.at(filterAttribute);
69+
70+
boolean match = false;
71+
72+
if (filterNode.isArray()) {
73+
for (JsonNode filterValue : filterNode) {
74+
if (requiredValueInFilter.equalsIgnoreCase(filterValue.textValue())) {
75+
match = true;
76+
break;
77+
}
78+
}
79+
} else if (filterNode.isTextual()) {
80+
if (requiredValueInFilter.equalsIgnoreCase(filterNode.textValue())) {
81+
match = true;
82+
}
83+
}
84+
85+
if (match) {
86+
JsonNode valueNode = element.at(elementAttribute);
87+
if (valueNode.isTextual()) {
88+
results.add(valueNode.textValue());
89+
} else if (valueNode.isArray()) {
90+
for (JsonNode item : valueNode) {
91+
if (item.isTextual() && StringUtils.isNotBlank(item.textValue())) {
92+
results.add(item.textValue());
93+
}
94+
}
95+
}
96+
}
97+
}
98+
99+
return results;
100+
}
101+
102+
private JsonNode convertStringJsonToJsonNode(String json) {
103+
ObjectMapper mapper = new ObjectMapper();
104+
try {
105+
return mapper.readTree(json);
106+
} catch (JsonProcessingException e) {
107+
log.error("Unable to process JSON response.", e);
108+
return null;
109+
}
110+
}
111+
112+
public void setPathToArray(String pathToArray) {
113+
this.pathToArray = pathToArray;
114+
}
115+
116+
public void setElementAttribute(String elementAttribute) {
117+
this.elementAttribute = elementAttribute;
118+
}
119+
120+
public void setFilterAttribute(String filterAttribute) {
121+
this.filterAttribute = filterAttribute;
122+
}
123+
124+
public void setRequiredValueInFilter(String requiredValueInFilter) {
125+
this.requiredValueInFilter = requiredValueInFilter;
126+
}
127+
}

dspace-api/src/main/java/org/dspace/importer/external/ror/service/RorImportMetadataSourceServiceImpl.java

Lines changed: 52 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
*/
88
package org.dspace.importer.external.ror.service;
99

10+
import static org.dspace.importer.external.liveimportclient.service.LiveImportClientImpl.HEADER_PARAMETERS;
11+
1012
import java.net.URISyntaxException;
1113
import java.util.ArrayList;
1214
import java.util.Collection;
@@ -15,8 +17,6 @@
1517
import java.util.List;
1618
import java.util.Map;
1719
import java.util.concurrent.Callable;
18-
import java.util.stream.Collectors;
19-
import javax.el.MethodNotFoundException;
2020

2121
import com.fasterxml.jackson.core.JsonProcessingException;
2222
import com.fasterxml.jackson.databind.JsonNode;
@@ -32,16 +32,25 @@
3232
import org.dspace.importer.external.exception.MetadataSourceException;
3333
import org.dspace.importer.external.liveimportclient.service.LiveImportClient;
3434
import org.dspace.importer.external.service.AbstractImportMetadataSourceService;
35+
import org.dspace.services.factory.DSpaceServicesFactory;
3536
import org.springframework.beans.factory.annotation.Autowired;
3637

38+
/**
39+
* Implements an {@code AbstractImportMetadataSourceService} for querying ROR services.
40+
*
41+
* @author Vincenzo Mecca (vins01-4science - vincenzo.mecca at 4science.com)
42+
*/
3743
public class RorImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<String>
3844
implements RorImportMetadataSourceService {
3945

4046
private final static Logger log = LogManager.getLogger();
47+
protected static final String ROR_IDENTIFIER_PREFIX = "https://ror.org/";
48+
protected static final String ROR_CLIENT_ID_HEADER = "Client-Id";
49+
protected static final String ROR_CLIENT_ID_PROP = "ror.client-id";
4150

4251
private String url;
4352

44-
private int timeout = 1000;
53+
private int timeout = 5000;
4554

4655
@Autowired
4756
private LiveImportClient liveImportClient;
@@ -69,7 +78,7 @@ public int getRecordsCount(Query query) throws MetadataSourceException {
6978

7079
@Override
7180
public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException {
72-
return retry(new SearchByQueryCallable(query, start));
81+
return retry(new SearchByQueryCallable(query));
7382
}
7483

7584
@Override
@@ -85,20 +94,20 @@ public ImportRecord getRecord(Query query) throws MetadataSourceException {
8594

8695
@Override
8796
public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException {
88-
throw new MethodNotFoundException("This method is not implemented for ROR");
97+
throw new UnsupportedOperationException("This method is not implemented for ROR");
8998
}
9099

91100
@Override
92101
public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException {
93-
throw new MethodNotFoundException("This method is not implemented for ROR");
102+
throw new UnsupportedOperationException("This method is not implemented for ROR");
94103
}
95104

96105
@Override
97106
public void init() throws Exception {
98107
}
99108

100109
/**
101-
* This class is a Callable implementation to get ADS entries based on query
110+
* This class is a Callable implementation to get ROR entries based on query
102111
* object. This Callable uses as query value the string queryString passed to the
103112
* constructor. If the object is constructed through a Query instance, a
104113
* Query's map entry with key "query" will be used. Pagination is supported too,
@@ -110,10 +119,9 @@ private class SearchByQueryCallable implements Callable<List<ImportRecord>> {
110119

111120
private Query query;
112121

113-
private SearchByQueryCallable(String queryString, int start) {
122+
private SearchByQueryCallable(String queryString) {
114123
query = new Query();
115124
query.addParameter("query", queryString);
116-
query.addParameter("start", start);
117125
}
118126

119127
private SearchByQueryCallable(Query query) {
@@ -122,13 +130,12 @@ private SearchByQueryCallable(Query query) {
122130

123131
@Override
124132
public List<ImportRecord> call() throws Exception {
125-
return search(query.getParameterAsClass("query", String.class),
126-
query.getParameterAsClass("start", Integer.class));
133+
return search(query.getParameterAsClass("query", String.class));
127134
}
128135
}
129136

130137
/**
131-
* This class is a Callable implementation to get an ADS entry using bibcode The
138+
* This class is a Callable implementation to get a ROR entry using its id. The
132139
* id to use can be passed through the constructor as a String or as a
133140
* Query's map entry, with the key "id".
134141
*
@@ -153,12 +160,12 @@ public List<ImportRecord> call() throws Exception {
153160
}
154161

155162
/**
156-
* This class is a Callable implementation to count the number of entries for an
157-
* ADS query. This Callable use as query value to ADS the string queryString
158-
* passed to constructor. If the object will be construct through Query.class
163+
* This class is a Callable implementation to count the number of entries for a
164+
* ROR query. This Callable uses as query value to ROR the string queryString
165+
* passed to the constructor. If the object is constructed through a {@code Query}
159166
* instance, the value of the Query's map with the key "query" will be used.
160-
*
161-
* @author Mykhaylo Boychuk (mykhaylo.boychuk@4science.com)
167+
*
168+
* @author Vincenzo Mecca (vins01-4science - vincenzo.mecca at 4science.com)
162169
*/
163170
private class CountByQueryCallable implements Callable<Integer> {
164171
private Query query;
@@ -178,9 +185,15 @@ public Integer call() throws Exception {
178185
}
179186
}
180187

188+
/**
189+
* Counts the number of results for the given query.
190+
*
191+
* @param query the query string to count results for
192+
* @return the number of results for the given query
193+
*/
181194
public Integer count(String query) {
182195
try {
183-
Map<String, Map<String, String>> params = new HashMap<String, Map<String, String>>();
196+
Map<String, Map<String, String>> params = getBaseParams();
184197

185198
URIBuilder uriBuilder = new URIBuilder(this.url);
186199
uriBuilder.addParameter("query", query);
@@ -199,43 +212,40 @@ public Integer count(String query) {
199212

200213
private List<ImportRecord> searchById(String id) {
201214

202-
List<ImportRecord> adsResults = new ArrayList<>();
215+
List<ImportRecord> importResults = new ArrayList<>();
203216

204-
id = StringUtils.removeStart(id, "https://ror.org/");
217+
id = StringUtils.removeStart(id, ROR_IDENTIFIER_PREFIX);
205218

206219
try {
207-
Map<String, Map<String, String>> params = new HashMap<String, Map<String, String>>();
220+
Map<String, Map<String, String>> params = getBaseParams();
208221

209222
URIBuilder uriBuilder = new URIBuilder(this.url + "/" + id);
210223

211224
String resp = liveImportClient.executeHttpGetRequest(timeout, uriBuilder.toString(), params);
212225
if (StringUtils.isEmpty(resp)) {
213-
return adsResults;
226+
return importResults;
214227
}
215228

216229
JsonNode jsonNode = convertStringJsonToJsonNode(resp);
217-
adsResults.add(transformSourceRecords(jsonNode.toString()));
230+
importResults.add(transformSourceRecords(jsonNode.toString()));
218231

219232
} catch (URISyntaxException e) {
220233
e.printStackTrace();
221234
}
222-
return adsResults;
235+
return importResults;
223236
}
224237

225-
private List<ImportRecord> search(String query, Integer start) {
226-
List<ImportRecord> adsResults = new ArrayList<>();
238+
private List<ImportRecord> search(String query) {
239+
List<ImportRecord> importResults = new ArrayList<>();
227240
try {
228-
Map<String, Map<String, String>> params = new HashMap<String, Map<String, String>>();
241+
Map<String, Map<String, String>> params = getBaseParams();
229242

230243
URIBuilder uriBuilder = new URIBuilder(this.url);
231244
uriBuilder.addParameter("query", query);
232-
if (start != null) {
233-
uriBuilder.addParameter("page", String.valueOf((start / 20) + 1));
234-
}
235245

236246
String resp = liveImportClient.executeHttpGetRequest(timeout, uriBuilder.toString(), params);
237247
if (StringUtils.isEmpty(resp)) {
238-
return adsResults;
248+
return importResults;
239249
}
240250

241251
JsonNode jsonNode = convertStringJsonToJsonNode(resp);
@@ -244,28 +254,25 @@ private List<ImportRecord> search(String query, Integer start) {
244254
Iterator<JsonNode> nodes = docs.elements();
245255
while (nodes.hasNext()) {
246256
JsonNode node = nodes.next();
247-
adsResults.add(transformSourceRecords(node.toString()));
257+
importResults.add(transformSourceRecords(node.toString()));
248258
}
249259
} else {
250-
adsResults.add(transformSourceRecords(docs.toString()));
260+
importResults.add(transformSourceRecords(docs.toString()));
251261
}
252262
} catch (URISyntaxException e) {
253263
e.printStackTrace();
254264
}
265+
return importResults;
266+
}
255267

256-
if (start == null) {
257-
return adsResults;
258-
}
259-
260-
if (start % 20 == 0) {
261-
return adsResults.stream()
262-
.limit(10)
263-
.collect(Collectors.toList());
264-
} else {
265-
return adsResults.stream()
266-
.skip(10)
267-
.collect(Collectors.toList());
268+
protected Map<String, Map<String, String>> getBaseParams() {
269+
Map<String, Map<String, String>> params = new HashMap<>();
270+
String rorClientId =
271+
DSpaceServicesFactory.getInstance().getConfigurationService().getProperty(ROR_CLIENT_ID_PROP);
272+
if (StringUtils.isNotEmpty(rorClientId)) {
273+
params.put(HEADER_PARAMETERS, Map.of(ROR_CLIENT_ID_HEADER, rorClientId));
268274
}
275+
return params;
269276
}
270277

271278
private JsonNode convertStringJsonToJsonNode(String json) {

0 commit comments

Comments
 (0)