Skip to content

Commit 5c78e2b

Browse files
author
Jorrit Poelen
committed
propagate provenance anchor into streaming review data context
1 parent 60ac48c commit 5c78e2b

File tree

5 files changed

+110
-51
lines changed

5 files changed

+110
-51
lines changed

src/main/java/org/globalbioticinteractions/elton/cmd/CmdReview.java

Lines changed: 31 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
package org.globalbioticinteractions.elton.cmd;
22

3-
import bio.guoda.preston.HashType;
43
import com.fasterxml.jackson.databind.JsonNode;
54
import com.fasterxml.jackson.databind.ObjectMapper;
65
import com.fasterxml.jackson.databind.node.ObjectNode;
@@ -298,15 +297,38 @@ public static void logReviewHeader(PrintStream out) {
298297
logReviewComment(out, "reviewId", "reviewDate", "reviewer", "namespace", "reviewCommentType", "reviewComment", "archiveURI", "referenceUrl", "institutionCode", "collectionCode", "collectionId", "catalogNumber", "occurrenceId", "sourceCitation", "dataContext");
299298
}
300299

301-
private static void logWithContext(LogContext ctx, String msg, ReviewCommentType commentType, String reviewId, DateFactory dateFactory, String namespace, String reviewerName, PrintStream stdout) {
300+
301+
public static void log(LogContext ctx, String msg, ReviewCommentType commentType, ReviewReport report, PrintStream stdout) {
302+
if (report.getDesiredReviewCommentTypes().contains(commentType)) {
303+
if (ctx == null) {
304+
CmdReview.log(msg, report.getNamespace(), stdout, commentType.getLabel(), report.getReviewId(), report.getDateFactory(), report.getReviewerName());
305+
} else {
306+
logWithContext(ctx, msg, commentType, stdout, report);
307+
}
308+
}
309+
}
310+
311+
private static void logWithContext(LogContext ctx, String msg, ReviewCommentType commentType, PrintStream stdout, ReviewReport report) {
312+
String reviewId1 = report.getReviewId();
313+
DateFactory dateFactory1 = report.getDateFactory();
314+
String namespace = report.getNamespace();
315+
String reviewerName1 = report.getReviewerName();
302316
try {
303317
String contextString = ctx.toString();
304318
ObjectMapper mapper = new ObjectMapper();
305-
JsonNode dataContext = parseAndSortContext(contextString);
319+
final ObjectMapper objectMapper = new ObjectMapper();
320+
JsonNode dataContext1 = objectMapper.readTree(contextString);
321+
if (dataContext1 instanceof ObjectNode
322+
&& !dataContext1.has(DatasetConstant.ARCHIVE_URI)
323+
&& StringUtils.isNotBlank(report.getProvenanceAnchor())) {
324+
((ObjectNode) dataContext1).put(DatasetConstant.ARCHIVE_URI, report.getProvenanceAnchor());
325+
((ObjectNode) dataContext1).put("contentHash", report.getProvenanceAnchor());
326+
}
327+
JsonNode dataContext = sortIfPossible(dataContext1, objectMapper);
306328
ObjectNode review = mapper.createObjectNode();
307-
review.put("reviewId", reviewId);
308-
review.put("reviewDate", DateUtil.printDate(dateFactory.getDate()));
309-
review.put("reviewerName", reviewerName);
329+
review.put("reviewId", reviewId1);
330+
review.put("reviewDate", DateUtil.printDate(dateFactory1.getDate()));
331+
review.put("reviewerName", reviewerName1);
310332
review.put("reviewCommentType", commentType.getLabel());
311333
review.put("reviewComment", msg);
312334
review.put("namespace", namespace);
@@ -321,20 +343,9 @@ private static void logWithContext(LogContext ctx, String msg, ReviewCommentType
321343
String occurrenceId = getFindTermValueOrEmptyString(dataContext, SOURCE_OCCURRENCE_ID);
322344
String referenceUrl = getFindTermValueOrEmptyString(dataContext, "referenceUrl");
323345
String sourceCitation = getFindTermValueOrEmptyString(dataContext, DatasetImporterForTSV.STUDY_SOURCE_CITATION);
324-
logReviewCommentWithReviewerInfo(stdout, reviewId, dateFactory, reviewerName, namespace, commentType.getLabel(), msg, archiveURI, referenceUrl, institutionCode, collectionCode, collectionId, catalogNumber, occurrenceId, sourceCitation, reviewJsonString);
346+
logReviewCommentWithReviewerInfo(stdout, reviewId1, dateFactory1, reviewerName1, namespace, commentType.getLabel(), msg, archiveURI, referenceUrl, institutionCode, collectionCode, collectionId, catalogNumber, occurrenceId, sourceCitation, reviewJsonString);
325347
} catch (IOException e) {
326-
CmdReview.log(e.getMessage(), namespace, stdout, ReviewCommentType.note.getLabel(), reviewId, dateFactory, reviewerName);
327-
}
328-
}
329-
330-
331-
public static void log(LogContext ctx, String msg, ReviewCommentType commentType, ReviewReport report, PrintStream stdout) {
332-
if (report.getDesiredReviewCommentTypes().contains(commentType)) {
333-
if (ctx == null) {
334-
CmdReview.log(msg, report.getNamespace(), stdout, commentType.getLabel(), report.getReviewId(), report.getDateFactory(), report.getReviewerName());
335-
} else {
336-
logWithContext(ctx, msg, commentType, report.getReviewId(), report.getDateFactory(), report.getNamespace(), report.getReviewerName(), stdout);
337-
}
348+
CmdReview.log(e.getMessage(), namespace, stdout, ReviewCommentType.note.getLabel(), reviewId1, dateFactory1, reviewerName1);
338349
}
339350
}
340351

@@ -468,9 +479,7 @@ public void setReviewId(String reviewId) {
468479
}
469480

470481

471-
static JsonNode parseAndSortContext(String content) throws IOException {
472-
final ObjectMapper objectMapper = new ObjectMapper();
473-
JsonNode dataContext = objectMapper.readTree(content);
482+
static JsonNode sortIfPossible(JsonNode dataContext, ObjectMapper objectMapper) {
474483
return dataContext.isObject()
475484
? sortJsonObjByPropertyNames(objectMapper, dataContext)
476485
: dataContext;

src/main/java/org/globalbioticinteractions/elton/cmd/CmdStream.java

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,8 @@ private boolean handleDataset(final Dataset datasetProvided, boolean shouldWrite
192192
recordType,
193193
datasetProvided.getNamespace(),
194194
Arrays.asList(ReviewCommentType.values()),
195-
getStdout()
195+
getStdout(),
196+
getProvenanceAnchor() == null ? null : getProvenanceAnchor().getIRIString()
196197
);
197198
try {
198199
Dataset datasetApplied = hasConfigOverride()
@@ -281,15 +282,18 @@ public static class ImportLoggerFactoryImpl implements ImportLoggerFactory {
281282
private final String namespace;
282283
private final List<ReviewCommentType> desiredReviewCommentTypes;
283284
private final PrintStream stdout;
285+
private String provenanceAnchor;
284286

285287
public ImportLoggerFactoryImpl(String recordType,
286288
String namespace,
287289
List<ReviewCommentType> desiredReviewCommentTypes,
288-
PrintStream stdout) {
290+
PrintStream stdout,
291+
String provenanceAnchor) {
289292
this.recordType = recordType;
290293
this.namespace = namespace;
291294
this.desiredReviewCommentTypes = desiredReviewCommentTypes;
292295
this.stdout = stdout;
296+
this.provenanceAnchor = provenanceAnchor;
293297
}
294298

295299
@Override
@@ -299,7 +303,7 @@ public ImportLogger createImportLogger() {
299303
logger = new NullImportLogger();
300304
} else if (StringUtils.equals("review", recordType)) {
301305
logger = new ReviewReportLogger(
302-
new ReviewReport(namespace, desiredReviewCommentTypes),
306+
new ReviewReport(namespace, desiredReviewCommentTypes, provenanceAnchor),
303307
stdout,
304308
null,
305309
new ProgressCursorFactory() {

src/main/java/org/globalbioticinteractions/elton/cmd/ReviewReport.java

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,11 @@ public class ReviewReport {
1616
private final String reviewId;
1717
private final DateFactory dateFactory;
1818
private final String reviewerName;
19+
private final String provenanceAnchor;
1920

20-
ReviewReport(String namespace, List<ReviewCommentType> desiredReviewCommentTypes) {
21+
ReviewReport(String namespace,
22+
List<ReviewCommentType> desiredReviewCommentTypes,
23+
String provenanceAnchor) {
2124
this.infoCounter = new AtomicLong(0);
2225
this.noteCounter = new AtomicLong(0);
2326
this.interactionCounter = new AtomicLong(0);
@@ -27,9 +30,18 @@ public class ReviewReport {
2730
this.dateFactory = () -> new Date();
2831
this.reviewId = UUID.randomUUID().toString();
2932
this.reviewerName = CmdReview.REVIEWER_DEFAULT;
33+
this.provenanceAnchor = provenanceAnchor;
3034
}
3135

32-
ReviewReport(AtomicLong infoCounter, AtomicLong noteCounter, String namespace, List<ReviewCommentType> desiredReviewCommentTypes, AtomicLong lineCount, String reviewId, DateFactory dateFactory, String reviewerName, AtomicLong interactionCounter) {
36+
ReviewReport(AtomicLong infoCounter,
37+
AtomicLong noteCounter,
38+
String namespace,
39+
List<ReviewCommentType> desiredReviewCommentTypes,
40+
AtomicLong lineCount,
41+
String reviewId,
42+
DateFactory dateFactory,
43+
String reviewerName,
44+
AtomicLong interactionCounter) {
3345
this.infoCounter = infoCounter;
3446
this.noteCounter = noteCounter;
3547
this.interactionCounter = interactionCounter;
@@ -39,6 +51,7 @@ public class ReviewReport {
3951
this.dateFactory = dateFactory;
4052
this.reviewId = reviewId;
4153
this.reviewerName = reviewerName;
54+
this.provenanceAnchor = null;
4255
}
4356

4457
public AtomicLong getInfoCounter() {
@@ -78,4 +91,7 @@ public String getReviewerName() {
7891
}
7992

8093

94+
public String getProvenanceAnchor() {
95+
return provenanceAnchor;
96+
}
8197
}

src/test/java/org/globalbioticinteractions/elton/cmd/CmdReviewTest.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -380,7 +380,9 @@ public void parseAndSortContext() throws IOException {
380380
review.put("reviewId", "some id");
381381
review.put("namespace", "some namespace");
382382
final String content = "{ \"zfoo\": \"bar\", \"foo\": \"bar\"}";
383-
final JsonNode dataContextSorted = CmdReview.parseAndSortContext(content);
383+
final ObjectMapper objectMapper = new ObjectMapper();
384+
JsonNode dataContext = objectMapper.readTree(content);
385+
final JsonNode dataContextSorted = CmdReview.sortIfPossible(dataContext, objectMapper);
384386

385387
review.set("context", dataContextSorted);
386388
String reviewJsonString = mapper.writeValueAsString(review.get("context"));

src/test/java/org/globalbioticinteractions/elton/cmd/CmdStreamTest.java

Lines changed: 51 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import bio.guoda.preston.store.KeyTo3LevelPath;
88
import bio.guoda.preston.store.KeyValueStoreLocalFileSystem;
99
import bio.guoda.preston.store.ValidatingKeyValueStreamContentAddressedFactory;
10+
import com.fasterxml.jackson.databind.JsonNode;
1011
import com.fasterxml.jackson.databind.ObjectMapper;
1112
import com.fasterxml.jackson.databind.node.ObjectNode;
1213
import org.apache.commons.io.FileUtils;
@@ -138,7 +139,7 @@ public void streamSomeProvStatements() throws IOException {
138139

139140
CmdStream cmdStream = new CmdStream();
140141

141-
Collection<File> filesAfter = getFiles(provLogGeneratedByElton, outputStream, errorStream, cmdStream, "/b92cd44dcba945c760229a14d3b9becb2dd0c147.zip");
142+
Collection<File> filesAfter = runStreamAndGetFiles(provLogGeneratedByElton, outputStream, errorStream, cmdStream, "/b92cd44dcba945c760229a14d3b9becb2dd0c147.zip");
142143

143144
List<String> filenames = filesAfter.stream().map(File::getName).collect(Collectors.toList());
144145

@@ -148,8 +149,48 @@ public void streamSomeProvStatements() throws IOException {
148149
}
149150

150151
@Test
151-
public void streamSomeProvStatementsAnchored() throws IOException {
152+
public void streamSomeProvStatementsAnchoredForInteractionRecordType() throws IOException {
153+
ByteArrayOutputStream outputStream = streamData("interaction", "hash://md5/d3b07384d113edec49eaa6238ad5ff00");
154+
String columnNames = headerInteractions();
155+
assertHeaderAndMore(outputStream, columnNames);
156+
List<String> split = Arrays.asList(StringUtils.split(columnNames, "\t"));
157+
int indexContentHash = split.indexOf("contentHash");
158+
159+
assertThat(indexContentHash, Is.is(51));
160+
161+
String interactions = new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
162+
163+
String[] lines = interactions.split("\n");
164+
165+
assertThat(StringUtils.splitByWholeSeparatorPreserveAllTokens(lines[1], "\t")[indexContentHash], Is.is("hash://md5/d3b07384d113edec49eaa6238ad5ff00"));
166+
assertThat(lines[1], containsString("hash://md5/d3b07384d113edec49eaa6238ad5ff00"));
167+
}
168+
169+
@Test
170+
public void streamSomeProvStatementsAnchoredForReviewRecordType() throws IOException {
171+
String provenanceAnchor = "hash://md5/d3b07384d113edec49eaa6238ad5ff00";
172+
ByteArrayOutputStream outputStream = streamData("review", provenanceAnchor);
173+
String columnNames = headerReviewNotes();
174+
assertHeaderAndMore(outputStream, columnNames);
175+
List<String> split = Arrays.asList(StringUtils.split(columnNames, "\t"));
176+
int indexContentHash = split.indexOf("dataContext");
177+
178+
assertThat(indexContentHash, Is.is(14));
179+
180+
String interactions = new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
181+
182+
String[] lines = interactions.split("\n");
183+
184+
String dataContext = StringUtils.splitByWholeSeparatorPreserveAllTokens(lines[1], "\t")[indexContentHash];
185+
186+
JsonNode jsonNode = new ObjectMapper().readTree(dataContext);
187+
assertThat(jsonNode.at("/reviewComment").asText(), Is.is("biotic interaction found"));
188+
assertThat(jsonNode.at("/context/contentHash").asText(), Is.is(provenanceAnchor));
189+
190+
}
191+
152192

193+
private ByteArrayOutputStream streamData(String recordType, String provenanceAnchor) throws IOException {
153194
String provLogGeneratedByElton = "<urn:lsid:globalbioticinteractions.org:globalbioticinteractions/template-dataset> <http://www.w3.org/ns/prov#wasAssociatedWith> <https://github.com/globalbioticinteractions/template-dataset/archive/b92cd44dcba945c760229a14d3b9becb2dd0c147.zip> <urn:uuid:16b63a6d-153b-4f16-afed-a67fa09383a7> .\n" +
154195
"<https://github.com/globalbioticinteractions/template-dataset/archive/b92cd44dcba945c760229a14d3b9becb2dd0c147.zip> <http://purl.org/dc/elements/1.1/format> \"application/globi\" <urn:uuid:16b63a6d-153b-4f16-afed-a67fa09383a7> .\n" +
155196
"<urn:uuid:41389744-0f4d-47e2-8506-76999e1b5c34> <http://www.w3.org/ns/prov#used> <https://github.com/globalbioticinteractions/template-dataset/archive/b92cd44dcba945c760229a14d3b9becb2dd0c147.zip> <urn:uuid:16b63a6d-153b-4f16-afed-a67fa09383a7> .\n" +
@@ -163,28 +204,15 @@ public void streamSomeProvStatementsAnchored() throws IOException {
163204
ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
164205

165206
CmdStream cmdStream = new CmdStream();
166-
cmdStream.setProvenanceAnchor(RefNodeFactory.toIRI("hash://md5/d3b07384d113edec49eaa6238ad5ff00"));
207+
cmdStream.setProvenanceAnchor(RefNodeFactory.toIRI(provenanceAnchor));
208+
cmdStream.setRecordType(recordType);
167209

168-
Collection<File> filesAfter = getFiles(provLogGeneratedByElton, outputStream, errorStream, cmdStream, "/b92cd44dcba945c760229a14d3b9becb2dd0c147.zip");
210+
Collection<File> filesAfter = runStreamAndGetFiles(provLogGeneratedByElton, outputStream, errorStream, cmdStream, "/b92cd44dcba945c760229a14d3b9becb2dd0c147.zip");
169211

170212
List<String> filenames = filesAfter.stream().map(File::getName).collect(Collectors.toList());
171213

172214
assertThat(filenames, hasItems("76c00c8b64e422800b85d29db93bcfa9ebee999f52f21e16cbd00ba750e98b44"));
173-
174-
String columnNames = headerInteractions();
175-
assertHeaderAndMore(outputStream, columnNames);
176-
List<String> split = Arrays.asList(StringUtils.split(columnNames, "\t"));
177-
int indexContentHash = split.indexOf("contentHash");
178-
179-
assertThat(indexContentHash, Is.is(51));
180-
181-
String interactions = new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
182-
183-
String[] lines = interactions.split("\n");
184-
185-
assertThat(StringUtils.splitByWholeSeparatorPreserveAllTokens(lines[1], "\t")[indexContentHash], Is.is("hash://md5/d3b07384d113edec49eaa6238ad5ff00"));
186-
assertThat(lines[1], containsString("hash://md5/d3b07384d113edec49eaa6238ad5ff00"));
187-
215+
return outputStream;
188216
}
189217

190218
@Test
@@ -204,7 +232,7 @@ public void streamSomeProvStatementsEmbeddedGzip() throws IOException {
204232

205233
CmdStream cmdStream = new CmdStream();
206234

207-
Collection<File> filesAfter = getFiles(provLogGeneratedByElton, outputStream, errorStream, cmdStream, "/b92cd44dcba945c760229a14d3b9becb2dd0c147_gzip.zip");
235+
Collection<File> filesAfter = runStreamAndGetFiles(provLogGeneratedByElton, outputStream, errorStream, cmdStream, "/b92cd44dcba945c760229a14d3b9becb2dd0c147_gzip.zip");
208236

209237
List<String> filenames = filesAfter.stream().map(File::getName).collect(Collectors.toList());
210238

@@ -213,7 +241,7 @@ public void streamSomeProvStatementsEmbeddedGzip() throws IOException {
213241
assertHeaderAndMore(outputStream, headerInteractions());
214242
}
215243

216-
private Collection<File> getFiles(String provLogGeneratedByElton, ByteArrayOutputStream outputStream, ByteArrayOutputStream errorStream, CmdStream cmdStream, String resource) throws IOException {
244+
private Collection<File> runStreamAndGetFiles(String provLogGeneratedByElton, ByteArrayOutputStream outputStream, ByteArrayOutputStream errorStream, CmdStream cmdStream, String resource) throws IOException {
217245
File tmpDir = folder.newFolder("tmpDir");
218246
tmpDir.mkdirs();
219247

@@ -471,7 +499,7 @@ public void streamSomeProvStatementsTemplateDataset() throws IOException {
471499

472500
CmdStream cmdStream = new CmdStream();
473501

474-
Collection<File> filesAfter = getFiles(provLogGeneratedByEltonTrack, outputStream, errorStream, cmdStream, "/template-dataset-0.0.3.zip");
502+
Collection<File> filesAfter = runStreamAndGetFiles(provLogGeneratedByEltonTrack, outputStream, errorStream, cmdStream, "/template-dataset-0.0.3.zip");
475503

476504
List<String> filenames = filesAfter.stream().map(File::getName).collect(Collectors.toList());
477505

@@ -551,7 +579,7 @@ public void streamProvStatementsGeneratedByEltonProv() throws IOException {
551579

552580
CmdStream cmdStream = new CmdStream();
553581

554-
Collection<File> filesAfter = getFiles(provLogGeneratedByEltonTrack, outputStream, errorStream, cmdStream, "/template-dataset-0.0.3.zip");
582+
Collection<File> filesAfter = runStreamAndGetFiles(provLogGeneratedByEltonTrack, outputStream, errorStream, cmdStream, "/template-dataset-0.0.3.zip");
555583

556584
List<String> filenames = filesAfter.stream().map(File::getName).collect(Collectors.toList());
557585

0 commit comments

Comments
 (0)