Skip to content

Commit 4ccce17

Browse files
Fix issue where the same TSV file was processed twice.
1 parent 2719132 commit 4ccce17

File tree

2 files changed

+84
-30
lines changed

2 files changed

+84
-30
lines changed

oxo2-dataload/oxo2-sssom2json/src/main/java/uk/ac/ebi/spot/oxo/sssom2json/SSSOM2JSON.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
package uk.ac.ebi.spot.oxo.sssom2json;
22

3-
import org.apache.commons.cli.*;
4-
import org.slf4j.Logger;
5-
import org.slf4j.LoggerFactory;
6-
import org.apache.commons.csv.CSVFormat;
7-
import org.apache.commons.csv.CSVPrinter;
3+
import static uk.ac.ebi.spot.oxo.sssom2json.parser.TSV2JSON.processDirectory;
84

95
import java.io.File;
106
import java.io.IOException;
11-
import java.io.BufferedWriter;
127
import java.nio.file.Files;
138
import java.nio.file.Path;
149
import java.nio.file.Paths;
15-
import java.util.*;
10+
import java.util.List;
11+
import java.util.Objects;
1612
import java.util.stream.Collectors;
1713
import java.util.stream.Stream;
1814

19-
import static uk.ac.ebi.spot.oxo.sssom2json.parser.TSV2JSON.processDirectory;
15+
import org.apache.commons.cli.CommandLine;
16+
import org.apache.commons.cli.CommandLineParser;
17+
import org.apache.commons.cli.DefaultParser;
18+
import org.apache.commons.cli.HelpFormatter;
19+
import org.apache.commons.cli.Option;
20+
import org.apache.commons.cli.Options;
21+
import org.apache.commons.cli.ParseException;
22+
import org.slf4j.Logger;
23+
import org.slf4j.LoggerFactory;
2024

2125
/**
2226
* @Todo:
@@ -113,7 +117,6 @@ public static void main(String[] args) throws IOException {
113117
}
114118

115119
private static void processMappingSets(String inputDirectory, String outputDirectory) throws IOException {
116-
Stream<Path> directoriesOfMappingSets = getDirectories(inputDirectory);
117120

118121
String mappingSetDirectory = outputDirectory + File.separator + "mappingSet";
119122
String mappingDirectory = outputDirectory + File.separator + "mapping";
@@ -126,7 +129,7 @@ private static void processMappingSets(String inputDirectory, String outputDirec
126129
throw new IOException("Error creating output directories", e);
127130
}
128131

129-
directoriesOfMappingSets.forEach(path -> processDirectory(path.toString(), mappingSetDirectory, mappingDirectory));
132+
processDirectory(inputDirectory, mappingSetDirectory, mappingDirectory);
130133

131134
long usedMemoryBytes = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
132135
double usedMemoryMB = usedMemoryBytes / (1024.0 * 1024.0);

oxo2-dataload/oxo2-sssom2json/src/main/java/uk/ac/ebi/spot/oxo/sssom2json/parser/TSV2JSON.java

Lines changed: 71 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,85 @@
11
package uk.ac.ebi.spot.oxo.sssom2json.parser;
22

3-
import com.fasterxml.jackson.annotation.JsonInclude;
4-
import com.fasterxml.jackson.core.StreamReadFeature;
5-
import com.fasterxml.jackson.databind.DeserializationFeature;
6-
import com.fasterxml.jackson.databind.ObjectMapper;
7-
import com.fasterxml.jackson.databind.SerializationFeature;
8-
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
9-
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
10-
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
11-
import org.apache.commons.csv.CSVFormat;
12-
import org.apache.commons.csv.CSVParser;
13-
import org.apache.commons.csv.CSVRecord;
14-
import org.slf4j.Logger;
15-
import org.slf4j.LoggerFactory;
16-
import uk.ac.ebi.spot.oxo.model.sssom.*;
17-
18-
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.*;
19-
20-
import uk.ac.ebi.spot.oxo.sssom2json.SSSOM2JSON;
21-
import uk.ac.ebi.spot.oxo.utils.StringUtils;
3+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.AUTHOR_ID;
4+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.AUTHOR_LABEL;
5+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.COMMENT;
6+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.CONFIDENCE;
7+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.CREATOR_ID;
8+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.CREATOR_LABEL;
9+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.CURATION_RULE;
10+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.ISSUE_TRACKER_ITEM;
11+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.LICENSE;
12+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MAPPING_CARDINALITY;
13+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MAPPING_DATE;
14+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MAPPING_JUSTIFICATION;
15+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MAPPING_PROVIDER;
16+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MAPPING_SOURCE;
17+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MAPPING_TOOL;
18+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MAPPING_TOOL_VERSION;
19+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.MATCH_STRING;
20+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_CATEGORY;
21+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_ID;
22+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_LABEL;
23+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_MATCH_FIELD;
24+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_PREPROCESSING;
25+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_SOURCE;
26+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_SOURCE_VERSION;
27+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OBJECT_TYPE;
28+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.OTHER;
29+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.PREDICATE_ID;
30+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.PREDICATE_LABEL;
31+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.PREDICATE_MODIFIER;
32+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.PUBLICATION_DATE;
33+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.REVIEWER_ID;
34+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.REVIEWER_LABEL;
35+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SEE_ALSO;
36+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SIMILARITY_MEASURE;
37+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SIMILARITY_SCORE;
38+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_CATEGORY;
39+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_ID;
40+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_LABEL;
41+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_MATCH_FIELD;
42+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_PREPROCESSING;
43+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_SOURCE;
44+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_SOURCE_VERSION;
45+
import static uk.ac.ebi.spot.oxo.model.sssom.MappingConstants.SUBJECT_TYPE;
2246

2347
import java.io.File;
2448
import java.io.FileInputStream;
2549
import java.io.IOException;
2650
import java.nio.file.Files;
2751
import java.nio.file.Path;
2852
import java.nio.file.Paths;
29-
import java.util.*;
53+
import java.util.ArrayList;
54+
import java.util.Collection;
55+
import java.util.HashMap;
56+
import java.util.List;
57+
import java.util.Map;
58+
import java.util.Optional;
59+
import java.util.Scanner;
60+
import java.util.SortedSet;
61+
import java.util.TreeSet;
3062
import java.util.stream.Collectors;
3163
import java.util.stream.Stream;
3264

65+
import org.apache.commons.csv.CSVFormat;
66+
import org.apache.commons.csv.CSVParser;
67+
import org.apache.commons.csv.CSVRecord;
68+
import org.slf4j.Logger;
69+
import org.slf4j.LoggerFactory;
70+
71+
import com.fasterxml.jackson.annotation.JsonInclude;
72+
import com.fasterxml.jackson.databind.DeserializationFeature;
73+
import com.fasterxml.jackson.databind.ObjectMapper;
74+
import com.fasterxml.jackson.databind.SerializationFeature;
75+
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
76+
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
77+
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
78+
79+
import uk.ac.ebi.spot.oxo.model.sssom.CurieMap;
80+
import uk.ac.ebi.spot.oxo.model.sssom.Mapping;
81+
import uk.ac.ebi.spot.oxo.model.sssom.MappingSet;
82+
3383
/**
3484
* A SSSOM TSV file contains one MappingSet object. See the structure of the TSV format discussed
3585
* <a href="https://mapping-commons.github.io/sssom/spec-formats-tsv/#structure">here</a>.
@@ -49,6 +99,7 @@ public static void processDirectory(String directory, String mappingSetOutputDir
4999
.filter(Files::isRegularFile)
50100
.filter(path -> path.toString().endsWith(".tsv"))
51101
.forEach(path -> {
102+
logger.info("Processing file: {}", path);
52103
String filename = getFilenameWithoutExtension(path);
53104
Optional<MappingSet.Builder> externalMappingSetBuilderOptional = Optional.empty();
54105
if (filenameToExternalMetadataMap.containsKey(filename)) {

0 commit comments

Comments
 (0)