Skip to content

Commit 497a205

Browse files
authored
Merge pull request #12 from monarch-initiative/develop
Develop
2 parents 1754fb2 + 71c6168 commit 497a205

19 files changed

+941
-92
lines changed

docs/download.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# download
2+
3+
Download hp.json and several other files needed by hpotools applications to a directory
4+
called ``data`` that will be created if it does not already exist.

docs/index.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
# hpotools
22

3-
This is a library of small applications that the HPO project regularly uses for internal purposes. Currently, we are not supporting this library for external users, but anybody is welcome to use it at their own risk. :-0.
3+
hpotools is a collection of various HPO-based applications.
4+
5+
- [download](download.md)
6+
- [mondo](mondo.md): This application uses the mondo.json file to characterize collections of GA4GH phenopackets for use in ClintLR simulations.
7+
- [word](word.md)

docs/word.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# word
2+
3+
Produces an RTF file that contains a table with all the terms
4+
that descend from a given term in the HPO. The RTF file can be opened in Word and is useful for
5+
those who want to revise an entire section of the HPO conveniently.

pom.xml

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<modelVersion>4.0.0</modelVersion>
55
<groupId>org.monarchinitiative.hpotools</groupId>
66
<artifactId>hpotools</artifactId>
7-
<version>0.0.7</version>
7+
<version>0.0.9</version>
88
<packaging>jar</packaging>
99
<name>hpotools</name>
1010

@@ -51,12 +51,12 @@
5151
<dependency>
5252
<groupId>info.picocli</groupId>
5353
<artifactId>picocli</artifactId>
54-
<version>4.7.5</version>
54+
<version>4.7.6</version>
5555
</dependency>
5656
<dependency>
5757
<groupId>org.phenopackets</groupId>
5858
<artifactId>phenopacket-schema</artifactId>
59-
<version>2.0.0</version>
59+
<version>2.0.2</version>
6060
</dependency>
6161
<dependency>
6262
<groupId>org.phenopackets.phenopackettools</groupId>
@@ -71,12 +71,12 @@
7171
<dependency>
7272
<groupId>org.monarchinitiative.biodownload</groupId>
7373
<artifactId>biodownload</artifactId>
74-
<version>1.0.1</version>
74+
<version>1.1.0</version>
7575
</dependency>
7676
<dependency>
7777
<groupId>org.apache.poi</groupId>
7878
<artifactId>poi-ooxml</artifactId>
79-
<version>5.2.2</version>
79+
<version>5.3.0</version>
8080
</dependency>
8181

8282
<dependency>
@@ -159,7 +159,7 @@
159159
<dependency>
160160
<groupId>org.junit.jupiter</groupId>
161161
<artifactId>junit-jupiter</artifactId>
162-
<version>5.9.0</version> <!-- Use the latest version available -->
162+
<version>5.11.4</version> <!-- Use the latest version available -->
163163
<scope>test</scope>
164164
</dependency>
165165
</dependencies>
@@ -176,7 +176,7 @@
176176
<plugin>
177177
<groupId>org.apache.maven.plugins</groupId>
178178
<artifactId>maven-compiler-plugin</artifactId>
179-
<version>3.8.0</version>
179+
<version>3.13.0</version>
180180
<configuration>
181181
<source>${java.version}</source>
182182
<target>${java.version}</target>
@@ -186,7 +186,7 @@
186186
<plugin>
187187
<groupId>org.apache.maven.plugins</groupId>
188188
<artifactId>maven-jar-plugin</artifactId>
189-
<version>3.2.0</version>
189+
<version>3.4.2</version>
190190
<configuration>
191191
<archive>
192192
<index>true</index>
@@ -202,12 +202,12 @@
202202
<plugin>
203203
<groupId>org.apache.maven.plugins</groupId>
204204
<artifactId>maven-install-plugin</artifactId>
205-
<version>3.0.0-M1</version>
205+
<version>3.1.3</version>
206206
</plugin>
207207
<plugin>
208208
<groupId>org.apache.maven.plugins</groupId>
209209
<artifactId>maven-deploy-plugin</artifactId>
210-
<version>3.0.0-M1</version>
210+
<version>3.1.3</version>
211211
</plugin>
212212
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
213213
<plugin>
@@ -218,12 +218,12 @@
218218
<plugin>
219219
<groupId>org.apache.maven.plugins</groupId>
220220
<artifactId>maven-surefire-plugin</artifactId>
221-
<version>3.0.0-M5</version> <!-- Use the latest version -->
221+
<version>3.5.2</version> <!-- Use the latest version -->
222222
</plugin>
223223
<plugin>
224224
<groupId>org.apache.maven.plugins</groupId>
225225
<artifactId>maven-shade-plugin</artifactId>
226-
<version>3.2.2</version>
226+
<version>3.6.0</version>
227227
<configuration>
228228
<createDependencyReducedPom>false</createDependencyReducedPom>
229229
</configuration>
@@ -251,7 +251,7 @@
251251
<plugin>
252252
<groupId>org.codehaus.mojo</groupId>
253253
<artifactId>versions-maven-plugin</artifactId>
254-
<version>2.16.2</version>
254+
<version>2.18.0</version>
255255
<configuration>
256256
<generateBackupPoms>false</generateBackupPoms>
257257
</configuration>

src/main/java/org/monarchinitiative/hpotools/Main.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ public static void main(String[] args) {
2323
.addSubcommand("mondo", new MondoCommand())
2424
.addSubcommand("onset", new OnsetCommand())
2525
.addSubcommand("simhpo", new SimHpoCommand())
26+
.addSubcommand("stats", new StatsCommand())
27+
.addSubcommand("translate", new DiseaseTranslateCommand())
28+
.addSubcommand("tsv", new Hpo2TsvCommand())
2629
.addSubcommand("word", new WordCommand())
2730
;
2831
cline.setToggleBooleanFlags(false);

src/main/java/org/monarchinitiative/hpotools/analysis/EncodingCheck.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import java.io.*;
88
import java.nio.charset.StandardCharsets;
9+
import java.util.Set;
910
import java.util.regex.Matcher;
1011
import java.util.regex.Pattern;
1112

@@ -16,6 +17,9 @@ public class EncodingCheck {
1617
private final Pattern HPO_TERM_PATTERN = Pattern.compile(pattern);
1718
private final File hpoOwlFile;
1819

20+
private final static Set<Character> ACCETABLE_CHARS = Set.of('ö','ü', 'ï', 'à', 'é');
21+
22+
private int badChars = 0;
1923

2024
/**
2125
* @param hpoOwlFile file for hp-edit.owl. Presumed checked by client code
@@ -27,9 +31,11 @@ public EncodingCheck(File hpoOwlFile) {
2731
public void checkEncoding() {
2832

2933
int i = 0;
34+
int encodingErrors = 0;
3035
try (BufferedReader br = new BufferedReader(new FileReader(hpoOwlFile))){
3136
String line;
3237
String hp_term = "";
38+
3339
while ((line = br.readLine()) != null) {
3440
Matcher m = HPO_TERM_PATTERN.matcher(line);
3541
if (m.find()) {
@@ -40,21 +46,33 @@ public void checkEncoding() {
4046
} catch (IOException e) {
4147
LOGGER.error("Could not read hpo owl file", e);
4248
}
49+
if (badChars > 0) {
50+
System.out.printf("Bad characters in HPO owl file: %d%n", badChars);
51+
} else {
52+
System.out.println("No bad encodings found in HPO owl file");
53+
}
4354
}
4455

45-
public static void checkLine(String line, int lineno, String previous) {
56+
public void checkLine(String line, int lineno, String previous) {
4657
byte[] bytes = line.getBytes(StandardCharsets.ISO_8859_1);
4758
String decodedLine = new String(bytes);
4859
if (! line.equals(decodedLine)) {
4960
for (int i=0; i< line.length();i++) {
5061
if (line.charAt(i) != decodedLine.charAt(i)) {
62+
Character unequalChar = line.charAt(i);
63+
if (ACCETABLE_CHARS.contains(unequalChar)) {
64+
continue;
65+
} else {
66+
badChars++;
67+
}
5168
System.out.println(previous);
5269
System.out.printf("L.%d:Pos:%d: ", lineno, i);
5370
int b = Math.max(0, i-20);
5471
int e = Math.min(line.length(), i+20);
5572
String ss1 = line.substring(b, i);
5673
String ss2 = line.substring(i+1, e);
57-
System.out.printf("%s{%c}%sc\n\n", ss1, line.charAt(i), ss2);
74+
System.out.printf("%s{%c}%sc\n\n", ss1, unequalChar, ss2);
75+
return ;
5876
}
5977
}
6078
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package org.monarchinitiative.hpotools.analysis;
2+
3+
import org.monarchinitiative.phenol.base.PhenolRuntimeException;
4+
import org.monarchinitiative.phenol.ontology.data.Ontology;
5+
import org.monarchinitiative.phenol.ontology.data.Term;
6+
import org.monarchinitiative.phenol.ontology.data.TermId;
7+
import org.monarchinitiative.phenol.ontology.data.TermSynonym;
8+
9+
import java.util.HashSet;
10+
import java.util.List;
11+
import java.util.Optional;
12+
import java.util.Set;
13+
14+
public class HpoStats {
15+
private final Ontology ontology;
16+
17+
private final String HP_PREFIX = "HP";
18+
19+
private final Set<Term> nonObsoleteTerms;
20+
21+
public HpoStats(Ontology ontology) {
22+
this.ontology = ontology;
23+
nonObsoleteTerms = new HashSet<>();
24+
for (TermId tid :ontology.nonObsoleteTermIds()) {
25+
Optional<Term> opt = ontology.termForTermId(tid);
26+
if (!opt.isPresent()) {
27+
throw new PhenolRuntimeException("Could not find term " + tid + " in ontology");
28+
}
29+
nonObsoleteTerms.add(opt.get());
30+
}
31+
}
32+
33+
private int countTermsWithDefinition() {
34+
int n_with_def = 0;
35+
for (Term term: nonObsoleteTerms) {
36+
if (!term.id().getPrefix().equals(HP_PREFIX))
37+
continue;
38+
String def = term.getDefinition();
39+
if (!def.isEmpty()) n_with_def++;
40+
}
41+
return n_with_def;
42+
}
43+
44+
private int countTermsWithSynonym() {
45+
int n_with_synonym = 0;
46+
for (Term term: nonObsoleteTerms) {
47+
if (!term.id().getPrefix().equals(HP_PREFIX))
48+
continue;
49+
List<TermSynonym> synonyms = term.getSynonyms();
50+
if (!synonyms.isEmpty()) n_with_synonym++;
51+
}
52+
return n_with_synonym;
53+
54+
}
55+
56+
public void printStats() {
57+
System.out.println(ontology);
58+
int n_non_obsolete = ontology.nonObsoleteTermIdCount();
59+
int n_with_def = countTermsWithDefinition();
60+
System.out.printf("Terms with definition: %d/%d (%.1f%%).\n",
61+
n_with_def, n_non_obsolete, (100.0*n_with_def/n_non_obsolete));
62+
int nWithSynonym = countTermsWithSynonym();
63+
System.out.printf("Terms with synonyms: %d/%d (%.1f%%).\n",
64+
nWithSynonym, n_non_obsolete, (100.0*nWithSynonym/n_non_obsolete));
65+
66+
}
67+
68+
69+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package org.monarchinitiative.hpotools.analysis.mondo;
2+
3+
import org.monarchinitiative.phenol.ontology.data.Dbxref;
4+
import org.monarchinitiative.phenol.ontology.data.Ontology;
5+
import org.monarchinitiative.phenol.ontology.data.Term;
6+
import org.monarchinitiative.phenol.ontology.data.TermId;
7+
8+
import java.util.*;
9+
10+
public class MondoFromOmimMapper {
11+
12+
13+
private final Ontology ontology;
14+
15+
private final Map<String, TermId> omimToMondoMap;
16+
17+
public MondoFromOmimMapper(Ontology mondo) {
18+
ontology = mondo;
19+
omimToMondoMap = fromOmimIds();
20+
}
21+
22+
23+
/**
24+
* @param mondoId Mondo Term
25+
26+
* @return Omim term id corresponding to the MONDO id (if possible)
27+
*/
28+
private Optional<TermId> getOmimIdIfPossible(TermId mondoId) {
29+
if (! ontology.containsTerm(mondoId)) {
30+
System.out.println("Obsolete");
31+
return Optional.empty();
32+
}
33+
Optional<Term> opt = ontology.termForTermId(mondoId);
34+
if (opt.isPresent()) {
35+
Term term = opt.get();
36+
Optional<Dbxref> dbxopt = term.getXrefs().stream()
37+
.filter(xr -> xr.getName().startsWith("OMIM:"))
38+
.findFirst();
39+
if (dbxopt.isPresent()) {
40+
Dbxref dbxref = dbxopt.get();
41+
String name = dbxref.getName();
42+
if (name.startsWith("OMIM:")) {
43+
return Optional.of(TermId.of(name));
44+
}
45+
}
46+
47+
}
48+
return Optional.empty();
49+
}
50+
51+
private Map<String, TermId> fromOmimIds() {
52+
53+
Map<String, TermId> omimToMondoMap = new HashMap<>();
54+
for (TermId mondoId : ontology.allTermIds()) {
55+
if (mondoId.getPrefix().equals("MONDO")) {
56+
Optional<TermId> opt = getOmimIdIfPossible(mondoId);
57+
opt.ifPresent(termId -> omimToMondoMap.put(termId.getValue(), mondoId));
58+
}
59+
60+
}
61+
return omimToMondoMap;
62+
}
63+
64+
65+
public List<String> getMappings(String geneSymbol, String omimId) {
66+
List<String> items = new ArrayList<>();
67+
if (omimToMondoMap.containsKey(omimId)) {
68+
TermId mondoId = omimToMondoMap.get(omimId);
69+
Optional<Term> opt = ontology.termForTermId(mondoId);
70+
if (opt.isPresent()) {
71+
Term term = opt.get();
72+
String label = term.getName();
73+
items.add(geneSymbol);
74+
items.add(omimId);
75+
items.add(mondoId.getValue());
76+
items.add(label);
77+
// TODO extract ORDO -- get Ordo if possible
78+
}
79+
} else {
80+
System.err.println("Warning could not find "+ omimId);
81+
}
82+
83+
return items;
84+
}
85+
}

0 commit comments

Comments
 (0)