Skip to content

Commit 32dd315

Browse files
committed
simulate PPKTs
1 parent 939988e commit 32dd315

File tree

8 files changed

+81
-112
lines changed

8 files changed

+81
-112
lines changed

pom.xml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<modelVersion>4.0.0</modelVersion>
55
<groupId>org.monarchinitiative.hpotools</groupId>
66
<artifactId>hpotools</artifactId>
7-
<version>0.1.3</version>
7+
<version>0.1.4</version>
88
<packaging>jar</packaging>
99
<name>hpotools</name>
1010

@@ -167,7 +167,11 @@
167167
<artifactId>commons-math3</artifactId>
168168
<version>3.6.1</version>
169169
</dependency>
170-
170+
<dependency>
171+
<groupId>com.google.protobuf</groupId>
172+
<artifactId>protobuf-java-util</artifactId>
173+
<version>3.21.12</version> <!-- or compatible version -->
174+
</dependency>
171175
</dependencies>
172176

173177
<build>

src/main/java/org/monarchinitiative/hpotools/Main.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ public static void main(String[] args) {
2424
.addSubcommand("maxo", new MaxoCommand())
2525
.addSubcommand("mondo", new MondoCommand())
2626
.addSubcommand("onset", new OnsetCommand())
27-
.addSubcommand("simhpo", new SimHpoCommand())
27+
.addSubcommand("sim", new SimHpoCommand())
2828
.addSubcommand("stats", new StatsCommand())
29-
.addSubcommand("translate", new DiseaseTranslateCommand())
3029
.addSubcommand("tsv", new Hpo2TsvCommand())
3130
.addSubcommand("word", new WordCommand())
3231
;

src/main/java/org/monarchinitiative/hpotools/analysis/simhpo/SimulatedHpoDiseaseGenerator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,13 @@ public Optional<Phenopacket > generateSimulatedPhenopacket(TermId omimId, int nT
6868
// choose a random onset from the range
6969
int start = onsetRange.start().days();
7070
int end = onsetRange.end().days();
71-
onset = random.nextInt(start, end + 1);
71+
onset = (int)(start+end)/2;
7272
} else {
7373
LOGGER.debug("No onset information available for disease {}", omimId.getValue());
7474
}
7575

7676
// Add some age to the phenopacket by adding a few years to the onset
77-
if (onset != 0) {
77+
if (onset > 0) {
7878
age = onset + random.nextInt(0, 10 * 365); // add up to 10 years
7979
}
8080

src/main/java/org/monarchinitiative/hpotools/cmd/DiseaseTranslateCommand.java

Lines changed: 0 additions & 68 deletions
This file was deleted.

src/main/java/org/monarchinitiative/hpotools/cmd/DownloadCommand.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@
1212

1313

1414
/**
15-
* Implementation of download in HpoWorkbench. The command is intended to download
16-
* both the OBO file and the association file. For HPO, this is {@code hp.obo} and
17-
* {@code phenotype_annotation.tab}.
18-
* Code modified from Download command in Jannovar.
15+
* Implementation of download in HpoTools. The command is intended to download
16+
* both the JSON file and the association file. For HPO, this is {@code hp.json} and
17+
* {@code phenotype.hpoa}.
1918
* @author <a href="mailto:manuel.holtgrewe@charite.de">Manuel Holtgrewe</a>
2019
* @author <a href="mailto:peter.robinson@jax.org">Peter Robinson</a>
2120
* @version 0.0.1 (May 10, 2017)

src/main/java/org/monarchinitiative/hpotools/cmd/Hpo2TsvCommand.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
import java.util.concurrent.Callable;
1414

1515

16+
/**
17+
* Create TSV file with a target term and all of its descendants.
18+
*/
1619
@CommandLine.Command(name = "word",
1720
mixinStandardHelpOptions = true,
1821
description = "Output subontology as word file (experimental)")
@@ -35,14 +38,10 @@ public Integer call() {
3538
TermId hpoId = TermId.of(startTermId);
3639
Optional<Term> opt = hpOntology.termForTermId(hpoId);
3740
if (opt.isEmpty()) {
38-
System.err.printf("[ERROR] No HPO term found for %s.\n", startTermId);
39-
}
40-
if (opt.isEmpty()) {
41-
LOGGER.error("[ERROR] No term found for {}.", startTermId);
41+
LOGGER.error("[ERROR] No HPO term found for {}.", startTermId);
4242
return 1;
4343
}
4444
Term targetTerm = opt.get();
45-
4645
String name = targetTerm.getName().replaceAll(" ", "_");
4746
String id = targetTerm.id().getValue().replaceAll(":", "_");
4847
String outfilename = String.format("%s_%s.tsv", name, id);

src/main/java/org/monarchinitiative/hpotools/cmd/HpoDistCommand.java

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
import java.util.*;
1414
import java.util.concurrent.Callable;
1515

16-
16+
/**
17+
* Test the distribution of terms in the various top-level HPO categories. Are the categories with
18+
* significantly more terms than we would expect by chance?
19+
*/
1720
@CommandLine.Command(name = "dist",
1821
mixinStandardHelpOptions = true,
1922
description = "Output subontology as word file (experimental)")
@@ -33,12 +36,6 @@ public Integer call() {
3336
double probability = ExactMultinomial.exactMultinomialTest(orderedObservedCounts, expectedProportions);
3437
System.out.println("Exact multinomial p value " + probability);
3538
outputProprotions(topLevelHpoTermCounts, orderedObservedCounts, expectedProportions, topLevelHpoLabels);
36-
/*
37-
ChiSquareTest chiSquareTest = new ChiSquareTest();
38-
double pValue = chiSquareTest.chiSquareTest(expected, observedCounts);
39-
double chiSquareStatistic = chiSquareTest.chiSquare(expected, observedCounts);
40-
*/
41-
4239
return 0;
4340
}
4441

@@ -74,7 +71,7 @@ private void outputProprotions(List<Map.Entry<TermId, Integer>> topLevelHpoTermC
7471
* Create an array with observed counts and arrange it in the same order
7572
* @param topLevelHpoTermCounts List with top-level HPO terms in order
7673
* @param observedHpoCounts map with counts observed in an experiment
77-
* @return
74+
* @return A list of observed counts
7875
*/
7976
private int[] getOrderedObservedCounts(List<Map.Entry<TermId, Integer>> topLevelHpoTermCounts, Map<TermId, Integer> observedHpoCounts) {
8077
int[] observed_counts = new int[topLevelHpoTermCounts.size()];
@@ -108,7 +105,7 @@ private double[] getExpectedProportions(List<Map.Entry<TermId, Integer>> topLeve
108105

109106
/**
110107
* Get a list of the top level terms, ordered by total number of children
111-
* @param hpo
108+
* @param hpo the Ontology
112109
* @return A list of Map Entries, with key the TermId and value the count
113110
*/
114111
private List<Map.Entry<TermId, Integer>> getTopLevelTermList(Ontology hpo) {
@@ -154,21 +151,21 @@ private Map<TermId, Integer> observedHpoCounts() {
154151
// Abnormality of the musculoskeletal system
155152
hpoCounts.put(TermId.of("HP:0033127"), 38);
156153
// Abnormality of the nervous system
157-
hpoCounts.put(TermId.of("HP:0000707"), 37);
154+
hpoCounts.put(TermId.of("HP:0000707"), 40);
158155
//Abnormality of limbs
159156
hpoCounts.put(TermId.of("HP:0040064"), 19);
160157
//Abnormality of the cardiovascular system
161158
hpoCounts.put(TermId.of("HP:0001626"), 11);
162159
//Neoplasm
163-
hpoCounts.put(TermId.of("HP:0002664"), 13);
160+
hpoCounts.put(TermId.of("HP:0002664"), 16);
164161
// Abnormality of head or neck
165162
hpoCounts.put(TermId.of("HP:0000152"), 8);
166163
// Abnormality of the eye
167164
hpoCounts.put(TermId.of("HP:0000478"), 6);
168165
// Abnormality of the integument
169-
hpoCounts.put(TermId.of("HP:0001574"), 8);
166+
hpoCounts.put(TermId.of("HP:0001574"), 10);
170167
// Growth abnormality
171-
hpoCounts.put(TermId.of("HP:0001507"), 6);
168+
hpoCounts.put(TermId.of("HP:0001507"), 5);
172169
// Abnormality of blood and blood-forming tissues
173170
hpoCounts.put(TermId.of("HP:0001871"), 6);
174171
// Abnormality of the immune system

src/main/java/org/monarchinitiative/hpotools/cmd/SimHpoCommand.java

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,35 @@
1616
import org.slf4j.LoggerFactory;
1717
import picocli.CommandLine;
1818

19+
import com.google.protobuf.util.JsonFormat;
20+
import org.phenopackets.schema.v2.Phenopacket;
21+
1922
import java.io.File;
23+
import java.io.IOException;
24+
import java.nio.file.Files;
2025
import java.nio.file.Path;
21-
import java.util.Optional;
22-
import java.util.Set;
26+
import java.nio.file.Paths;
27+
import java.util.*;
2328
import java.util.concurrent.Callable;
2429

25-
@CommandLine.Command(name = "onset",
30+
@CommandLine.Command(name = "sim",
2631
mixinStandardHelpOptions = true,
27-
description = "Calculate number of diseases with onset data")
32+
description = "Simulate phenopackets")
2833
public class SimHpoCommand extends HPOCommand implements Callable<Integer> {
2934
private final static Logger LOGGER = LoggerFactory.getLogger(SimHpoCommand.class);
3035

3136
/** default Noonan syndrome 1 163950 */
32-
@CommandLine.Option(names={"--disease"}, description = "OMIM identifer of disease to be simulated", required = true)
33-
private String omimIdentifier = "163950";
37+
@CommandLine.Option(names={"-c","--cases"}, description = "Number of cases to be simulated", required = false)
38+
private int nCases = 100;
3439

3540
/** default simulate 5 HPO terms */
3641
@CommandLine.Option(names={"-n","--nterms"}, description = "number of HPO terms to be simulated", required = false)
3742
private int nterms = 5;
3843

44+
/** output directory for simulated phenopackets */
45+
@CommandLine.Option(names={"--outdir"}, description = "Output directory", required = false)
46+
private File outdir = new File("sim_data");
47+
3948
@Override
4049
public Integer call() throws Exception {
4150
if (hpopath==null) {
@@ -58,18 +67,48 @@ public Integer call() throws Exception {
5867
HpoDiseaseLoader loader = HpoDiseaseLoaders.defaultLoader(ontology, options);
5968
Path annotpath = annotFile.toPath();
6069
HpoDiseases diseases = loader.load(annotpath);
61-
62-
SimulatedHpoDiseaseGenerator generator = new SimulatedHpoDiseaseGenerator(diseases, ontology);
63-
TermId diseaseId = TermId.of("OMIM", omimIdentifier);
64-
Optional<Phenopacket> opt = generator.generateSimulatedPhenopacket(diseaseId);
65-
// When we get here, we want to output the simulated phenopackets to file
66-
// now, the generator is just a skeleton and it always returns Optional.empty()!!!
67-
if (opt.isPresent()) {
68-
System.out.println(opt.get());
70+
// make directory if needed
71+
if (outdir.exists()) {
72+
System.out.println("[WARN] Output directory already exists: " + outdir.getAbsolutePath());
6973
} else {
70-
System.out.println("Could not retrieve phenopacket for \"" + omimIdentifier + "\"");
71-
System.out.println("NEED TO IMPLEMENT generateSimulatedPhenopacket()");
74+
boolean created = outdir.mkdirs();
75+
if (!created) {
76+
System.err.println("[ERROR] Could not create directory at: " + outdir.getAbsolutePath());
77+
return 1;
78+
}
7279
}
80+
SimulatedHpoDiseaseGenerator generator = new SimulatedHpoDiseaseGenerator(diseases, ontology);
81+
List<TermId> diseaseIds = new ArrayList<>(diseases.diseaseIds());
82+
int count = 0;
83+
while (count < nCases) {
84+
Random rand = new Random();
85+
int idx = rand.nextInt(diseaseIds.size());
86+
TermId randomOmimId = diseaseIds.get(idx);
87+
Optional<Phenopacket> opt = generator.generateSimulatedPhenopacket(randomOmimId);
88+
// When we get here, we want to output the simulated phenopackets to file
89+
// now, the generator is just a skeleton and it always returns Optional.empty()!!!
90+
if (opt.isPresent()) {
91+
Phenopacket ppkt = opt.get();
92+
String jsonString = JsonFormat.printer().print(ppkt);
93+
System.out.println(jsonString);
94+
String ppkt_id = ppkt.getId();
95+
String cleaned = ppkt_id.replaceAll("[^\\x00-\\x7F]", "_");
96+
String outname = String.format("%s.json", cleaned);
97+
try {
98+
Path filePath = Paths.get(String.valueOf(outdir), outname);
99+
Files.write(filePath, jsonString.getBytes());
100+
System.out.println("File written successfully");
101+
} catch (IOException e) {
102+
System.err.println("Error writing file: " + e.getMessage());
103+
}
104+
} else {
105+
System.out.println("Could not retrieve phenopacket for \"" + count + "\"");
106+
System.out.println("NEED TO IMPLEMENT generateSimulatedPhenopacket()");
107+
}
108+
count++;
109+
}
110+
111+
73112

74113
return 0;
75114
}

0 commit comments

Comments
 (0)