Skip to content

Commit 7b25b9e

Browse files
committed
New chart for gProfiler 2 phenotypes. Fixes #269
1 parent 44838aa commit 7b25b9e

File tree

7 files changed

+95
-105
lines changed

7 files changed

+95
-105
lines changed

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/model/EMDataSet.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public String getLabel() {
3131
private Color color;
3232
private DataSetFiles dataSetFiles;
3333
private Map<String, Ranking> ranks = new HashMap<>();
34+
private boolean isTwoPhenotypeGeneric;
3435

3536
protected EMDataSet(EnrichmentMap map, String name, Method method, DataSetFiles files) {
3637
super(map, name);
@@ -190,4 +191,12 @@ public void createNewRanking(String name) {
190191
this.ranks.put(name, new_ranking);
191192
}
192193

194+
195+
public void setIsTwoPhenotypeGeneric(boolean b) {
196+
this.isTwoPhenotypeGeneric = b;
197+
}
198+
199+
public boolean getIsTwoPheotypeGeneric() {
200+
return method == Method.Generic && isTwoPhenotypeGeneric;
201+
}
193202
}

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/model/EnrichmentMap.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,8 @@ public Set<String> getAllRankNames() {
456456

457457
public Map<String, Ranking> getAllRanks() {
458458
Map<String, Ranking> allranks = new HashMap<>();
459-
460459
for (EMDataSet dataset : dataSets.values())
461460
allranks.putAll(dataset.getRanks());
462-
463461
return allranks;
464462
}
465463

@@ -583,6 +581,14 @@ public boolean isLegacy() {
583581
return isLegacy;
584582
}
585583

584+
public boolean isTwoPhenotypeGeneric() {
585+
return dataSets.values().stream().allMatch(EMDataSet::getIsTwoPheotypeGeneric);
586+
}
587+
588+
public boolean hasNonGSEADataSet() {
589+
return dataSets.values().stream().anyMatch(ds -> ds.getMethod() != Method.GSEA);
590+
}
591+
586592
@Override
587593
public String toString() {
588594
return getName();

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/parsers/ParseGenericEnrichmentResults.java

Lines changed: 64 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@
1010
import org.baderlab.csplugins.enrichmentmap.model.GeneSet;
1111
import org.baderlab.csplugins.enrichmentmap.model.GenericResult;
1212
import org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults;
13-
import org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor;
13+
import org.baderlab.csplugins.enrichmentmap.util.DiscreteTaskMonitor;
1414
import org.cytoscape.work.AbstractTask;
1515
import org.cytoscape.work.TaskMonitor;
1616

17+
import com.google.common.base.Strings;
1718
import com.google.common.collect.ImmutableSet;
1819

1920
public class ParseGenericEnrichmentResults extends AbstractTask {
@@ -26,154 +27,130 @@ public ParseGenericEnrichmentResults(EMDataSet dataset) {
2627

2728
@Override
2829
public void run(TaskMonitor taskMonitor) throws IOException {
29-
if(taskMonitor == null)
30-
taskMonitor = new NullTaskMonitor();
31-
taskMonitor.setTitle("Parsing Generic Result file");
32-
3330
List<String> lines = LineReader.readLines(dataset.getDataSetFiles().getEnrichmentFileName1());
34-
35-
//Get the current genesets so we can check that all the results are in the geneset list
36-
//and put the size of the genesets into the visual style
37-
Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
38-
39-
int currentProgress = 0;
40-
int maxValue = lines.size();
41-
taskMonitor.setStatusMessage("Parsing Generic Results file - " + maxValue + " rows");
42-
boolean FDR = false;
43-
44-
//skip the first line which just has the field names (start i=1)
45-
//check to see how many columns the data has
46-
String line = lines.get(0);
47-
String[] tokens = line.split("\t");
48-
int length = tokens.length;
49-
31+
DiscreteTaskMonitor tm = new DiscreteTaskMonitor(taskMonitor, lines.size());
32+
tm.setStatusMessage("Parsing Generic Results file - " + lines.size() + " rows");
33+
tm.setTitle("Parsing Generic Result file");
34+
parse(tm, lines);
35+
}
36+
37+
38+
private void parse(DiscreteTaskMonitor tm, List<String> lines) {
39+
boolean FDR = false; // false discovery rate
40+
boolean hasNegOneNES = false, hasPosOneNES = false, hasOtherNES = false;
41+
5042
EnrichmentMap map = dataset.getMap();
5143
SetOfEnrichmentResults enrichments = dataset.getEnrichments();
5244
Map<String, EnrichmentResult> results = enrichments.getEnrichments();
5345
String upPhenotype = enrichments.getPhenotype1();
5446
String downPhenotype = enrichments.getPhenotype2();
5547

48+
//Get the current genesets so we can check that all the results are in the geneset list
49+
//and put the size of the genesets into the visual style
50+
Map<String,GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
51+
5652
//check to see if there are genesets.
5753
//if there are no genesets then populate the genesets from the generic file
5854
//can only do this if the 6th column has a list of genes for that geneset.
5955
boolean populate_gs = false;
60-
if(genesets == null || genesets.isEmpty())
56+
if(genesets == null || genesets.isEmpty()) {
6157
populate_gs = true;
62-
//as this is the default for gprofiler use the Description in the visual style instead of the formatted name
63-
//but only if there is a gmt supplied. If using just the generic output file there is not field for description
64-
else
58+
} else {
59+
//as this is the default for gprofiler use the Description in the visual style instead of the formatted name
60+
//but only if there is a gmt supplied. If using just the generic output file there is no field for description
6561
dataset.getMap().getParams().setEMgmt(true);
62+
}
6663

67-
//if (length < 3)
68-
//not enough data in the file!!
69-
64+
//skip the first line which just has the field names (start i=1), check to see how many columns the data has
7065
for(int i = 1; i < lines.size(); i++) {
71-
line = lines.get(i);
72-
73-
tokens = line.split("\t");
74-
75-
//update the length each time because some line might have missing values
76-
length = tokens.length;
66+
String line = lines.get(i);
67+
String[] tokens = line.split("\t");
7768

7869
double pvalue = 1.0;
7970
double FDRqvalue = 1.0;
80-
GenericResult result;
8171
int gs_size = 0;
8272
double NES = 1.0;
73+
74+
GenericResult result;
8375

8476
//The first column of the file is the name of the geneset
8577
final String name = tokens[0].toUpperCase().trim();
8678
final String description = tokens[1].toUpperCase();
87-
8879
if(genesets.containsKey(name)) {
8980
gs_size = genesets.get(name).getGenes().size();
9081
}
91-
92-
//The third column is the nominal p-value
93-
if(tokens[2] == null || tokens[2].equalsIgnoreCase("")) {
94-
//do nothing
95-
} else {
82+
if(!Strings.isNullOrEmpty(tokens[2])) {
9683
pvalue = Double.parseDouble(tokens[2]);
9784
}
98-
99-
if(length > 3) {
100-
//the fourth column is the FDR q-value
101-
if(tokens[3] == null || tokens[3].equalsIgnoreCase("")) {
102-
//do nothing
103-
} else {
85+
86+
// if (length < 3) not enough data in the file!! The fourth column is the FDR q-value.
87+
if(tokens.length > 3) {
88+
if(!Strings.isNullOrEmpty(tokens[3])) {
10489
FDRqvalue = Double.parseDouble(tokens[3]);
10590
FDR = true;
10691
}
107-
//the fifth column is the phenotype.
108-
//it can either be a signed number or it can be text specifying the phenotype
109-
//in order for it to be parseable the text has to match the user specified phenotypes
92+
93+
// the fifth column is the phenotype.
94+
// it can either be a signed number or it can be text specifying the phenotype
95+
// in order for it to be parseable the text has to match the user specified phenotypes
11096
// and if it is a number the only important part is the sign
111-
if(length > 4) {
112-
113-
if(tokens[4] == null || tokens[4].equalsIgnoreCase("")) {
114-
115-
} else {
97+
if(tokens.length > 4) {
98+
if(!Strings.isNullOrEmpty(tokens[4])) {
11699
//check to see if the string matches the specified phenotypes
117-
if(tokens[4].equalsIgnoreCase(upPhenotype))
100+
if(tokens[4].equalsIgnoreCase(upPhenotype)) {
118101
NES = 1.0;
119-
else if(tokens[4].equalsIgnoreCase(downPhenotype))
102+
} else if(tokens[4].equalsIgnoreCase(downPhenotype)) {
120103
NES = -1.0;
121-
//try and see if the user has specified the phenotype as a number
122-
else {
104+
} else {
123105
try {
106+
//try and see if the user has specified the phenotype as a number
124107
NES = Double.parseDouble(tokens[4]);
125108
} catch(NumberFormatException nfe) {
126-
throw new IllegalThreadStateException(tokens[4]
127-
+ " is not a valid phenotype. Phenotype specified in generic enrichment results file must have the same phenotype as specified in advanced options or must be a positive or negative number.");
109+
throw new IllegalArgumentException(tokens[4] + " is not a valid phenotype. Phenotype specified in generic enrichment results file must have the same phenotype as specified in advanced options or must be a positive or negative number.");
128110
}
129111
}
130112
}
131-
132-
//ticket#57 - adding additional column to generic format, similiar to Bingo and David
133-
// that outlines the genes from the query that are found in the geneset and results in
134-
//its enrichment
135-
if(length > 5 && populate_gs) {
136-
137-
//get all the genes in the field
113+
114+
if(NES == 1.0)
115+
hasPosOneNES = true;
116+
else if(NES == -1.0)
117+
hasNegOneNES = true;
118+
else
119+
hasOtherNES = true;
120+
121+
// ticket#57 - adding additional column to generic format, similar to Bingo and David
122+
// that outlines the genes from the query that are found in the geneset and results in its enrichment
123+
if(tokens.length > 5 && populate_gs) {
138124
String[] gene_tokens = tokens[5].split(",");
139125

140126
ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
141127

142-
//All subsequent fields in the list are the geneset associated with this geneset.
128+
//All subsequent fields in the list are the genes associated with this geneset.
143129
for(String token : gene_tokens) {
144130
String gene = token.trim().toUpperCase();
145131

146-
//Check to see if the gene is already in the hashmap of genes
147-
//if it is already in the hash then get its associated key and put it into the set of genes
148132
if(map.containsGene(gene)) {
149133
builder.add(map.getHashFromGene(gene));
150-
}
151-
else if(!gene.isEmpty()) {
134+
} else if(!gene.isEmpty()) {
152135
Integer hash = map.addGene(gene).get();
153136
builder.add(hash);
154137
}
155138
}
156139

157140
GeneSet gs = new GeneSet(name, description, builder.build());
158141
gs_size = gs.getGenes().size();
159-
//put the new or filtered geneset back into the set.
160142
genesets.put(name, gs);
161143

162144
} //end of tokens>5
163145
result = new GenericResult(name, description, pvalue, gs_size, FDRqvalue, NES);
164-
} //end of tokens>4
165-
166-
else
146+
} else { //end of tokens>4
167147
result = new GenericResult(name, description, pvalue, gs_size, FDRqvalue);
168-
148+
}
169149
} else {
170150
result = new GenericResult(name, description, pvalue, gs_size);
171151
}
172152

173-
// Calculate Percentage. This must be a value between 0..100.
174-
int percentComplete = (int) (((double) currentProgress / maxValue) * 100);
175-
taskMonitor.setProgress(percentComplete);
176-
currentProgress++;
153+
tm.inc();
177154

178155
//check to see if the gene set has already been entered in the results
179156
//it is possible that one geneset will be in both phenotypes.
@@ -183,14 +160,14 @@ else if(!gene.isEmpty()) {
183160
GenericResult temp = (GenericResult) results.get(name);
184161
if(temp == null)
185162
results.put(name, result);
186-
else {
187-
if(result.getPvalue() < temp.getPvalue())
188-
results.put(name, result);
189-
}
190-
163+
else if(result.getPvalue() < temp.getPvalue())
164+
results.put(name, result);
191165
}
166+
192167
if(FDR)
193168
dataset.getMap().getParams().setFDR(FDR);
169+
if(hasPosOneNES && hasNegOneNES && !hasOtherNES)
170+
dataset.setIsTwoPhenotypeGeneric(true);
194171
}
195172

196173
}

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/style/ChartData.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ public enum ChartData {
77
NES_VALUE("NES Columns", Columns.NODE_NES),
88
P_VALUE("P-value Columns", Columns.NODE_PVALUE),
99
FDR_VALUE("Q-value (FDR) Columns", Columns.NODE_FDR_QVALUE),
10+
PHENOTYPES("Phenotypes", Columns.NODE_COLOURING),
1011
DATA_SET("Color by Data Set", Columns.DATASET_CHART);
1112

1213
private final String label;

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/util/DiscreteTaskMonitor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class DiscreteTaskMonitor implements TaskMonitor {
1818

1919

2020
public DiscreteTaskMonitor(TaskMonitor delegate, int totalWork, double low, double high) {
21-
this.delegate = delegate;
21+
this.delegate = delegate == null ? new NullTaskMonitor() : delegate;
2222
this.totalWork = totalWork;
2323
this.low = low;
2424
this.high = high;

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/view/control/ControlPanel.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -758,9 +758,12 @@ JComboBox<ChartData> getChartDataCombo() {
758758
EnrichmentMap map = getEnrichmentMap();
759759
if (map != null) {
760760
EMCreationParameters params = map.getParams();
761-
if (params != null && params.isFDR())
761+
if(params != null && params.isFDR())
762762
chartDataCombo.addItem(ChartData.FDR_VALUE);
763+
if(map.isTwoPhenotypeGeneric())
764+
chartDataCombo.addItem(ChartData.PHENOTYPES);
763765
}
766+
764767
chartDataCombo.addItem(ChartData.DATA_SET);
765768
}
766769

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/view/control/ControlPanelMediator.java

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import java.util.HashSet;
2121
import java.util.List;
2222
import java.util.Map;
23-
import java.util.Optional;
2423
import java.util.Properties;
2524
import java.util.Set;
2625
import java.util.concurrent.ForkJoinPool;
@@ -42,7 +41,6 @@
4241
import org.baderlab.csplugins.enrichmentmap.model.AbstractDataSet;
4342
import org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters;
4443
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet;
45-
import org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method;
4644
import org.baderlab.csplugins.enrichmentmap.model.EMSignatureDataSet;
4745
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap;
4846
import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapManager;
@@ -591,20 +589,16 @@ private void maybeShowContextMenu(final MouseEvent e) {
591589
* Call this method on the EDT only!
592590
*/
593591
private void setDefaults(EMViewControlPanel viewPanel, EnrichmentMap map) {
594-
List<EMDataSet> dataSets = map.getDataSetList();
595-
596-
if (dataSets.size() > 0) {
597-
ChartData chartData = ChartData.NES_VALUE; // Default for GSEA data sets
592+
if(map.getDataSetCount() > 0) {
598593
EMCreationParameters params = map.getParams();
599594

600-
if (params != null && params.isFDR()) {
601-
Optional<EMDataSet> nonGsea = dataSets.stream()
602-
.filter(ds -> ds.getMethod() != Method.GSEA)
603-
.findFirst();
604-
605-
if (nonGsea.isPresent())
606-
chartData = ChartData.FDR_VALUE; // Default for other data sets
607-
}
595+
ChartData chartData;
596+
if(map.isTwoPhenotypeGeneric())
597+
chartData = ChartData.PHENOTYPES;
598+
else if(params != null && params.isFDR() && map.hasNonGSEADataSet())
599+
chartData = ChartData.FDR_VALUE; // Default for other data sets
600+
else
601+
chartData = ChartData.NES_VALUE; // Default for GSEA data sets
608602

609603
viewPanel.getChartDataCombo().setSelectedItem(chartData);
610604
}

0 commit comments

Comments
 (0)