New chart for gProfiler 2 phenotypes. Fixes #269

mikekucera · mikekucera · commit 7b25b9e7467f · 2017-12-20T11:29:50.000-05:00
diff --git a/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/model/EMDataSet.java b/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/model/EMDataSet.java
@@ -31,6 +31,7 @@ public String getLabel() {
 	private Color color;
 	private DataSetFiles dataSetFiles;
 	private Map<String, Ranking> ranks = new HashMap<>();
+	private boolean isTwoPhenotypeGeneric;
 	
 	protected EMDataSet(EnrichmentMap map, String name, Method method, DataSetFiles files) {
 		super(map, name);
@@ -190,4 +191,12 @@ public void createNewRanking(String name) {
 		this.ranks.put(name, new_ranking);
 	}
 
+	
+	public void setIsTwoPhenotypeGeneric(boolean b) {
+		this.isTwoPhenotypeGeneric = b;
+	}
+	
+	public boolean getIsTwoPheotypeGeneric() {
+		return method == Method.Generic && isTwoPhenotypeGeneric;
+	}
 }
diff --git a/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/model/EnrichmentMap.java b/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/model/EnrichmentMap.java
@@ -456,10 +456,8 @@ public Set<String> getAllRankNames() {
 
 	public Map<String, Ranking> getAllRanks() {
 		Map<String, Ranking> allranks = new HashMap<>();
-		
 		for (EMDataSet dataset : dataSets.values())
 			allranks.putAll(dataset.getRanks());
-		
 		return allranks;
 	}
 	
@@ -583,6 +581,14 @@ public boolean isLegacy() {
 		return isLegacy;
 	}
 	
+	public boolean isTwoPhenotypeGeneric() {
+		return dataSets.values().stream().allMatch(EMDataSet::getIsTwoPheotypeGeneric);
+	}
+	
+	public boolean hasNonGSEADataSet() {
+		return dataSets.values().stream().anyMatch(ds -> ds.getMethod() != Method.GSEA);
+	}
+	
 	@Override
 	public String toString() {
 		return getName();
diff --git a/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/parsers/ParseGenericEnrichmentResults.java b/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/parsers/ParseGenericEnrichmentResults.java
@@ -10,10 +10,11 @@
 import org.baderlab.csplugins.enrichmentmap.model.GeneSet;
 import org.baderlab.csplugins.enrichmentmap.model.GenericResult;
 import org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults;
-import org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor;
+import org.baderlab.csplugins.enrichmentmap.util.DiscreteTaskMonitor;
 import org.cytoscape.work.AbstractTask;
 import org.cytoscape.work.TaskMonitor;
 
+import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableSet;
 
 public class ParseGenericEnrichmentResults extends AbstractTask {
@@ -26,154 +27,130 @@ public ParseGenericEnrichmentResults(EMDataSet dataset) {
 	
 	@Override
 	public void run(TaskMonitor taskMonitor) throws IOException {
-		if(taskMonitor == null)
-			taskMonitor = new NullTaskMonitor();
-		taskMonitor.setTitle("Parsing Generic Result file");
-		
 		List<String> lines = LineReader.readLines(dataset.getDataSetFiles().getEnrichmentFileName1());
-
-		//Get the current genesets so we can check that all the results are in the geneset list
-		//and put the size of the genesets into the visual style
-		Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
-
-		int currentProgress = 0;
-		int maxValue = lines.size();
-		taskMonitor.setStatusMessage("Parsing Generic Results file - " + maxValue + " rows");
-		boolean FDR = false;
-
-		//skip the first line which just has the field names (start i=1)
-		//check to see how many columns the data has
-		String line = lines.get(0);
-		String[] tokens = line.split("\t");
-		int length = tokens.length;
-
+		DiscreteTaskMonitor tm = new DiscreteTaskMonitor(taskMonitor, lines.size());
+		tm.setStatusMessage("Parsing Generic Results file - " + lines.size() + " rows");
+		tm.setTitle("Parsing Generic Result file");
+		parse(tm, lines);
+	}
+	
+	
+	private void parse(DiscreteTaskMonitor tm, List<String> lines) {
+		boolean FDR = false; // false discovery rate: set to true once any row supplies an FDR q-value
+		boolean hasNegOneNES = false, hasPosOneNES = false, hasOtherNES = false;
+		
 		EnrichmentMap map = dataset.getMap();
 		SetOfEnrichmentResults enrichments = dataset.getEnrichments();
 		Map<String, EnrichmentResult> results = enrichments.getEnrichments();
 		String upPhenotype = enrichments.getPhenotype1();
 		String downPhenotype = enrichments.getPhenotype2();
 		
+		//Get the current genesets so we can check that all the results are in the geneset list
+		//and put the size of the genesets into the visual style
+		Map<String,GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
+				
 		//check to see if there are genesets.
 		//if there are no genesets then populate the genesets from the generic file
 		//can only do this if the 6th column has a list of genes for that geneset.
 		boolean populate_gs = false;
-		if(genesets == null || genesets.isEmpty())
+		if(genesets == null || genesets.isEmpty()) {
 			populate_gs = true;
-		//as this is the default for gprofiler use the Description in the visual style instead of the formatted name
-		//but only if there is a gmt supplied.  If using just the generic output file there is not field for description
-		else
+		} else {
+			//as this is the default for gprofiler use the Description in the visual style instead of the formatted name
+			//but only if there is a gmt supplied.  If using just the generic output file there is no field for description
 			dataset.getMap().getParams().setEMgmt(true);
+		}
 
-		//if (length < 3)
-		//not enough data in the file!!
-
+		//skip the first line which just has the field names (start i=1), check to see how many columns the data has
 		for(int i = 1; i < lines.size(); i++) {
-			line = lines.get(i);
-
-			tokens = line.split("\t");
-
-			//update the length each time because some line might have missing values
-			length = tokens.length;
+			String line = lines.get(i);
+			String[] tokens = line.split("\t");
 
 			double pvalue = 1.0;
 			double FDRqvalue = 1.0;
-			GenericResult result;
 			int gs_size = 0;
 			double NES = 1.0;
+			
+			GenericResult result;
 
 			//The first column of the file is the name of the geneset
 			final String name = tokens[0].toUpperCase().trim();
 			final String description = tokens[1].toUpperCase();
-
 			if(genesets.containsKey(name)) {
 				gs_size = genesets.get(name).getGenes().size();
 			} 
-
-			//The third column is the nominal p-value
-			if(tokens[2] == null || tokens[2].equalsIgnoreCase("")) {
-				//do nothing
-			} else {
+			if(!Strings.isNullOrEmpty(tokens[2])) {
 				pvalue = Double.parseDouble(tokens[2]);
 			}
-
-			if(length > 3) {
-				//the fourth column is the FDR q-value
-				if(tokens[3] == null || tokens[3].equalsIgnoreCase("")) {
-					//do nothing
-				} else {
+			
+			// if (length < 3) not enough data in the file!! The fourth column is the FDR q-value.
+			if(tokens.length > 3) { 
+				if(!Strings.isNullOrEmpty(tokens[3])) {
 					FDRqvalue = Double.parseDouble(tokens[3]);
 					FDR = true;
 				}
-				//the fifth column is the phenotype.
-				//it can either be a signed number or it can be text specifying the phenotype
-				//in order for it to be parseable the text has to match the user specified phenotypes
+				
+				// the fifth column is the phenotype.
+				// it can either be a signed number or it can be text specifying the phenotype
+				// in order for it to be parseable the text has to match the user specified phenotypes
 				// and if it is a number the only important part is the sign
-				if(length > 4) {
-
-					if(tokens[4] == null || tokens[4].equalsIgnoreCase("")) {
-
-					} else {
+				if(tokens.length > 4) {
+					if(!Strings.isNullOrEmpty(tokens[4])) {
 						//check to see if the string matches the specified phenotypes
-						if(tokens[4].equalsIgnoreCase(upPhenotype))
+						if(tokens[4].equalsIgnoreCase(upPhenotype)) {
 							NES = 1.0;
-						else if(tokens[4].equalsIgnoreCase(downPhenotype))
+						} else if(tokens[4].equalsIgnoreCase(downPhenotype)) {
 							NES = -1.0;
-						//try and see if the user has specified the phenotype as a number
-						else {
+						} else {
 							try {
+								//try and see if the user has specified the phenotype as a number
 								NES = Double.parseDouble(tokens[4]);
 							} catch(NumberFormatException nfe) {
-								throw new IllegalThreadStateException(tokens[4]
-										+ " is not a valid phenotype.  Phenotype specified in generic enrichment results file must have the same phenotype as specified in advanced options or must be a positive or negative number.");
+								throw new IllegalArgumentException(tokens[4] + " is not a valid phenotype. Phenotype specified in generic enrichment results file must have the same phenotype as specified in advanced options or must be a positive or negative number.");
 							}
 						}
 					}
-
-					//ticket#57 - adding additional column to generic format, similiar to Bingo and David
-					// that outlines the genes from the query that are found in the geneset and results in
-					//its enrichment
-					if(length > 5 && populate_gs) {
-
-						//get all the genes in the field
+					
+					if(NES == 1.0) 
+						hasPosOneNES = true;
+					else if(NES == -1.0) 
+						hasNegOneNES = true;
+					else
+						hasOtherNES = true;
+
+					// ticket#57 - adding additional column to generic format, similar to Bingo and David
+					// that outlines the genes from the query that are found in the geneset and results in its enrichment
+					if(tokens.length > 5 && populate_gs) {
 						String[] gene_tokens = tokens[5].split(",");
 
 						ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
 						
-						//All subsequent fields in the list are the geneset associated with this geneset.
+						//All subsequent fields in the list are the genes associated with this geneset.
 						for(String token : gene_tokens) {
 							String gene = token.trim().toUpperCase();
 
-							//Check to see if the gene is already in the hashmap of genes
-							//if it is already in the hash then get its associated key and put it into the set of genes
 							if(map.containsGene(gene)) {
 								builder.add(map.getHashFromGene(gene));
-							}
-							else if(!gene.isEmpty()) {
+							} else if(!gene.isEmpty()) {
 								Integer hash = map.addGene(gene).get();
 								builder.add(hash);
 							}
 						}
 
 						GeneSet gs = new GeneSet(name, description, builder.build());
 						gs_size = gs.getGenes().size();
-						//put the new or filtered geneset back into the set.
 						genesets.put(name, gs);
 
 					} //end of tokens>5
 					result = new GenericResult(name, description, pvalue, gs_size, FDRqvalue, NES);
-				} //end of tokens>4
-
-				else
+				} else { //end of tokens>4
 					result = new GenericResult(name, description, pvalue, gs_size, FDRqvalue);
-
+				}
 			} else {
 				result = new GenericResult(name, description, pvalue, gs_size);
 			}
 
-			// Calculate Percentage.  This must be a value between 0..100.
-			int percentComplete = (int) (((double) currentProgress / maxValue) * 100);
-			taskMonitor.setProgress(percentComplete);
-			currentProgress++;
+			tm.inc();
 
 			//check to see if the gene set has already been entered in the results
 			//it is possible that one geneset will be in both phenotypes.
@@ -183,14 +160,14 @@ else if(!gene.isEmpty()) {
 			GenericResult temp = (GenericResult) results.get(name);
 			if(temp == null)
 				results.put(name, result);
-			else {
-				if(result.getPvalue() < temp.getPvalue())
-					results.put(name, result);
-			}
-
+			else if(result.getPvalue() < temp.getPvalue()) 
+				results.put(name, result);
 		}
+		
 		if(FDR)
 			dataset.getMap().getParams().setFDR(FDR);
+		if(hasPosOneNES && hasNegOneNES && !hasOtherNES)
+			dataset.setIsTwoPhenotypeGeneric(true);
 	}
 
 }
diff --git a/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/style/ChartData.java b/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/style/ChartData.java
@@ -7,6 +7,7 @@ public enum ChartData {
 	NES_VALUE("NES Columns", Columns.NODE_NES),
 	P_VALUE("P-value Columns", Columns.NODE_PVALUE),
 	FDR_VALUE("Q-value (FDR) Columns", Columns.NODE_FDR_QVALUE),
+	PHENOTYPES("Phenotypes", Columns.NODE_COLOURING),
 	DATA_SET("Color by Data Set", Columns.DATASET_CHART);
 	
 	private final String label;
diff --git a/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/util/DiscreteTaskMonitor.java b/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/util/DiscreteTaskMonitor.java
@@ -18,7 +18,7 @@ public class DiscreteTaskMonitor implements TaskMonitor {
 	
 	
 	public DiscreteTaskMonitor(TaskMonitor delegate, int totalWork, double low, double high) {
-		this.delegate = delegate;
+		this.delegate = delegate == null ? new NullTaskMonitor() : delegate;
 		this.totalWork = totalWork;
 		this.low = low;
 		this.high = high;
diff --git a/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/view/control/ControlPanel.java b/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/view/control/ControlPanel.java
@@ -758,9 +758,12 @@ JComboBox<ChartData> getChartDataCombo() {
 				EnrichmentMap map = getEnrichmentMap();
 				if (map != null) {
 					EMCreationParameters params = map.getParams();
-					if (params != null && params.isFDR())
+					if(params != null && params.isFDR())
 						chartDataCombo.addItem(ChartData.FDR_VALUE);
+					if(map.isTwoPhenotypeGeneric())
+						chartDataCombo.addItem(ChartData.PHENOTYPES);
 				}
+				
 				chartDataCombo.addItem(ChartData.DATA_SET);
 			}
 			
diff --git a/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/view/control/ControlPanelMediator.java b/EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/view/control/ControlPanelMediator.java
@@ -20,7 +20,6 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Optional;
 import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.ForkJoinPool;
@@ -42,7 +41,6 @@
 import org.baderlab.csplugins.enrichmentmap.model.AbstractDataSet;
 import org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters;
 import org.baderlab.csplugins.enrichmentmap.model.EMDataSet;
-import org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method;
 import org.baderlab.csplugins.enrichmentmap.model.EMSignatureDataSet;
 import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap;
 import org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapManager;
@@ -591,20 +589,16 @@ private void maybeShowContextMenu(final MouseEvent e) {
 	 * Call this method on the EDT only!
 	 */
 	private void setDefaults(EMViewControlPanel viewPanel, EnrichmentMap map) {
-		List<EMDataSet> dataSets = map.getDataSetList();
-		
-		if (dataSets.size() > 0) {
-			ChartData chartData = ChartData.NES_VALUE; // Default for GSEA data sets
+		if(map.getDataSetCount() > 0) {
 			EMCreationParameters params = map.getParams();
 			
-			if (params != null && params.isFDR()) {
-				Optional<EMDataSet> nonGsea = dataSets.stream()
-					.filter(ds -> ds.getMethod() != Method.GSEA)
-					.findFirst();
-				
-				if (nonGsea.isPresent())
-					chartData = ChartData.FDR_VALUE; // Default for other data sets
-			}
+			ChartData chartData;
+			if(map.isTwoPhenotypeGeneric())
+				chartData = ChartData.PHENOTYPES;
+			else if(params != null && params.isFDR() && map.hasNonGSEADataSet())
+				chartData = ChartData.FDR_VALUE; // Default for other data sets
+			else
+				chartData = ChartData.NES_VALUE; // Default for GSEA data sets
 			
 			viewPanel.getChartDataCombo().setSelectedItem(chartData);
 		}

Original file line number	Diff line number	Diff line change
`@@ -758,9 +758,12 @@ JComboBox<ChartData> getChartDataCombo() {`
`758`	`758`	`EnrichmentMap map = getEnrichmentMap();`
`759`	`759`	`if (map != null) {`
`760`	`760`	`EMCreationParameters params = map.getParams();`
`761`		`- if (params != null && params.isFDR())`
	`761`	`+ if(params != null && params.isFDR())`
`762`	`762`	`chartDataCombo.addItem(ChartData.FDR_VALUE);`
	`763`	`+ if(map.isTwoPhenotypeGeneric())`
	`764`	`+ chartDataCombo.addItem(ChartData.PHENOTYPES);`
`763`	`765`	`}`
	`766`	`+`
`764`	`767`	`chartDataCombo.addItem(ChartData.DATA_SET);`
`765`	`768`	`}`
`766`	`769`