Skip to content

Commit 199d12c

Browse files
committed
Resolver recognizes Bingo input file.
Refs #449
1 parent f3db179 commit 199d12c

File tree

3 files changed

+107
-84
lines changed

3 files changed

+107
-84
lines changed

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/resolver/DataSetResolver.java

Lines changed: 104 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package org.baderlab.csplugins.enrichmentmap.resolver;
22

3+
import java.io.BufferedReader;
4+
import java.io.FileReader;
35
import java.io.IOException;
4-
import java.io.UncheckedIOException;
5-
import java.nio.charset.Charset;
66
import java.nio.file.Files;
77
import java.nio.file.Path;
88
import java.nio.file.Paths;
@@ -55,6 +55,16 @@ public boolean isEnrichmentFile() {
5555
}
5656

5757

58+
private static class DataLines {
59+
final String firstLine;
60+
final String firstDataLine;
61+
public DataLines(String firstLine, String firstDataLine) {
62+
this.firstLine = firstLine;
63+
this.firstDataLine = firstDataLine;
64+
}
65+
}
66+
67+
5868
public static List<DataSetParameters> guessDataSets(Path rootFolder, CancelStatus cancelStatus) {
5969
// First test if rootFolder is itself a GSEA results folder
6070
Optional<DataSetParameters> dataset = GSEAResolver.resolveGSEAResultsFolder(rootFolder);
@@ -225,45 +235,45 @@ private static Type guess(Path path) {
225235
Map<Type,Integer> scores = new EnumMap<>(Type.class);
226236

227237
String fileName = path.getFileName().toString();
228-
Optional<String> firstLine = getFirstDataLine(path);
238+
DataLines lines = getFirstDataLines(path);
229239

230-
if(firstLine.isPresent() && isTabSeparated(firstLine.get())) {
231-
// Guess based on extension and/or first line of file
232-
if(hasExtension(path, "gct")) {
233-
addScore(scores, Type.RANKS, 1);
234-
}
235-
if(hasExtension(path, "gmt")) {
236-
addScore(scores, Type.GENE_SETS, 1);
237-
}
238-
if(hasExtension(path, "rnk")) {
239-
addScore(scores, Type.RANKS, 1);
240-
addScore(scores, Type.EXPRESSION, 1);
241-
}
242-
if(hasExtension(path, "xls", "bgo", "tsv", "txt")) {
243-
Type type = guessEnrichmentType(path);
244-
if(type == Type.IGNORE) {
245-
addScore(scores, Type.ENRICHMENT_GENERIC, 1);
240+
if(lines != null) {
241+
if(isTabSeparated(lines.firstDataLine)) {
242+
// Guess based on extension and/or first line of file
243+
if(hasExtension(path, "gct")) {
244+
addScore(scores, Type.RANKS, 1);
245+
}
246+
if(hasExtension(path, "gmt")) {
247+
addScore(scores, Type.GENE_SETS, 1);
248+
}
249+
if(hasExtension(path, "rnk")) {
250+
addScore(scores, Type.RANKS, 1);
246251
addScore(scores, Type.EXPRESSION, 1);
247-
} else {
252+
}
253+
if(hasExtension(path, "xls", "tsv", "txt")) {
254+
Type type = guessEnrichmentType(lines.firstLine);
248255
addScore(scores, type, 2); // this is a lot of evidence
249256
}
250-
}
251-
252-
// Test first line
253-
if(!isRankLine(firstLine.get())) {
254-
addScore(scores, Type.RANKS, -1);
255-
}
256-
if(!isExpressionLine(firstLine.get())) {
257-
addScore(scores, Type.EXPRESSION, -1);
258-
}
259-
260-
// Guess based on file name
257+
258+
// Test first line
259+
if(!isRankLine(lines.firstDataLine)) {
260+
addScore(scores, Type.RANKS, -1);
261+
}
262+
if(!isExpressionLine(lines.firstDataLine)) {
263+
addScore(scores, Type.EXPRESSION, -1);
264+
}
265+
266+
// Guess based on file name
267+
if(matches(fileName, ".*expr(ession)?.*")) {
268+
addScore(scores, Type.EXPRESSION, 3);
269+
}
270+
if(matches(fileName, ".*rank.*")) {
271+
addScore(scores, Type.RANKS, 3);
272+
}
273+
}
261274

262-
if(matches(fileName, ".*expr(ession)?.*")) {
263-
addScore(scores, Type.EXPRESSION, 3);
264-
}
265-
if(matches(fileName, ".*rank.*")) {
266-
addScore(scores, Type.RANKS, 3);
275+
if(hasExtension(path, "bgo") || isBingoHeader(lines.firstLine)) {
276+
addScore(scores, Type.ENRICHMENT_BINGO, 2);
267277
}
268278
}
269279

@@ -346,15 +356,29 @@ private static boolean hasExtension(Path path, String... extensions) {
346356
}
347357

348358

349-
private static Optional<String> getFirstDataLine(Path path) {
350-
try(Stream<String> lines = Files.lines(path)) {
351-
return lines
352-
.filter(l -> !l.startsWith("#")) // filter out comment lines
353-
.skip(1) // skip header line
354-
.findFirst();
355-
} catch(IOException | UncheckedIOException e) {
356-
return Optional.empty();
359+
private static DataLines getFirstDataLines(Path path) {
360+
try(FileReader fileReader = new FileReader(path.toFile());
361+
BufferedReader reader = new BufferedReader(fileReader))
362+
{
363+
String firstLine = null;
364+
String firstDataLine = null;
365+
366+
String line = null;
367+
while((line = reader.readLine()) != null) {
368+
if(firstLine == null) {
369+
firstLine = line;
370+
} else if(!line.startsWith("#")) {
371+
firstDataLine = line;
372+
break;
373+
}
374+
}
375+
376+
if(firstLine != null && firstDataLine != null) {
377+
return new DataLines(firstLine, firstDataLine);
378+
}
379+
} catch(IOException e) {
357380
}
381+
return null;
358382
}
359383

360384
private static boolean isExpressionLine(String line) {
@@ -384,54 +408,51 @@ private static boolean isTabSeparated(String line) {
384408
}
385409

386410

387-
public static Type guessEnrichmentType(String path) {
388-
return guessEnrichmentType(Paths.get(path));
411+
public static Type guessEnrichmentTypeFromPath(String path) {
412+
Path p = Paths.get(path);
413+
DataLines lines = getFirstDataLines(p);
414+
return guessEnrichmentType(lines.firstLine);
389415
}
390416

391417
/*
392418
* This logic was moved here from {@link DetermineEnrichmentResultFileReader}
393419
*/
394-
public static Type guessEnrichmentType(Path path) {
395-
try {
396-
String firstLine = com.google.common.io.Files.readFirstLine(path.toFile(), Charset.defaultCharset());
397-
398-
String[] tokens = firstLine.split("\t");
399-
400-
//check to see if there are exactly 11 columns - = GSEA results
401-
if(tokens.length == 11) {
402-
//check to see if the ES is the 5th column and that NES is the 6th column
403-
if((tokens[4].equalsIgnoreCase("ES")) && (tokens[5].equalsIgnoreCase("NES")))
404-
return Type.ENRICHMENT_GSEA;
405-
//it is possible that the file can have 11 columns but that it is still a generic file
406-
//if it doesn't specify ES and NES in the 5 and 6th columns
407-
else
408-
return Type.ENRICHMENT_GENERIC;
409-
}
410-
//check to see if there are exactly 13 columns - = DAVID results
411-
else if(tokens.length == 13) {
412-
//check to see that the 6th column is called Genes and that the 12th column is called "Benjamini"
413-
if((tokens[5].equalsIgnoreCase("Genes")) && tokens[11].equalsIgnoreCase("Benjamini"))
414-
return Type.ENRICHMENT_DAVID;
415-
else
416-
return Type.ENRICHMENT_GENERIC;
420+
public static Type guessEnrichmentType(String firstLine) {
421+
String[] tokens = firstLine.split("\t");
417422

418-
}
419-
//fix bug with new version of bingo plugin change the case of the header file.
420-
else if(firstLine.toLowerCase().contains("File created with BiNGO".toLowerCase())) {
421-
return Type.ENRICHMENT_BINGO;
422-
} else if(firstLine.contains("GREAT version")) {
423-
return Type.ENRICHMENT_GREAT;
424-
} else if(tokens.length == 9 && firstLine.contains("Term") && firstLine.contains("Old P-value")) {
425-
return Type.ENRICHMENT_ENRICHR;
426-
} else {
423+
//check to see if there are exactly 11 columns - = GSEA results
424+
if(tokens.length == 11) {
425+
//check to see if the ES is the 5th column and that NES is the 6th column
426+
if((tokens[4].equalsIgnoreCase("ES")) && (tokens[5].equalsIgnoreCase("NES")))
427+
return Type.ENRICHMENT_GSEA;
428+
//it is possible that the file can have 11 columns but that it is still a generic file
429+
//if it doesn't specify ES and NES in the 5 and 6th columns
430+
else
427431
return Type.ENRICHMENT_GENERIC;
428-
}
429432
}
430-
catch(IOException e) {
431-
// MKTODO log the exception
433+
//check to see if there are exactly 13 columns - = DAVID results
434+
else if(tokens.length == 13) {
435+
//check to see that the 6th column is called Genes and that the 12th column is called "Benjamini"
436+
if((tokens[5].equalsIgnoreCase("Genes")) && tokens[11].equalsIgnoreCase("Benjamini"))
437+
return Type.ENRICHMENT_DAVID;
438+
else
439+
return Type.ENRICHMENT_GENERIC;
440+
432441
}
433-
434-
return Type.IGNORE;
442+
else if(isBingoHeader(firstLine)) {
443+
return Type.ENRICHMENT_BINGO;
444+
} else if(firstLine.contains("GREAT version")) {
445+
return Type.ENRICHMENT_GREAT;
446+
} else if(tokens.length == 9 && firstLine.contains("Term") && firstLine.contains("Old P-value")) {
447+
return Type.ENRICHMENT_ENRICHR;
448+
} else {
449+
return Type.ENRICHMENT_GENERIC;
450+
}
451+
}
452+
453+
454+
private static boolean isBingoHeader(String firstLine) {
455+
return firstLine.toLowerCase().contains("File created with BiNGO".toLowerCase());
435456
}
436457

437458

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/task/CreateEnrichmentMapTaskFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ private static AbstractTask readFile(EMDataSet dataset, String fileName, ParseGS
203203
if(fileName.endsWith(".edb")) {
204204
return new ParseEDBEnrichmentResults(dataset);
205205
} else {
206-
DataSetResolver.Type type = DataSetResolver.guessEnrichmentType(fileName);
206+
DataSetResolver.Type type = DataSetResolver.guessEnrichmentTypeFromPath(fileName);
207207
switch(type) {
208208
default:
209209
case ENRICHMENT_GENERIC: return new ParseGenericEnrichmentResults(dataset);

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/enrichmentmap/view/control/ControlPanelMediator.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ public void reset(ViewParams params) {
250250
return;
251251

252252
try {
253+
updating = true;
254+
253255
// Update Filters
254256
if (params.getPValue() != null && viewPanel.getPValueSliderPanel() != null)
255257
viewPanel.getPValueSliderPanel().setValue(params.getPValue());

0 commit comments

Comments
 (0)