Skip to content

Commit bcd7427

Browse files
committed
allow both gff and txt output at the same time
1 parent 42ec900 commit bcd7427

File tree

4 files changed

+88
-28
lines changed

4 files changed

+88
-28
lines changed

CRISPRFinder.java

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ public class CRISPRFinder
55
{
66
private String inputFileName;
77
private String outputFileName;
8+
private String outputGffFileName;
89

910
private int screenDisplay;
1011
private int minNumRepeats;
@@ -25,6 +26,7 @@ public class CRISPRFinder
2526

2627
public CRISPRFinder(String _inputFileName,
2728
String _outputFileName,
29+
String _outputGffFileName,
2830
int _screenDisplay,
2931
int _minNumRepeats,
3032
int _minRepeatLength,
@@ -37,6 +39,7 @@ public CRISPRFinder(String _inputFileName,
3739
{
3840
inputFileName = _inputFileName;
3941
outputFileName = _outputFileName;
42+
outputGffFileName = _outputGffFileName;
4043

4144
screenDisplay = _screenDisplay;
4245
minNumRepeats = _minNumRepeats;
@@ -63,7 +66,7 @@ public CRISPRFinder(String _inputFileName,
6366
}
6467

6568
try {
66-
outputFileStream = new FileOutputStream(outputFile, false);
69+
outputFileStream = new FileOutputStream(outputFile, false);
6770
spacers = new PrintStream(outputFileStream);
6871
} catch (FileNotFoundException e) {
6972
// TODO Auto-generated catch block
@@ -254,15 +257,16 @@ private boolean findRepeats( DNASequence sequence, int readNum )
254257
FileOutputStream outputFileStream;
255258
PrintStream out;
256259

260+
FileOutputStream outputGffFileStream;
261+
PrintStream gffOut = null;
262+
257263
if (screenDisplay == 1)
258264
out = System.out;
259265
else
260266
{
261-
if ( outputFileName.equals("") )
267+
// Fall back to the default output name when none was supplied.
// Compare contents with equals(): == on String compares references and is only
// true when both sides happen to be the same interned object.
if ( outputFileName.equals("") )
    outputFileName = "a.out";
263269

264-
//System.out.println("Writing results in file '" + outputFileName + "'");
265-
//System.out.println("");
266270

267271
File outputFile = new File(outputFileName);
268272
if ( readNum == 1 && outputFile.exists() )
@@ -274,6 +278,23 @@ private boolean findRepeats( DNASequence sequence, int readNum )
274278

275279
outputFileStream = new FileOutputStream(outputFile, true);
276280
out = new PrintStream(outputFileStream);
281+
282+
283+
// Optional second output: when a GFF file name was supplied, open it in addition
// to the text output stream.
if (! outputGffFileName.equals(""))
{
    File outputGffFile = new File(outputGffFileName);

    // On the first read, remove any pre-existing GFF file so that the
    // append-mode stream opened below starts from an empty file.
    if ( readNum == 1 && outputGffFile.exists() )
    {
        // Delete the GFF file itself, not the text output file.
        boolean success = outputGffFile.delete();
        if (!success)
            throw new IllegalArgumentException("Error: Could not delete file '" + outputGffFile + "'");
    }

    // Append mode: the stream is re-opened on the same file each time this code runs.
    outputGffFileStream = new FileOutputStream(outputGffFile, true);
    gffOut = new PrintStream(outputGffFileStream);

    // NOTE(review): the version header is written every time this block executes;
    // if that is once per read, the file will contain repeated headers - confirm.
    gffOut.println("##gff-version 3");

    // NOTE(review): presumably keeps the header from being printed again
    // elsewhere - confirm against the code that reads printGffHeader.
    printGffHeader = false;
}
277298
}
278299

279300
if (repeatsFound)
@@ -297,20 +318,15 @@ private boolean findRepeats( DNASequence sequence, int readNum )
297318
for (int k = 0; k < CRISPRVector.size(); k++)
298319
{
299320
currCRISPR = (CRISPR)CRISPRVector.elementAt(k);
300-
if(outputformat > 0) {
301-
String crispr_id = "CRISPR" + (++totalCrisprCount);
302-
out.print(sequence.getName() + "\tminced:" + minced.VERSION + "\trepeat_region\t");
303-
out.print((currCRISPR.start() + 1) + "\t" + (currCRISPR.end() + 1) + "\t");
304-
out.print(currCRISPR.numRepeats() + "\t.\t.\tID="+ crispr_id + ";rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq="+ currCRISPR.repeatStringAt(1));
305-
out.print("\n");
306-
if(outputformat == 2) {
307-
out.print(currCRISPR.toGff(sequence.getName(), crispr_id));
308-
}
321+
totalCrisprCount++;
322+
if(outputformat > 0 && gffOut == null ) {
323+
printGff(out, sequence, currCRISPR);
309324
} else {
310-
out.print("CRISPR " + (++totalCrisprCount) + " Range: " + (currCRISPR.start() + 1) + " - " + (currCRISPR.end() + 1) + "\n");
311-
out.print(currCRISPR.toString());
312-
out.print("Repeats: " + currCRISPR.numRepeats() + "\t" + "Average Length: " + currCRISPR.averageRepeatLength() + "\t\t");
313-
out.print("Average Length: " + currCRISPR.averageSpacerLength() + "\n\n");
325+
printTable(out, currCRISPR);
326+
}
327+
328+
if (gffOut != null) {
329+
printGff(gffOut, sequence, currCRISPR);
314330
}
315331
if(printSpacers) {
316332
for (int i = 0; i < currCRISPR.numSpacers(); ++i) {
@@ -333,8 +349,33 @@ private boolean findRepeats( DNASequence sequence, int readNum )
333349
out.close();
334350

335351
}
336-
catch (Exception e) { System.err.println ("--Error writing to file-- \n"); }
352+
catch (Exception e) {
353+
System.err.println ("--Error writing to file-- \n");
354+
e.printStackTrace(System.err);
355+
}
337356

338357
return true;
339358
}
359+
360+
private boolean printGff(PrintStream out, DNASequence sequence, CRISPR currCRISPR) {
361+
String crispr_id = "CRISPR" + totalCrisprCount;
362+
out.print(sequence.getName() + "\tminced:" + minced.VERSION + "\trepeat_region\t");
363+
out.print((currCRISPR.start() + 1) + "\t" + (currCRISPR.end() + 1) + "\t");
364+
out.print(currCRISPR.numRepeats() + "\t.\t.\tID="+ crispr_id + ";rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq="+ currCRISPR.repeatStringAt(1));
365+
out.print("\n");
366+
if(outputformat == 2) {
367+
out.print(currCRISPR.toGff(sequence.getName(), crispr_id));
368+
}
369+
return true;
370+
}
371+
372+
private boolean printTable(PrintStream out, CRISPR currCRISPR) {
373+
out.print("CRISPR " + totalCrisprCount + " Range: " + (currCRISPR.start() + 1) + " - " + (currCRISPR.end() + 1) + "\n");
374+
out.print(currCRISPR.toString());
375+
out.print("Repeats: " + currCRISPR.numRepeats() + "\t" + "Average Length: " + currCRISPR.averageRepeatLength() + "\t\t");
376+
out.print("Average Length: " + currCRISPR.averageSpacerLength() + "\n\n");
377+
return true;
378+
}
379+
340380
}
381+

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ The output can be large, so save it in a file:
5252

5353
minced -minNR 2 metagenome.fna metagenome.crisprs
5454

55+
You can also save both the table output and the GFF output at the same
56+
time:
57+
58+
minced ecoli.fna out.txt out.gff
59+
5560
## COPYRIGHT AND LICENSE
5661

5762
```

minced.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
public class minced
44
{
5-
public static final String VERSION = "0.3.3";
5+
public static final String VERSION = "0.4.0";
66
public static void main(String[] args)
77
{
88
//default values
@@ -178,9 +178,12 @@ else if (args[i].endsWith("screen"))
178178
// Last options should be an input file and optional output file
179179
String inputFileName = "";
180180
String outputFileName = "";
181+
String outputGffFileName = "";
181182
boolean outputFileSpecified = false;
183+
boolean outputGffFileSpecified = false;
182184
int numArgsRemaining = args.length - numOptions;
183185

186+
184187
if (numArgsRemaining == 1)
185188
inputFileName = args[i];
186189
else if (numArgsRemaining == 2)
@@ -190,6 +193,15 @@ else if (numArgsRemaining == 2)
190193
outputFileName = args[i + 1];
191194
screenDisplay = 0;
192195
}
196+
else if (numArgsRemaining == 3)
197+
{
198+
inputFileName = args[i];
199+
outputFileSpecified = true;
200+
outputGffFileSpecified = true;
201+
outputFileName = args[i + 1];
202+
outputGffFileName = args[i + 2];
203+
screenDisplay = 0;
204+
}
193205
else
194206
{
195207
System.out.println("Improper usage.");
@@ -234,6 +246,7 @@ else if (numArgsRemaining == 2)
234246

235247
CRISPRFinder client = new CRISPRFinder(inputFileName,
236248
outputFileName,
249+
outputGffFileName,
237250
screenDisplay,
238251
minNumRepeats,
239252
minRepeatLength,
@@ -251,7 +264,7 @@ public static void printUsage()
251264
{
252265
System.out.println("MinCED, a program to find CRISPRs in shotgun DNA sequences or full genomes");
253266
System.out.println();
254-
System.out.println("Usage: minced [options] file.fa [outputFile]");
267+
System.out.println("Usage: minced [options] file.fa [outputFile.txt] [outputFile.gff]");
255268
System.out.println();
256269
System.out.println("Options: -searchWL Length of search window used to discover CRISPRs (range: 6-9). Default: 8");
257270
System.out.println(" -minNR Minimum number of repeats a CRISPR must contain. Default: 3");
@@ -268,8 +281,9 @@ public static void printUsage()
268281
System.out.println(" --version Output version information");
269282
System.out.println();
270283
System.out.println("Examples: minced ecoli.fna");
271-
System.out.println(" minced -minNR 2 metagenome.fna");
272-
System.out.println(" minced -minNR 2 metagenome.fna metagenome.crisprs");
284+
System.out.println(" minced metagenome.fna");
285+
System.out.println(" minced metagenome.fna metagenome.crisprs");
286+
System.out.println(" minced metagenome.fna metagenome.crisprs metagenome.gff");
273287
System.out.println();
274288
}
275289

t/Aquifex_aeolicus_VF5.expected

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
##gff-version 3
2-
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 156460 156767 5 . . ID=CRISPR1;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAACC
3-
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 244561 244791 4 . . ID=CRISPR2;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACACGGTACATTAGGAAC
4-
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 279264 279555 5 . . ID=CRISPR3;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTTAACTCCACACGGTACATTAGAAAC
5-
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 1226626 1226861 4 . . ID=CRISPR4;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=CGTTTCTAATGTACCGTAGAGGAGTTGAAAC
6-
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 1379598 1379894 5 . . ID=CRISPR5;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACTACGGTACATTAGGAAC
7-
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 1418900 1419060 3 . . ID=CRISPR6;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAAC
2+
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 156460 156767 5 . . ID=CRISPR1;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAACC
3+
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 244561 244791 4 . . ID=CRISPR2;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACACGGTACATTAGGAAC
4+
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 279264 279555 5 . . ID=CRISPR3;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTTAACTCCACACGGTACATTAGAAAC
5+
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 1226626 1226861 4 . . ID=CRISPR4;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=CGTTTCTAATGTACCGTAGAGGAGTTGAAAC
6+
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 1379598 1379894 5 . . ID=CRISPR5;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACTACGGTACATTAGGAAC
7+
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 1418900 1419060 3 . . ID=CRISPR6;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAAC

0 commit comments

Comments
 (0)