Skip to content

Commit f3def06

Browse files
committed
Confirm Genbank parser handles new accession formats.
1 parent a22b728 commit f3def06

File tree

5 files changed

+81
-1
lines changed

5 files changed

+81
-1
lines changed

core/src/test/java/org/biojava/bio/seq/io/SeqIOToolsTest.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.biojava.bio.BioException;
3434
import org.biojava.bio.seq.Sequence;
3535
import org.biojava.bio.seq.SequenceIterator;
36+
import org.biojava.bio.seq.io.GenbankFormat;
3637
import org.biojava.bio.symbol.SymbolList;
3738

3839
/**
@@ -200,7 +201,41 @@ public void testDNAReadersAndWriters()
200201
}
201202

202203
}
203-
204+
205+
private Sequence readGenbankResource(final String resource) throws Exception {
206+
Sequence sequence = null;
207+
BufferedReader reader = null;
208+
try {
209+
reader = new BufferedReader(new InputStreamReader(getClass().getResourceAsStream(resource)));
210+
sequence = SeqIOTools.readGenbank(reader).nextSequence();
211+
}
212+
catch (Exception e) {
213+
e.printStackTrace();
214+
fail(e.getMessage());
215+
}
216+
finally {
217+
try {
218+
reader.close();
219+
}
220+
catch (Exception e) {
221+
// ignore
222+
}
223+
}
224+
return sequence;
225+
}
226+
227+
public void testNcbiExpandedAccessionFormats() throws Exception
228+
{
229+
Sequence header0 = readGenbankResource("/empty_header0.gb");
230+
assertEquals("CP032762", header0.getAnnotation().getProperty(GenbankFormat.LOCUS_TAG));
231+
232+
Sequence header1 = readGenbankResource("/empty_header1.gb");
233+
assertEquals("AZZZAA02123456789", header1.getAnnotation().getProperty(GenbankFormat.LOCUS_TAG));
234+
235+
Sequence header2 = readGenbankResource("/empty_header2.gb");
236+
assertEquals("AZZZAA02123456789", header2.getAnnotation().getProperty(GenbankFormat.LOCUS_TAG));
237+
}
238+
204239
public void testProteinReadersAndWriters()
205240
{
206241
/******* test readFastaProtein *********/

core/src/test/java/org/biojavax/bio/seq/io/GenbankFormatTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,27 @@ public void testReadEmptySequence() {
161161
assertEquals(sequence.getInternalSymbolList().length(), 0);
162162
}
163163

164+
public void testNcbiExpandedAccessionFormats() throws Exception
165+
{
166+
RichSequence header0 = readDNAFile("/empty_header0.gb");
167+
assertEquals("CP032762", header0.getName());
168+
assertTrue(header0.getCircular());
169+
assertEquals("BCT", header0.getDivision());
170+
assertEquals("15-OCT-2018", header0.getAnnotation().getProperty(Terms.getDateUpdatedTerm()));
171+
172+
RichSequence header1 = readDNAFile("/empty_header1.gb");
173+
assertEquals("AZZZAA02123456789", header1.getName());
174+
assertFalse(header1.getCircular());
175+
assertEquals("PRI", header1.getDivision());
176+
assertEquals("15-OCT-2018", header1.getAnnotation().getProperty(Terms.getDateUpdatedTerm()));
177+
178+
RichSequence header2 = readDNAFile("/empty_header2.gb");
179+
assertEquals("AZZZAA02123456789", header2.getName());
180+
assertFalse(header2.getCircular());
181+
assertEquals("PRI", header2.getDivision());
182+
assertEquals("15-OCT-2018", header2.getAnnotation().getProperty(Terms.getDateUpdatedTerm()));
183+
}
184+
164185
/**
165186
* Read a genbank file, return a RichSequence
166187
* @param filename name of file to read
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
LOCUS CP032762 5868661 bp DNA circular BCT 15-OCT-2018
2+
DEFINITION no sequence
3+
ACCESSION
4+
VERSION .0
5+
KEYWORDS .
6+
FEATURES Location/Qualifiers
7+
ORIGIN
8+
//
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
LOCUS AZZZAA02123456789 9999999999 bp DNA linear PRI 15-OCT-2018
2+
DEFINITION no sequence
3+
ACCESSION
4+
VERSION .0
5+
KEYWORDS .
6+
FEATURES Location/Qualifiers
7+
ORIGIN
8+
//
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
LOCUS AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018
2+
DEFINITION no sequence
3+
ACCESSION
4+
VERSION .0
5+
KEYWORDS .
6+
FEATURES Location/Qualifiers
7+
ORIGIN
8+
//

0 commit comments

Comments
 (0)