Skip to content

Commit 3228cd7

Browse files
committed
Add test cases for parser
1 parent 547be99 commit 3228cd7

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

src/test/groovy/com/antigenomics/migmap/blast/BlastParserTest.groovy

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,77 @@ class BlastParserTest {
333333
assert !mapping.complete
334334
}
335335

336+
/**
 * Parser case 1: an IgBlast report for an IGK query whose rearrangement
 * summary names only a V gene (the D and J columns are "N/A") and whose hit
 * table contains three V hits and nothing else.
 *
 * Expects the parsed mapping to report IGKV4-1*01 as the V segment and "."
 * as the name of both the D and the J segment.
 * NOTE(review): "." is presumably the name of a placeholder/dummy segment
 * used when no hit exists -- confirm against the segment classes.
 */
@Test
void parserCase1Test() {
    def parser = new BlastParser(new SegmentDatabase("data/", "human", ["IGK"]))

    // Raw report text, one list entry per output line; join("") glues the
    // entries together without adding any separator of its own.
    def blastOutput = [
            "# IGBLASTN 2.2.29+\n",
            "# Query: @MIG UMI:TACCGCCGCTTGT:5\n",
            "# Database: /Users/mikesh/Programming/higblast/data/database-46f68e99-c1e5-45cc-b3cd-0934b850d3d4/v /Users/mikesh/Programming/higblast/data/database-46f68e99-c1e5-45cc-b3cd-0934b850d3d4/d /Users/mikesh/Programming/higblast/data/database-46f68e99-c1e5-45cc-b3cd-0934b850d3d4/j\n",
            "# Domain classification requested: imgt\n",
            "\n",
            "# V-(D)-J rearrangement summary for query sequence (Top V gene match, Top D gene match, Top J gene match, Chain type, stop codon, V-J frame, Productive, Strand). Multiple equivalent top matches having the same score and percent identity, if present, are separated by a comma.\n",
            "IGKV4-1*01\tN/A\tN/A\tVH\tNo\tN/A\tN/A\t+\n",
            "\n",
            "# V-(D)-J junction details based on top germline gene matches (V end, V-D junction, D region, D-J junction, J start). Note that possible overlapping nucleotides at VDJ junction (i.e, nucleotides that could be assigned to either rearranging gene) are indicated in parentheses (i.e., (TACT)) but are not included under the V, D, or J gene itself\n",
            "ACTGT\tN/A\tN/A\tN/A\tN/A\t\n",
            "\n",
            "# Alignment summary between query and top germline V gene hit (from, to, length, matches, mismatches, gaps, percent identity)\n",
            "FR3-IMGT\t382\t424\t43\t33\t10\t0\t76.7\n",
            "Total\tN/A\tN/A\t43\t33\t10\t0\t76.7\n",
            "\n",
            "# Hit table (the first field indicates the chain type of the hit)\n",
            "# Fields: subject id, q. start, query seq, s. start, subject seq\n",
            "# 3 hits found\n",
            "V\tIGKV4-1*01\t382\tCATGAGCAGCCTGAGAGCCGAAGACACGGCCGTATATTACTGT\t240\tCATCAGCAGCCTGCAGGCTGAAGATGTGGCAGTTTATTACTGT\n",
            "V\tIGKV6D-41*01\t382\tCATGAGCAGCCTGAGAGCCGAAGACACGGCCGTATATTACTGT\t222\tCATCAGTAGCCTGGAAGCTGAAGATGCTGCAACATATTACTGT\n",
            "V\tIGKV3D-7*01\t382\tCATGAGCAGCCTGAGAGCCGAAGACACGGCCGTATATTACTGT\t225\tCATCAGCAGCCTGCAGCCTGAAGATTTTGCAGTTTATTACTGT"
    ].join("")

    def result = parser.parse(blastOutput)

    assert result.vSegment.name == "IGKV4-1*01"
    assert result.dSegment.name == "."
    assert result.jSegment.name == "."
}
369+
370+
/**
 * Parser case 2: an IgBlast report for a TRA query. The report text starts
 * directly at the "# Query" line, its rearrangement summary lists two tied
 * top V genes ("TRAV8-6*02,TRAV8-6*01"), gives ".,.,." in the D column and
 * "N/A" in the J column, and its hit table holds three V hits followed by
 * three D hits whose subject id is ".".
 *
 * Expects the parsed mapping to report TRAV8-6*01 as the V segment and "."
 * as the name of both the D and the J segment.
 * NOTE(review): "." is presumably the name of a placeholder/dummy segment
 * used when no real hit exists -- confirm against the segment classes.
 */
@Test
void parserCase2Test() {
    def parser = new BlastParser(new SegmentDatabase("data/", "human", ["TRA"]))

    // Raw report text, one list entry per output line; join("") glues the
    // entries together without adding any separator of its own.
    def blastOutput = [
            "# Query: @MIG UMI:TAACAATCTGAAC:11\n",
            "# Database: /Users/mikesh/Programming/higblast/data/database-ba5de9d1-adfb-4cd7-b514-f52f28ab614e/v /Users/mikesh/Programming/higblast/data/database-ba5de9d1-adfb-4cd7-b514-f52f28ab614e/d /Users/mikesh/Programming/higblast/data/database-ba5de9d1-adfb-4cd7-b514-f52f28ab614e/j\n",
            "# Domain classification requested: imgt\n",
            "\n",
            "# V-(D)-J rearrangement summary for query sequence (Top V gene match, Top D gene match, Top J gene match, Chain type, stop codon, V-J frame, Productive, Strand). Multiple equivalent top matches having the same score and percent identity, if present, are separated by a comma.\n",
            "TRAV8-6*02,TRAV8-6*01\t.,.,.\tN/A\tVB\tNo\tN/A\tN/A\t+\n",
            "\n",
            "# V-(D)-J junction details based on top germline gene matches (V end, V-D junction, D region, D-J junction, J start). Note that possible overlapping nucleotides at VDJ junction (i.e, nucleotides that could be assigned to either rearranging gene) are indicated in parentheses (i.e., (TACT)) but are not included under the V, D, or J gene itself\n",
            "TGTGC\tGAGACTGATTAGGGACGA\tTTTTT\tN/A\tN/A\t\n",
            "\n",
            "# Alignment summary between query and top germline V gene hit (from, to, length, matches, mismatches, gaps, percent identity)\n",
            "FR3-IMGT\t365\t385\t21\t18\t3\t0\t85.7\n",
            "CDR3-IMGT (germline)\t386\t387\t2\t2\t0\t0\t100\n",
            "Total\tN/A\tN/A\t23\t20\t3\t0\t87\n",
            "\n",
            "# Hit table (the first field indicates the chain type of the hit)\n",
            "# Fields: subject id, q. start, query seq, s. start, subject seq\n",
            "# 6 hits found\n",
            "V\tTRAV8-6*02\t365\tGACACGGCTGTGTATTACTGTGC\t253\tGACACGGCTGAGTACTTCTGTGC\n",
            "V\tTRAV8-6*01\t365\tGACACGGCTGTGTATTACTGTGC\t253\tGACACGGCTGAGTACTTCTGTGC\n",
            "V\tTRAV16*01\t364\tAGACACGGCTGTGTATTACTGTGC\t240\tAGACTCAGCCATGTATTACTGTGC\n",
            "D\t.\t406\tTTTTT\t25\tTTTTT\n",
            "D\t.\t406\tTTTTT\t24\tTTTTT\n",
            "D\t.\t406\tTTTTT\t23\tTTTTT"
    ].join("")

    def result = parser.parse(blastOutput)

    assert result.vSegment.name == "TRAV8-6*01"
    assert result.dSegment.name == "."
    assert result.jSegment.name == "."
}
406+
336407
@AfterClass
337408
static void tearDown() {
338409
SegmentDatabase.clearTemporaryFiles()

0 commit comments

Comments
 (0)