Skip to content

Commit 25b032a

Browse files
Fix tests and add round-trip GFF<->JSON tests
1 parent f441883 commit 25b032a

12 files changed

+221
-184
lines changed

packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,38 @@
11
/* eslint-disable prefer-destructuring */
22
/* eslint-disable @typescript-eslint/no-floating-promises */
3+
import { readFileSync, writeFileSync } from 'node:fs'
34
import { describe, it } from 'node:test'
45

56
import { type AnnotationFeatureSnapshot } from '@apollo-annotation/mst'
6-
import { assert } from 'chai'
7+
import { formatSync } from '@gmod/gff'
8+
import { assert, expect } from 'chai'
79

810
import { annotationFeatureToGFF3 } from './annotationFeatureToGFF3'
911
import { readAnnotationFeatureSnapshot } from './gff3ToAnnotationFeature.test'
12+
import { testCases } from './testUtil'
13+
14+
// Data-driven JSON -> GFF3 checks: every entry in `testCases` names a
// `test_data/<stem>.json` input and the `test_data/<stem>.gff3` text it is
// expected to serialize to.
describe('Converts AnnotationFeatureSnapshot JSON to GFF3 when', () => {
  for (const { filenameStem, description } of testCases) {
    it(description, () => {
      const jsonText = readFileSync(`test_data/${filenameStem}.json`, 'utf8')
      const snapshots = JSON.parse(jsonText) as AnnotationFeatureSnapshot[]
      const expectedGFF3 = readFileSync(
        `test_data/${filenameStem}.gff3`,
        'utf8',
      )
      const actualGFF3 = formatSync(
        snapshots.map((snapshot) => annotationFeatureToGFF3(snapshot)),
      )
      // On a mismatch, keep the generated text next to the fixture so it can
      // be diffed against the expected file by hand.
      const matches = actualGFF3 === expectedGFF3
      if (!matches) {
        writeFileSync(`test_data/${filenameStem}.tmp.gff3`, actualGFF3)
      }
      expect(actualGFF3).to.equal(expectedGFF3)
    })
  }
})
1036

1137
describe('annotationFeatureToGFF3', () => {
1238
it('Test mandatory columns', () => {

packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts

Lines changed: 29 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* eslint-disable @typescript-eslint/no-floating-promises */
2-
import { readFileSync } from 'node:fs'
2+
import { readFileSync, writeFileSync } from 'node:fs'
33
import { describe, it } from 'node:test'
44

55
import { type AnnotationFeatureSnapshot } from '@apollo-annotation/mst'
@@ -8,77 +8,10 @@ import { assert, use } from 'chai'
88
import chaiExclude from 'chai-exclude'
99

1010
import { gff3ToAnnotationFeature } from './gff3ToAnnotationFeature'
11+
import { testCases } from './testUtil'
1112

1213
use(chaiExclude)
1314

14-
const testCases: [string, string, AnnotationFeatureSnapshot][] = [
15-
[
16-
'a feature with no children',
17-
'ctgA example remark 1000 2000 . . . Name=Remark:hga;Alias=hga\n',
18-
{
19-
_id: '66c51f3e002c683eaf98a223',
20-
refSeq: 'ctgA',
21-
type: 'remark',
22-
min: 999,
23-
max: 2000,
24-
attributes: {
25-
gff_source: ['example'],
26-
gff_name: ['Remark:hga'],
27-
gff_alias: ['hga'],
28-
},
29-
},
30-
],
31-
[
32-
'a feature with two children',
33-
`ctgA est EST_match 1050 3202 . + . ID=Match1;Name=agt830.5;Target=agt830.5 1 654
34-
ctgA est match_part 1050 1500 . + . Parent=Match1;Name=agt830.5;Target=agt830.5 1 451
35-
ctgA est match_part 3000 3202 . + . Parent=Match1;Name=agt830.5;Target=agt830.5 452 654
36-
`,
37-
{
38-
_id: '66cf9fbb4e947fa2c27d3d6a',
39-
featureId: 'Match1',
40-
refSeq: 'ctgA',
41-
type: 'EST_match',
42-
min: 1049,
43-
max: 3202,
44-
strand: 1,
45-
children: {
46-
'66cf9fbb4e947fa2c27d3d68': {
47-
_id: '66cf9fbb4e947fa2c27d3d68',
48-
refSeq: 'ctgA',
49-
type: 'match_part',
50-
min: 1049,
51-
max: 1500,
52-
strand: 1,
53-
attributes: {
54-
gff_source: ['est'],
55-
gff_name: ['agt830.5'],
56-
gff_target: ['agt830.5 1 451'],
57-
},
58-
},
59-
'66cf9fbb4e947fa2c27d3d69': {
60-
_id: '66cf9fbb4e947fa2c27d3d69',
61-
refSeq: 'ctgA',
62-
type: 'match_part',
63-
min: 2999,
64-
max: 3202,
65-
strand: 1,
66-
attributes: {
67-
gff_source: ['est'],
68-
gff_name: ['agt830.5'],
69-
gff_target: ['agt830.5 452 654'],
70-
},
71-
},
72-
},
73-
attributes: {
74-
gff_source: ['est'],
75-
gff_name: ['agt830.5'],
76-
gff_target: ['agt830.5 1 654'],
77-
},
78-
},
79-
],
80-
]
81-
8215
interface AnnotationFeatureSnapshotWithChildrenArray
8316
extends Omit<AnnotationFeatureSnapshot, 'children'> {
8417
children?: AnnotationFeatureSnapshotWithChildrenArray[]
@@ -108,6 +41,33 @@ function compareFeatures(
10841
)
10942
}
11043

44+
// Data-driven GFF3 -> JSON checks: every entry in `testCases` names a
// `test_data/<stem>.gff3` input and the `test_data/<stem>.json` snapshots it
// is expected to convert to.
describe('Converts GFF3 to AnnotationFeatureSnapshot JSON when', () => {
  for (const testCase of testCases) {
    const { filenameStem, description } = testCase
    it(description, () => {
      const fileText = readFileSync(`test_data/${filenameStem}.gff3`, 'utf8')
      const gffFeatures = parseStringSync(fileText, { parseSequences: false })
      const annotationFeatures = gffFeatures.map((gff3Feature) =>
        gff3ToAnnotationFeature(gff3Feature),
      )
      const annotationFeaturesExpected = JSON.parse(
        readFileSync(`test_data/${filenameStem}.json`, 'utf8'),
      ) as AnnotationFeatureSnapshot[]
      try {
        // Without this check, extra converted features would be ignored and
        // missing ones would reach compareFeatures as `undefined`.
        assert.lengthOf(annotationFeatures, annotationFeaturesExpected.length)
        for (const [
          i,
          annotationFeatureExpected,
        ] of annotationFeaturesExpected.entries()) {
          compareFeatures(annotationFeatures[i], annotationFeatureExpected)
        }
      } catch (error) {
        // Only leave a debugging artifact behind when the comparison fails,
        // matching the JSON -> GFF3 test, which writes its temp file only on
        // a mismatch.
        writeFileSync(
          `test_data/${filenameStem}.tmp.json`,
          JSON.stringify(annotationFeatures, null, 2),
        )
        throw error
      }
    })
  }
})
70+
11171
function readFeatureFile(fn: string): GFF3Feature[] {
11272
const lines = readFileSync(fn).toString().split('\n')
11373
const feature: string[] = []
@@ -139,13 +99,6 @@ describe('gff3ToAnnotationFeature examples', () => {
13999
const expected = readAnnotationFeatureSnapshot('test_data/one_cds.json')
140100
compareFeatures(actual, expected)
141101
})
142-
it('Convert two CDSs', () => {
143-
const actual = gff3ToAnnotationFeature(
144-
readFeatureFile('test_data/two_cds.gff3')[0],
145-
)
146-
const expected = readAnnotationFeatureSnapshot('test_data/two_cds.json')
147-
compareFeatures(actual, expected)
148-
})
149102
it('Convert example 1', () => {
150103
const actual = gff3ToAnnotationFeature(ex1)
151104
const txt = JSON.stringify(actual, null, 2)
@@ -228,19 +181,6 @@ describe('CDS without exons', () => {
228181
})
229182
})
230183

231-
describe('gff3ToAnnotationFeature', () => {
232-
for (const testCase of testCases) {
233-
const [description, featureLine, convertedFeature] = testCase
234-
it(`converts ${description}`, () => {
235-
const gff3Feature = parseStringSync(featureLine, {
236-
parseSequences: false,
237-
})
238-
const feature = gff3ToAnnotationFeature(gff3Feature[0])
239-
compareFeatures(convertedFeature, feature)
240-
})
241-
}
242-
})
243-
244184
describe('Source and score', () => {
245185
it('Convert score and source', () => {
246186
const gffFeature: GFF3Feature = [
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/**
 * Shared fixtures for the data-driven GFF3 <-> JSON conversion tests.
 *
 * `filenameStem` is the base name of a fixture pair,
 * `test_data/<filenameStem>.gff3` and `test_data/<filenameStem>.json`.
 * `description` is used as the test title and completes the sentence started
 * by the enclosing `describe('Converts ... when', ...)`, so it should read as
 * a clause ("there is ...").
 */
export const testCases: { filenameStem: string; description: string }[] = [
  {
    filenameStem: 'single_feature_no_children',
    description: 'there is a single feature with no children',
  },
  {
    filenameStem: 'single_feature_two_children',
    description: 'there is a single feature with two children',
  },
  {
    filenameStem: 'gene_with_two_cds',
    description: 'there is a gene with two CDS',
  },
]

packages/apollo-shared/test_data/gene.json

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
"max": 9000,
77
"strand": 1,
88
"attributes": {
9-
"gff_id": ["gene10001"],
109
"gff_name": ["EDEN"],
1110
"gff_score": ["123"],
1211
"gff_source": ["test_data"],
@@ -31,7 +30,6 @@
3130
"max": 9000,
3231
"strand": 1,
3332
"attributes": {
34-
"gff_id": ["mRNA10001"],
3533
"gff_name": ["EDEN.1"],
3634
"testid": ["t004", "t001", "t004"]
3735
},
@@ -44,9 +42,9 @@
4442
"max": 1500,
4543
"strand": 1,
4644
"attributes": {
47-
"gff_id": ["exon10001"],
4845
"testid": ["t007"]
49-
}
46+
},
47+
"featureId": "exon10001"
5048
},
5149
"66d70e4ccc30b55b65e5f616": {
5250
"_id": "66d70e4ccc30b55b65e5f616",
@@ -56,7 +54,6 @@
5654
"max": 5500,
5755
"strand": 1,
5856
"attributes": {
59-
"gff_id": ["exon10004"],
6057
"testid": ["t010"]
6158
},
6259
"children": {
@@ -67,11 +64,10 @@
6764
"min": 5300,
6865
"max": 5400,
6966
"strand": 1,
70-
"attributes": {
71-
"gff_id": ["exon_region10001"]
72-
}
67+
"featureId": "exon_region10001"
7368
}
74-
}
69+
},
70+
"featureId": "exon10004"
7571
},
7672
"66d70e4ccc30b55b65e5f617": {
7773
"_id": "66d70e4ccc30b55b65e5f617",
@@ -81,7 +77,6 @@
8177
"max": 5100,
8278
"strand": 1,
8379
"attributes": {
84-
"gff_id": ["cds10001"],
8580
"gff_name": ["edenprotein.1"],
8681
"testid": ["t012", "t013", "t014", "t015"]
8782
},
@@ -93,11 +88,10 @@
9388
"min": "1350",
9489
"max": "1400",
9590
"strand": 1,
96-
"attributes": {
97-
"gff_id": ["cds_region10001"]
98-
}
91+
"featureId": "cds_region10001"
9992
}
100-
}
93+
},
94+
"featureId": "cds10001"
10195
},
10296
"66e049f17b9cedae9ad89106": {
10397
"_id": "66e049f17b9cedae9ad89106",
@@ -107,11 +101,13 @@
107101
"max": 5200,
108102
"strand": 1,
109103
"attributes": {
110-
"gff_id": ["cds10004"],
111104
"gff_name": ["edenprotein.4"]
112-
}
105+
},
106+
"featureId": "cds10004"
113107
}
114-
}
108+
},
109+
"featureId": "mRNA10001"
115110
}
116-
}
111+
},
112+
"featureId": "gene10001"
117113
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
chr1 . gene 1000 9000 . + . testid=t003;ID=gene10001;Name=EDEN
2+
chr1 . mRNA 1050 9000 . + . testid=t004,t001,t004;Parent=gene10001;ID=mRNA10001;Name=EDEN.1
3+
chr1 . exon 1050 1500 . + . testid=t007;Parent=mRNA10001;ID=exon10001
4+
chr1 . exon 5000 5500 . + . testid=t010;Parent=mRNA10001;ID=exon10004
5+
chr1 . CDS 1201 1500 . + 0 testid=t012,t013,t014;Parent=mRNA10001;ID=cds10001;Name=edenprotein.1
6+
chr1 . CDS 5000 5000 . + 0 testid=t012,t013,t014;Parent=mRNA10001;ID=cds10001;Name=edenprotein.1
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
[
2+
{
3+
"_id": "66d70f3b9c7a7460925687a3",
4+
"refSeq": "chr1",
5+
"type": "gene",
6+
"min": 999,
7+
"max": 9000,
8+
"strand": 1,
9+
"children": {
10+
"66d70f3b9c7a7460925687a2": {
11+
"_id": "66d70f3b9c7a7460925687a2",
12+
"refSeq": "chr1",
13+
"type": "mRNA",
14+
"min": 1049,
15+
"max": 9000,
16+
"strand": 1,
17+
"children": {
18+
"66d70f3b9c7a74609256879f": {
19+
"_id": "66d70f3b9c7a74609256879f",
20+
"refSeq": "chr1",
21+
"type": "exon",
22+
"min": 1049,
23+
"max": 1500,
24+
"strand": 1,
25+
"attributes": {
26+
"testid": ["t007"]
27+
},
28+
"featureId": "exon10001"
29+
},
30+
"66d70f3b9c7a7460925687a0": {
31+
"_id": "66d70f3b9c7a7460925687a0",
32+
"refSeq": "chr1",
33+
"type": "exon",
34+
"min": 4999,
35+
"max": 5500,
36+
"strand": 1,
37+
"attributes": {
38+
"testid": ["t010"]
39+
},
40+
"featureId": "exon10004"
41+
},
42+
"66d70f3b9c7a7460925687a1": {
43+
"_id": "66d70f3b9c7a7460925687a1",
44+
"refSeq": "chr1",
45+
"type": "CDS",
46+
"min": 1200,
47+
"max": 5000,
48+
"strand": 1,
49+
"attributes": {
50+
"gff_name": ["edenprotein.1"],
51+
"testid": ["t012", "t013", "t014"]
52+
},
53+
"featureId": "cds10001"
54+
}
55+
},
56+
"attributes": {
57+
"gff_name": ["EDEN.1"],
58+
"testid": ["t004", "t001", "t004"]
59+
},
60+
"featureId": "mRNA10001"
61+
}
62+
},
63+
"attributes": {
64+
"gff_name": ["EDEN"],
65+
"testid": ["t003"]
66+
},
67+
"featureId": "gene10001"
68+
}
69+
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ctgA example remark 1000 2000 . . . Name=Remark:hga;Alias=hga
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[
2+
{
3+
"_id": "6931ef3bf5c33f70085c3a7b",
4+
"refSeq": "ctgA",
5+
"type": "remark",
6+
"min": 999,
7+
"max": 2000,
8+
"attributes": {
9+
"gff_source": ["example"],
10+
"gff_name": ["Remark:hga"],
11+
"gff_alias": ["hga"]
12+
}
13+
}
14+
]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ctgA est EST_match 1050 3202 . + . ID=Match1;Name=agt830.5;Target=agt830.5 1 654
2+
ctgA est match_part 1050 1500 . + . Parent=Match1;Name=agt830.5;Target=agt830.5 1 451
3+
ctgA est match_part 3000 3202 . + . Parent=Match1;Name=agt830.5;Target=agt830.5 452 654

0 commit comments

Comments
 (0)