Skip to content

Commit b3b40a7

Browse files
committed
Fix seq bug: validate the reference index against locus names instead of marker names
1 parent 149cbac commit b3b40a7

File tree

5 files changed

+130
-3
lines changed

5 files changed

+130
-3
lines changed

microhapulator/profile.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ def haploindexes(self):
7979
def markers(self):
    """Return the marker identifiers of this profile as a set.

    The identifiers are the keys of the ``"markers"`` entry of
    ``self.data``.
    """
    marker_entries = self.data["markers"]
    return set(marker_entries)
8181

82+
def loci(self):
    """Return the set of locus names covered by this profile's markers.

    A locus name is the part of a marker identifier before its first
    ``"."``; for example ``"mh03USC-3qC.v2"`` yields ``"mh03USC-3qC"``.
    An identifier without a ``"."`` is used unchanged. Several markers
    at the same locus collapse to a single entry.
    """
    # partition() stops at the first "." rather than splitting the whole
    # identifier just to keep the leading piece.
    return {markerid.partition(".")[0] for markerid in self.markers()}
84+
8285
def haplotypes(self, markerid, index=None):
8386
if markerid not in self.data["markers"]:
8487
return set()
@@ -176,7 +179,7 @@ def __str__(self):
176179
return json.dumps(self.data, indent=4, sort_keys=True)
177180

178181
def bedstream(self, mhindex):
179-
mhindex.validate(refrids=self.markers(), symmetric=True)
182+
mhindex.validate(refrids=self.loci(), symmetric=True)
180183
for markerid in sorted(self.markers()):
181184
marker = mhindex.markers[markerid]
182185
offsets = marker.offsets_locus
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
Marker Offset Chrom OffsetHg38
2+
mh03USC-3qC.v2 47 chr3 196652865
3+
mh03USC-3qC.v2 153 chr3 196652971
4+
mh03USC-3qC.v2 207 chr3 196653025
5+
mh03USC-3qC.v2 216 chr3 196653034
6+
mh03USC-3qC.v2 226 chr3 196653044
7+
mh03USC-3qC.v2 266 chr3 196653084
8+
mh03USC-3qC.v2 303 chr3 196653121
9+
mh04WL-052.v1 54 chr4 2303788
10+
mh04WL-052.v1 61 chr4 2303795
11+
mh04WL-052.v1 99 chr4 2303833
12+
mh04WL-052.v1 105 chr4 2303839
13+
mh04WL-052.v1 172 chr4 2303906
14+
mh04WL-052.v1 185 chr4 2303919
15+
mh04WL-052.v1 210 chr4 2303944
16+
mh04WL-052.v1 272 chr4 2304006
17+
mh04WL-052.v1 296 chr4 2304030
18+
mh06SCUZJ-0528857 92 chr6 73429488
19+
mh06SCUZJ-0528857 157 chr6 73429553
20+
mh06SCUZJ-0528857 167 chr6 73429563
21+
mh06SCUZJ-0528857 200 chr6 73429596
22+
mh06SCUZJ-0528857 226 chr6 73429622
23+
mh06SCUZJ-0528857 233 chr6 73429629
24+
mh06SCUZJ-0528857 258 chr6 73429654
25+
mh17FHL-005.v3 51 chr17 78268164
26+
mh17FHL-005.v3 92 chr17 78268205
27+
mh17FHL-005.v3 200 chr17 78268313
28+
mh17FHL-005.v3 235 chr17 78268348
29+
mh17FHL-005.v3 265 chr17 78268378
30+
mh17FHL-005.v3 299 chr17 78268412
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"markers": {
3+
"mh03USC-3qC.v2": {
4+
"genotype": [
5+
{
6+
"haplotype": "C,T,A,C,C,G,G",
7+
"index": 0
8+
},
9+
{
10+
"haplotype": "G,T,G,T,C,A,G",
11+
"index": 1
12+
}
13+
]
14+
},
15+
"mh04WL-052.v1": {
16+
"genotype": [
17+
{
18+
"haplotype": "A,C,C,G,A,G,T,C,C",
19+
"index": 0
20+
},
21+
{
22+
"haplotype": "G,T,C,A,A,G,C,T,C",
23+
"index": 1
24+
}
25+
]
26+
},
27+
"mh06SCUZJ-0528857": {
28+
"genotype": [
29+
{
30+
"haplotype": "A,A,C,T,G,T,T",
31+
"index": 0
32+
},
33+
{
34+
"haplotype": "G,A,C,T,G,T,C",
35+
"index": 1
36+
}
37+
]
38+
},
39+
"mh17FHL-005.v3": {
40+
"genotype": [
41+
{
42+
"haplotype": "A,G,C,C,T,T",
43+
"index": 0
44+
},
45+
{
46+
"haplotype": "A,G,T,T,T,T",
47+
"index": 1
48+
}
49+
]
50+
}
51+
},
52+
"metadata": {
53+
"HaploSeed": 1278493438
54+
},
55+
"ploidy": 2,
56+
"type": "SimulatedProfile",
57+
"version": "0.8.4+3.g41bc147"
58+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
>mh03USC-3qC GRCh38:chr3:196652818-196653169 mh03USC-3qC.v2=47,153,207,216,226,266,303
2+
CTGGAGTCACAGCAGCAGGTACCACCCCACACCTGACAAGAGGTGGCCAGGGAGGAAGGGAGGGTTCTTACCTCCCCATC
3+
TCCCCTCAGTAAAATTCAGGATGCCCAGTGAAGTTTGAATGTCAGATAAACAATTTGTTAGTATAAGGATGTATCTAGCA
4+
TTGAAATGATGCCTTGTAATTTACTAAATCTGCAACTATGCAGCCTTATTTCATGGCGGGCAGTGGTGGTGATCCCAGGT
5+
TTCAGGGGCGGGGAAGGGTGCTGGGGGGATCCTGAGGTCAGGAACCCGTACACCTCTGCTTCTGCCCTCTCTTCCCTGTG
6+
CCGGCCACAAGGCAATGACTCCTGTGTGGGT
7+
>mh04WL-052 GRCh38:chr4:2303734-2304085 mh04WL-052.v1=54,61,99,105,172,185,210,272,296
8+
CCCCTGGGAATTTGCCCATGGGTGGTCTCAGCACCCCTCATGGCCCCAGTCTCCGCGCCCACGGCCACGGGTCTGCGTCT
9+
CGTTTCAGCCCGGTGTGAGCCAGGCAGCCACAGTGGTCAGCAAAGAGAAGTGGGCAGTGCGCTCGGATGCGGCTCAGGGC
10+
AAGGGGCCCGGCGCTGGCACAGCCCGCTCCCACTGCCCCGCACAGGCACGCGGCTCACACCAGGGGCGGGATGGGCCGGG
11+
GCTGCGGCAGGTGAGCTGAGGGCTTGGCACTGCGCCCATGTCACCTGACAGGCACCCGGAACATCAGAAACAGGGAAGCG
12+
GGCCCGCTGGCGCACATTTTCTGGTGCATGA
13+
>mh06SCUZJ-0528857 GRCh38:chr6:73429396-73429747 mh06SCUZJ-0528857=92,157,167,200,226,233,258
14+
GTTAAATATTTTGTTTCAATAATAATTTGAATCAATTTATTCACTTTTTCTGTACCATCCTTACAGGTTTTATATTAATA
15+
CAAAATGAATTGGGGACCATTTCCTTTATTCTTATTTATGAGAGTTTGTGTAAATATAGAATGATTTCACCTGGGCGCGG
16+
TGGCTCACGCCTGTAATCCCAGCACCTTGGGAGGCCAAGGTGGGCGGATCACAAGGTCGGGAGATCAAGACCATCCTGGC
17+
TAACACAGTGAAACCCTGCCTCTACCAAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAATCCCAGCTAC
18+
TTGGGAGGCTGAGGCAGGAGAATGGCGCGAA
19+
>mh17FHL-005 GRCh38:chr17:78268113-78268464 mh17FHL-005.v3=51,92,200,235,265,299
20+
GGTGCCCAGGTGGGGGGCGGGCGGGTGGTCTTTCTTTTGGGGCAGCATCCCAAAAGAAAGGAAAAACAGCAGGGGGCGTG
21+
GCTCCCCTCACTGCCCCGCAACCATCCCTGCCTGGCCAGGGCCTTTTGGGCAGTGCCACTGGAGATTCCTGGAAGTGCCT
22+
TCGGGCCACACACACACACATCCTCCGGGACCCTCCCAGCCCCCCACTCTGCGCTTCCCGGGTTTGCAGGAGGGCCCCAA
23+
GGCCAGCCGGGAGCCCTGCAGGACCTGGGAGGAACCTGAGCCTGAAGGGTCTAGGAGCCCGCCGTGTCTGCTCTCAAGTG
24+
CCCCGCGTTCCCTTGGCCTGATCCCCGATCC

microhapulator/tests/test_seq.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def test_proportions(n, totalreads, prop, result):
3636

3737
def test_proportions_failure_modes():
3838
message = r"mismatch between contributor number and proportions"
39-
with pytest.raises(ValueError, match=message) as ve:
39+
with pytest.raises(ValueError, match=message):
4040
mhapi.calc_n_reads_from_proportions(3, 1000, [0.6, 0.4])
4141

4242

@@ -250,6 +250,18 @@ def test_main_out_three_filenames(capsys):
250250
data_file("prof/orange-sim-profile.json"),
251251
]
252252
with pytest.raises(SystemExit):
253-
args = microhapulator.cli.get_parser().parse_args(arglist)
253+
_ = microhapulator.cli.get_parser().parse_args(arglist)
254254
terminal = capsys.readouterr()
255255
assert "expected 1 or 2 output filenames, got 3" in terminal.err
256+
257+
258+
def test_regression_seq_locus_names_in_refr():
    """Regression test: sequencing works when the reference uses locus names.

    The profile and the offsets table identify markers with versioned IDs
    (e.g. ``mh03USC-3qC.v2``), while the reference FASTA records are named
    by locus (e.g. ``mh03USC-3qC``). Simulated sequencing must still run to
    completion and yield roughly the requested number of reads.
    """
    profile = Profile(fromfile=data_file("prof/mwg4-sim.json"))
    index = MicrohapIndex.from_files(
        data_file("def/mwg4-offsets.tsv"), data_file("refr/mwg4-refr.fasta")
    )
    sequencer = mhapi.seq([profile], index, totalreads=1000)
    # Bind n up front so an empty sequencer fails the assertion below
    # instead of raising NameError.
    n = 0
    for n, _read1, _read2 in sequencer:
        pass
    # NOTE(review): presumably n is the running count of read pairs, hence
    # the factor of two -- confirm against mhapi.seq.
    numfragments = n * 2
    assert numfragments == pytest.approx(1000, abs=50)

0 commit comments

Comments
 (0)