@@ -23,11 +23,11 @@ def test_simple_tree_sequence(self, tmp_path):
23
23
tables .edges .add_row (left = 0 , right = 100 , parent = 5 , child = 2 )
24
24
tables .edges .add_row (left = 0 , right = 100 , parent = 5 , child = 3 )
25
25
site_id = tables .sites .add_row (position = 10 , ancestral_state = "A" )
26
- tables .mutations .add_row (site = site_id , node = 4 , derived_state = "T " )
27
- site_id = tables .sites .add_row (position = 20 , ancestral_state = "C " )
26
+ tables .mutations .add_row (site = site_id , node = 4 , derived_state = "TTTT " )
27
+ site_id = tables .sites .add_row (position = 20 , ancestral_state = "CCC " )
28
28
tables .mutations .add_row (site = site_id , node = 5 , derived_state = "G" )
29
29
site_id = tables .sites .add_row (position = 30 , ancestral_state = "G" )
30
- tables .mutations .add_row (site = site_id , node = 0 , derived_state = "A " )
30
+ tables .mutations .add_row (site = site_id , node = 0 , derived_state = "AA " )
31
31
tables .sort ()
32
32
tree_sequence = tables .tree_sequence ()
33
33
tree_sequence .dump (tmp_path / "test.trees" )
@@ -53,7 +53,12 @@ def test_simple_tree_sequence(self, tmp_path):
53
53
alleles = zroot ["variant_allele" ][:]
54
54
assert alleles .shape == (3 , 2 )
55
55
assert alleles .dtype == "O"
56
- assert np .array_equal (alleles , [["A" , "T" ], ["C" , "G" ], ["G" , "A" ]])
56
+ assert np .array_equal (alleles , [["A" , "TTTT" ], ["CCC" , "G" ], ["G" , "AA" ]])
57
+
58
+ lengths = zroot ["variant_length" ][:]
59
+ assert lengths .shape == (3 ,)
60
+ assert lengths .dtype == np .int8
61
+ assert np .array_equal (lengths , [4 , 3 , 2 ])
57
62
58
63
genotypes = zroot ["call_genotype" ][:]
59
64
assert genotypes .shape == (3 , 2 , 2 )
@@ -64,7 +69,7 @@ def test_simple_tree_sequence(self, tmp_path):
64
69
65
70
phased = zroot ["call_genotype_phased" ][:]
66
71
assert phased .shape == (3 , 2 )
67
- assert phased .dtype == np . bool
72
+ assert phased .dtype == " bool"
68
73
assert np .all (phased )
69
74
70
75
contigs = zroot ["contig_id" ][:]
@@ -82,15 +87,22 @@ def test_simple_tree_sequence(self, tmp_path):
82
87
assert samples .dtype == "O"
83
88
assert np .array_equal (samples , ["tsk_0" , "tsk_1" ])
84
89
90
+ region_index = zroot ["region_index" ][:]
91
+ assert region_index .shape == (1 ,6 )
92
+ assert region_index .dtype == np .int8
93
+ assert np .array_equal (region_index , [[ 0 , 0 , 10 , 30 , 31 , 3 ]])
94
+
85
95
assert set (zroot .array_keys ()) == {
86
96
"variant_position" ,
87
97
"variant_allele" ,
98
+ "variant_length" ,
88
99
"call_genotype" ,
89
100
"call_genotype_phased" ,
90
101
"call_genotype_mask" ,
91
102
"contig_id" ,
92
103
"variant_contig" ,
93
104
"sample_id" ,
105
+ "region_index" ,
94
106
}
95
107
96
108
@@ -113,8 +125,8 @@ def simple_ts(self, tmp_path):
113
125
tables .edges .add_row (left = 0 , right = 100 , parent = 5 , child = 2 )
114
126
tables .edges .add_row (left = 0 , right = 100 , parent = 5 , child = 3 )
115
127
site_id = tables .sites .add_row (position = 10 , ancestral_state = "A" )
116
- tables .mutations .add_row (site = site_id , node = 4 , derived_state = "T " )
117
- site_id = tables .sites .add_row (position = 20 , ancestral_state = "C " )
128
+ tables .mutations .add_row (site = site_id , node = 4 , derived_state = "TT " )
129
+ site_id = tables .sites .add_row (position = 20 , ancestral_state = "CCC " )
118
130
tables .mutations .add_row (site = site_id , node = 5 , derived_state = "G" )
119
131
site_id = tables .sites .add_row (position = 30 , ancestral_state = "G" )
120
132
tables .mutations .add_row (site = site_id , node = 0 , derived_state = "A" )
@@ -248,6 +260,7 @@ def test_schema_generation(self, simple_ts):
248
260
field_names = [field .name for field in schema .fields ]
249
261
assert "variant_position" in field_names
250
262
assert "variant_allele" in field_names
263
+ assert "variant_length" in field_names
251
264
assert "variant_contig" in field_names
252
265
assert "call_genotype" in field_names
253
266
assert "call_genotype_phased" in field_names
@@ -319,18 +332,22 @@ def test_iter_alleles_and_genotypes(self, simple_ts, ind_nodes, expected_gts):
319
332
320
333
assert len (results ) == 3
321
334
322
- for i , ( alleles , ( gt , phased )) in enumerate (results ):
335
+ for i , variant_data in enumerate (results ):
323
336
if i == 0 :
324
- assert tuple (alleles ) == ("A" , "T" )
337
+ assert variant_data .variant_length == 2
338
+ assert np .array_equal (variant_data .alleles , ("A" , "TT" ))
325
339
elif i == 1 :
326
- assert tuple (alleles ) == ("C" , "G" )
340
+ assert variant_data .variant_length == 3
341
+ assert np .array_equal (variant_data .alleles , ("CCC" , "G" ))
327
342
elif i == 2 :
328
- assert tuple (alleles ) == ("G" , "A" )
343
+ assert variant_data .variant_length == 1
344
+ assert np .array_equal (variant_data .alleles , ("G" , "A" ))
329
345
330
346
assert np .array_equal (
331
- gt , expected_gts [i ]
332
- ), f"Mismatch at variant { i } , expected { expected_gts [i ]} , got { gt } "
333
- assert np .all (phased )
347
+ variant_data .genotypes , expected_gts [i ]
348
+ ), f"Mismatch at variant { i } , expected { expected_gts [i ]} , "
349
+ f"got { variant_data .genotypes } "
350
+ assert np .all (variant_data .phased )
334
351
335
352
def test_iter_alleles_and_genotypes_errors (self , simple_ts ):
336
353
"""Test error cases for iter_alleles_and_genotypes with invalid inputs."""
@@ -398,12 +415,12 @@ def insert_branch_sites(ts, m=1):
398
415
)
399
416
400
417
assert len (results_default ) == 1
401
- alleles , ( gt_default , phased ) = results_default [0 ]
402
- assert tuple ( alleles ) == ("0" , "1" )
418
+ variant_data_default = results_default [0 ]
419
+ assert np . array_equal ( variant_data_default . alleles , ("0" , "1" ) )
403
420
404
421
# Sample 2 should have the ancestral state (0) when isolated_as_missing=False
405
422
expected_gt_default = np .array ([[1 ], [0 ], [0 ]])
406
- assert np .array_equal (gt_default , expected_gt_default )
423
+ assert np .array_equal (variant_data_default . genotypes , expected_gt_default )
407
424
408
425
format_obj_missing = ts .TskitFormat (
409
426
ts_path , individuals_nodes = ind_nodes , isolated_as_missing = True
@@ -413,12 +430,13 @@ def insert_branch_sites(ts, m=1):
413
430
)
414
431
415
432
assert len (results_missing ) == 1
416
- alleles , (gt_missing , phased ) = results_missing [0 ]
417
- assert tuple (alleles ) == ("0" , "1" )
433
+ variant_data_missing = results_missing [0 ]
434
+ assert variant_data_missing .variant_length == 1
435
+ assert np .array_equal (variant_data_missing .alleles , ("0" , "1" ))
418
436
419
437
# Individual 2 should have missing values (-1) when isolated_as_missing=True
420
438
expected_gt_missing = np .array ([[1 ], [0 ], [- 1 ]])
421
- assert np .array_equal (gt_missing , expected_gt_missing )
439
+ assert np .array_equal (variant_data_missing . genotypes , expected_gt_missing )
422
440
423
441
def test_genotype_dtype_selection (self , tmp_path ):
424
442
tables = tskit .TableCollection (sequence_length = 100 )
0 commit comments