@@ -31,8 +31,17 @@ def bed_path(self, tmp_path_factory):
3131 ],
3232 dtype = np .int8 ,
3333 )
34- bed_reader .to_bed (path , dosages .T )
35- return path
34+ m = 7
35+ d = {
36+ "chromosome" : ["chr1" ] * m ,
37+ "sid" : [f"id{ j } " for j in range (m )],
38+ "bp_position" : range (1 , m + 1 ),
39+ "allele_1" : ["A" ] * m ,
40+ "allele_2" : ["T" ] * m ,
41+ "iid" : [f"s{ j } " for j in range (3 )],
42+ }
43+ bed_reader .to_bed (path , dosages .T , properties = d )
44+ return path .with_suffix ("" )
3645
3746 @pytest .fixture (scope = "class" )
3847 def ds (self , tmp_path_factory , bed_path ):
@@ -57,6 +66,22 @@ def test_genotypes(self, ds):
5766 ],
5867 )
5968
69+ def test_variant_position (self , ds ):
70+ nt .assert_array_equal (ds ["variant_position" ], np .arange (1 , 8 ))
71+
72+ def test_variant_contig (self , ds ):
73+ nt .assert_array_equal (ds ["variant_contig" ], np .zeros (7 ))
74+ nt .assert_array_equal (ds ["contig_id" ], ["chr1" ])
75+
76+ def test_variant_id (self , ds ):
77+ nt .assert_array_equal (ds ["variant_id" ], [f"id{ j } " for j in range (7 )])
78+
79+ def test_variant_allele (self , ds ):
80+ nt .assert_array_equal (ds ["variant_allele" ], [["T" , "A" ] for _ in range (7 )])
81+
82+ def test_sample_id (self , ds ):
83+ nt .assert_array_equal (ds ["sample_id" ], ["s0" , "s1" , "s2" ])
84+
6085 def test_missing_dependency (self ):
6186 with mock .patch (
6287 "importlib.import_module" ,
@@ -75,25 +100,22 @@ def test_missing_dependency(self):
75100
76101class TestEqualSgkit :
77102 def test_simulated_example (self , tmp_path ):
78- data_path = "tests/data/plink/"
79- bed_path = data_path + "plink_sim_10s_100v_10pmiss.bed"
80- fam_path = data_path + "plink_sim_10s_100v_10pmiss.fam"
81- bim_path = data_path + "plink_sim_10s_100v_10pmiss.bim"
103+ prefix = "tests/data/plink/plink_sim_10s_100v_10pmiss"
82104 sg_ds = sgkit .io .plink .read_plink (
83- bed_path = bed_path , fam_path = fam_path , bim_path = bim_path
105+ bed_path = prefix + ".bed" , fam_path = prefix + ".fam" , bim_path = prefix + ".bim"
84106 )
85107 out = tmp_path / "example.plink.zarr"
86- plink .convert (prefix = data_path + "/plink_sim_10s_100v_10pmiss" , out = out )
108+ plink .convert (prefix = prefix , out = out )
87109 ds = sg .load_dataset (out )
88110 nt .assert_array_equal (ds .call_genotype .values , sg_ds .call_genotype .values )
89111 nt .assert_array_equal (
90112 ds .call_genotype_mask .values , sg_ds .call_genotype_mask .values
91113 )
92114 # sgkit doesn't have phased
93115 nt .assert_array_equal (ds .variant_position .values , sg_ds .variant_position .values )
94- nt .assert_array_equal (
95- ds .variant_allele .values , sg_ds .variant_allele .values .astype ("U" )
96- )
116+ # nt.assert_array_equal(
117+ # ds.variant_allele.values, sg_ds.variant_allele.values.astype("U")
118+ # )
97119 nt .assert_array_equal (ds .variant_contig .values , sg_ds .variant_contig .values )
98120 nt .assert_array_equal (ds .variant_id .values , sg_ds .variant_id .values )
99121 # print(sg_ds.variant_id.values)
@@ -137,7 +159,7 @@ class TestExample:
137159
138160 @pytest .fixture (scope = "class" )
139161 def ds (self , tmp_path_factory ):
140- path = "tests/data/plink/example.bed "
162+ path = "tests/data/plink/example"
141163 out = tmp_path_factory .mktemp ("data" ) / "example.plink.zarr"
142164 plink .convert (path , out )
143165 return sg .load_dataset (out )
@@ -211,7 +233,7 @@ def test_sgkit(self, ds):
211233class TestSimulatedExample :
212234 @pytest .fixture (scope = "class" )
213235 def ds (self , tmp_path_factory ):
214- path = "tests/data/plink/plink_sim_10s_100v_10pmiss.bed "
236+ path = "tests/data/plink/plink_sim_10s_100v_10pmiss"
215237 out = tmp_path_factory .mktemp ("data" ) / "example.plink.zarr"
216238 plink .convert (path , out )
217239 return sg .load_dataset (out )
@@ -287,7 +309,7 @@ def test_contig_arrays(self, ds):
287309 def test_chunk_size (
288310 self , ds , tmp_path , variants_chunk_size , samples_chunk_size , worker_processes
289311 ):
290- path = "tests/data/plink/plink_sim_10s_100v_10pmiss.bed "
312+ path = "tests/data/plink/plink_sim_10s_100v_10pmiss"
291313 out = tmp_path / "example.zarr"
292314 plink .convert (
293315 path ,
@@ -318,7 +340,7 @@ def test_chunk_size(
318340def test_by_validating (
319341 tmp_path , variants_chunk_size , samples_chunk_size , worker_processes
320342):
321- path = "tests/data/plink/plink_sim_10s_100v_10pmiss.bed "
343+ path = "tests/data/plink/plink_sim_10s_100v_10pmiss"
322344 out = tmp_path / "example.zarr"
323345 plink .convert (
324346 path ,
@@ -372,7 +394,7 @@ def multi_contig_bed_path(self, tmp_path_factory):
372394 # Format: fam_id, ind_id, pat_id, mat_id, sex, phenotype
373395 f .write (f"fam{ i } ind{ i } 0 0 0 -9\n " )
374396
375- return path
397+ return path . with_suffix ( "" )
376398
377399 @pytest .fixture (scope = "class" )
378400 def ds (self , tmp_path_factory , multi_contig_bed_path ):
0 commit comments