Skip to content

Commit 033abb4

Browse files
Merge pull request #503 from jeromekelleher/fasta-alignments
Adds write_fasta method to Dataset
2 parents 2335ab2 + 823f041 commit 033abb4

File tree

2 files changed

+76
-0
lines changed

2 files changed

+76
-0
lines changed

sc2ts/dataset.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -289,6 +289,21 @@ def variants(self, sample_id=None, position=None):
289289
variant_alleles[j],
290290
)
291291

292+
def write_fasta(self, out, sample_id=None):
    """
    Emit one FASTA record per sample to ``out``.

    Each record is a ``>``-prefixed header line holding the sample ID,
    followed by the decoded alignment joined into a single line.

    :param out: Writable file-like object; handed to ``print`` as ``file``.
    :param sample_id: Iterable of sample IDs to export, written in the
        order given. When ``None``, ``self.sample_id`` is used instead.
    """
    selected = self.sample_id if sample_id is None else sample_id

    for name in selected:
        alignment = core.decode_alignment(self.haplotypes[name])
        print(f">{name}", file=out)
        # FIXME this is probably a terrible way to write a large numpy string
        # to a file
        sequence = "".join(alignment)
        print(sequence, file=out)
306+
292307
def copy(
293308
self,
294309
path,

tests/test_dataset.py

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,67 @@ def test_copy_subset(self, tmp_path, fx_dataset, sample_id):
224224
xt.assert_equal(permuted, sg_ds2)
225225

226226

227+
class TestDatasetFasta:
    """
    Round-trip tests for ``write_fasta``: export the fixture dataset to a
    FASTA file and compare it, record by record, against the reference
    alignments FASTA the fixtures provide.
    """

    def test_write_fasta_all(self, tmp_path, fx_dataset, fx_alignments_fasta):
        """Exporting with no ``sample_id`` reproduces every reference record."""
        path = tmp_path / "export.fa"
        with open(path, "w") as f:
            fx_dataset.write_fasta(f)

        fr1 = sc2ts.FastaReader(fx_alignments_fasta)
        fr2 = sc2ts.FastaReader(path)
        # Compare the record names first so that a missing or extra record is
        # reported as a set mismatch rather than a lookup failure in the loop.
        assert set(fr2.keys()) == set(fr1.keys())
        for k, a1 in fr1.items():
            a2 = fr2[k]
            nt.assert_array_equal(a1, a2)

    @pytest.mark.parametrize(
        "sample_id",
        [
            [
                "SRR11597195",
            ],
            [
                "SRR11597146",
                "SRR11597196",
                "SRR11597178",
                "SRR11597168",
                "SRR11597195",
                "SRR11597190",
                "SRR11597164",
                "SRR11597115",
            ],
            [
                "SRR11597115",
                "SRR11597146",
            ],
            [
                "SRR11597115",
                "SRR11597146",
                "SRR11597164",
                "SRR11597168",
                "SRR11597178",
                "SRR11597190",
                "SRR11597195",
                "SRR11597196",
            ],
        ],
    )
    def test_write_fasta_subset(
        self, tmp_path, fx_dataset, fx_alignments_fasta, sample_id
    ):
        """Exporting a subset writes exactly the requested records."""
        path = tmp_path / "export.fa"
        with open(path, "w") as f:
            fx_dataset.write_fasta(f, sample_id)

        fr1 = sc2ts.FastaReader(fx_alignments_fasta)
        fr2 = sc2ts.FastaReader(path)
        # Without this check the test would still pass if write_fasta ignored
        # sample_id and exported every sample in the dataset.
        assert set(fr2.keys()) == set(sample_id)
        for k in sample_id:
            a1 = fr1[k]
            a2 = fr2[k]
            nt.assert_array_equal(a1, a2)
286+
287+
227288
class TestDatasetVariants:
228289

229290
def test_all(self, fx_dataset):

0 commit comments

Comments
 (0)