Skip to content

Commit 31efb75

Browse files
hyanwong authored and mergify[bot] committed
Allow haplotypes and alignments to have start, stop, samples params
1 parent b74b98c commit 31efb75

File tree

3 files changed

+236
-65
lines changed

3 files changed

+236
-65
lines changed

python/CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414

1515
- New ``Site.alleles()`` method (:user:`hyanwong`, :issue:`2380`, :pr:`2385`)
1616

17+
- The ``variants()``, ``haplotypes()`` and ``alignments()`` methods can now
18+
take a list of sample ids and a left and right position, to restrict the
19+
size of the output (:user:`hyanwong`, :issue:`2092`, :pr:`2397`)
20+
1721

1822
--------------------
1923
[0.5.0] - 2022-06-22

python/tests/test_genotypes.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,33 @@ def test_snipped_tree_sequence_mutations_over_isolated(self):
638638
assert non_missing_found
639639
assert missing_found
640640

641+
def test_limit_interval(self):
    """
    Restricting ``variants()`` to the interval that starts at the position
    of site 1 and stops at the position of site 2 must yield exactly one
    variant: the one at site 1.
    """
    ts = self.get_tree_sequence()
    # Decode site 1 independently to obtain the expected genotypes/alleles.
    test_variant = tskit.Variant(ts)
    test_variant.decode(1)
    num_variants = 0
    for v in ts.variants(left=ts.site(1).position, right=ts.site(2).position):
        num_variants += 1
        # Only the variant at site 1 should be decoded.
        assert v.site.id == 1
        assert np.all(v.genotypes == test_variant.genotypes)
        assert v.alleles == test_variant.alleles
    # Without this check the test would pass vacuously if the restricted
    # iterator yielded nothing at all.
    assert num_variants == 1
650+
651+
def test_bad_left(self):
    """
    Each listed ``left`` value passed to ``variants()`` on a tree sequence
    of length 10 must raise a ValueError mentioning "`left` not between".
    """
    empty_ts = tskit.TableCollection(10).tree_sequence()
    invalid_lefts = [-1, 10, 100, np.nan, np.inf, -np.inf]
    for left in invalid_lefts:
        with pytest.raises(ValueError, match="`left` not between"):
            list(empty_ts.variants(left=left))
656+
657+
def test_bad_right(self):
    """
    Each listed ``right`` value passed to ``variants()`` on a tree sequence
    of length 10 must raise a ValueError mentioning "`right` not between".
    """
    empty_ts = tskit.TableCollection(10).tree_sequence()
    invalid_rights = [-1, 0, 100, np.nan, np.inf, -np.inf]
    for right in invalid_rights:
        with pytest.raises(ValueError, match="`right` not between"):
            list(empty_ts.variants(right=right))
662+
663+
def test_bad_left_right(self):
    """
    Passing equal ``left`` and ``right`` values to ``variants()`` must raise
    a ValueError whose message contains "must be less than".
    """
    empty_ts = tskit.TableCollection(10).tree_sequence()
    same_position = 1
    with pytest.raises(ValueError, match="must be less than"):
        list(empty_ts.variants(left=same_position, right=same_position))
667+
641668

642669
class TestHaplotypeGenerator:
643670
"""
@@ -830,6 +857,29 @@ def test_missing_data(self):
830857
)
831858
assert h == ["A", "A"]
832859

860+
def test_restrict_samples(self):
    """
    ``haplotypes(samples=[...])`` returns only the haplotype of the
    requested sample: "B" for sample 0 and "A" for sample 1.
    """
    tables = tskit.TableCollection(1.0)
    # Two sample nodes at time 0.
    for _ in range(2):
        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    tables.sites.add_row(0.5, "A")
    tables.mutations.add_row(0, 0, derived_state="B")
    ts = tables.tree_sequence()
    # Query sample 0 first, then sample 1, as separate single-sample calls.
    for sample_id, expected in [(0, "B"), (1, "A")]:
        selected = list(
            ts.haplotypes(samples=[sample_id], isolated_as_missing=False)
        )
        assert selected == [expected]
871+
872+
def test_restrict_positions(self):
    """
    ``haplotypes(left=0.2, right=0.4)`` on four sites at 0.1..0.4 returns
    only the states of the sites at 0.2 and 0.3 ("BC").
    """
    tables = tskit.TableCollection(1.0)
    tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    # One site per (position, ancestral state) pair, added in position order.
    site_specs = [(0.1, "A"), (0.2, "B"), (0.3, "C"), (0.4, "D")]
    for position, ancestral_state in site_specs:
        tables.sites.add_row(position, ancestral_state)
    ts = tables.tree_sequence()
    restricted = ts.haplotypes(left=0.2, right=0.4, isolated_as_missing=False)
    assert list(restricted) == ["BC"]
882+
833883

834884
class TestUserAlleles:
835885
"""
@@ -1091,6 +1141,14 @@ def test_alignments_default(self):
10911141
assert A[1] == "NNANNNNNNC"
10921142
assert A[2] == "NNANNNNNNC"
10931143

1144+
def test_alignments_restricted(self):
    """
    ``alignments()`` restricted to ``left=1, right=9`` and to the first two
    samples in reverse order returns the expected 8-character strings.
    """
    ts = self.ts()
    # samples[1::-1] selects the first two sample ids, second one first.
    reversed_pair = ts.samples()[1::-1]
    alignments = list(ts.alignments(left=1, right=9, samples=reversed_pair))
    assert alignments[0] == "NANNNNNN"
    assert alignments[1] == "NGNNNNNN"
1151+
10941152
def test_alignments_missing_data_char(self):
10951153
A = list(self.ts().alignments(missing_data_character="x"))
10961154
assert A[0] == "xxGxxxxxxT"
@@ -1669,13 +1727,17 @@ def test_reference_length_mismatch(self, ref_length):
16691727
tables = tskit.TableCollection(10)
16701728
tables.reference_sequence.data = "A" * ref_length
16711729
ts = tables.tree_sequence()
1672-
with pytest.raises(ValueError, match="same length"):
1730+
if ref_length <= tables.sequence_length:
1731+
with pytest.raises(ValueError, match="shorter than"):
1732+
list(ts.alignments())
1733+
else:
1734+
# Longer reference sequences are allowed
16731735
list(ts.alignments())
16741736

16751737
@pytest.mark.parametrize("ref", ["", "xy"])
16761738
def test_reference_sequence_length_mismatch(self, ref):
16771739
ts = self.simplest_ts()
1678-
with pytest.raises(ValueError, match="same length"):
1740+
with pytest.raises(ValueError, match="shorter than"):
16791741
list(ts.alignments(reference_sequence=ref))
16801742

16811743
@pytest.mark.parametrize("ref", ["À", "┃", "α"])
@@ -1699,6 +1761,23 @@ def test_non_ascii_missing_data_char(self, missing_data_char):
16991761
with pytest.raises(UnicodeEncodeError):
17001762
list(ts.alignments(missing_data_character=missing_data_char))
17011763

1764+
def test_bad_left(self):
    """
    A non-integer ``left`` passed to ``alignments()`` must raise a
    ValueError whose message contains "integer".
    """
    empty_ts = tskit.TableCollection(10).tree_sequence()
    fractional_left = 0.1
    with pytest.raises(ValueError, match="integer"):
        list(empty_ts.alignments(left=fractional_left))
1768+
1769+
def test_bad_right(self):
    """
    A non-integer ``right`` passed to ``alignments()`` must raise a
    ValueError whose message contains "integer".
    """
    empty_ts = tskit.TableCollection(10).tree_sequence()
    fractional_right = 1.1
    with pytest.raises(ValueError, match="integer"):
        list(empty_ts.alignments(right=fractional_right))
1773+
1774+
def test_bad_restricted(self):
    """
    With a 7-character reference sequence on a tree sequence of length 10,
    requesting ``alignments(right=8)`` must raise a ValueError whose message
    contains "sequence ends before".
    """
    tables = tskit.TableCollection(10)
    short_reference = "A" * 7
    tables.reference_sequence.data = short_reference
    ts = tables.tree_sequence()
    with pytest.raises(ValueError, match="sequence ends before"):
        list(ts.alignments(right=8))
1780+
17021781

17031782
class TestAlignmentExamples:
17041783
@pytest.mark.parametrize("ts", get_example_discrete_genome_tree_sequences())

0 commit comments

Comments
 (0)