Fixup tests

jeromekelleher · jeromekelleher · commit 7162ef94f713 · 2025-11-21T14:30:27.000Z
diff --git a/docs/alignments_analysis.md b/docs/alignments_analysis.md
@@ -111,14 +111,14 @@ np.mean(gap_count)
 ```
 
 :::{warning}
-The arrays returned by the ``alignment`` are **zero based** and you
+The arrays returned by the ``alignment`` interface are **zero-based** and you
 must compensate to use **one-based** coordinates.
 :::
 
 If you want to access
 specific slices of the array based on **one-based** coordinates, it's important
 to take the zero-based nature of this into account. Suppose we wanted to
-access the first 10 bases of Spike for a give sample. The first
+access the first 10 bases of Spike for a given sample. The first
 base of Spike is 21563 in standard one-based coordinates. While we could do
 some arithmetic to compensate, the simplest way to translate is to simply
 prepend some value to the alignment array:
@@ -129,7 +129,6 @@ spike_start = 21_563
 a[spike_start: spike_start + 10]
 ```
 
-
 (sec_alignments_analysis_data_encoding)=
 
 ## Alignment data encoding
@@ -182,3 +181,16 @@ strings, because -1 is interpreted as the last element of the list in Python. Pl
 use the {func}`decode_alleles` function to avoid this tripwire.
 :::
 
+
+## Accessing by variant
+
+A unique feature of the VCF Zarr encoding used here is that we can efficiently access
+the alignment data by sample **and** by site. The best way to access data by site
+is to use the {meth}`Dataset.variants` method.
+
+:::{note}
+The {meth}`Dataset.variants` method is deliberately designed to mirror the API
+of the corresponding [tskit](https://tskit.dev) method
+({meth}`tskit.TreeSequence.variants`).
+:::
+
diff --git a/tests/test_inference.py b/tests/test_inference.py
@@ -188,7 +188,7 @@ def test_match_reference(self, mirror):
         ts = tables.tree_sequence()
         alignment = sc2ts.data_import.get_reference_sequence(as_array=True)
         alignment[0] = "A"
-        a = jit.encode_alignment(alignment)
+        a = jit.encode_alleles(alignment)
         h = a[ts.sites_position.astype(int)]
         samples = [si.Sample("test", "2020-01-01", haplotype=h)]
         matches = self.match_tsinfer(samples, ts, mirror_coordinates=mirror)
@@ -205,7 +205,7 @@ def test_match_reference_one_mutation(self, mirror, site_id):
         ts = tables.tree_sequence()
         alignment = sc2ts.data_import.get_reference_sequence(as_array=True)
         alignment[0] = "A"
-        a = jit.encode_alignment(alignment)
+        a = jit.encode_alleles(alignment)
         h = a[ts.sites_position.astype(int)]
         samples = [si.Sample("test", "2020-01-01", haplotype=h)]
         # Mutate to gap
@@ -231,7 +231,7 @@ def test_match_reference_all_same(self, mirror, allele):
         ts = tables.tree_sequence()
         alignment = sc2ts.data_import.get_reference_sequence(as_array=True)
         alignment[0] = "A"
-        a = jit.encode_alignment(alignment)
+        a = jit.encode_alleles(alignment)
         ref = a[ts.sites_position.astype(int)]
         h = np.zeros_like(ref) + allele
         samples = [si.Sample("test", "2020-01-01", haplotype=h)]