Rename encode_alignment to encode_alleles

jeromekelleher · jeromekelleher · commit d78b0452b540 · 2025-11-21T14:13:57.000Z
diff --git a/docs/alignments_analysis.md b/docs/alignments_analysis.md
@@ -146,10 +146,10 @@ The drawback of this is that it's not as easy to inspect and debug, and we must
 always be aware of the translation required.
 
 Sc2ts provides some utilities for doing this. The easiest way to get the string
-values is to use {func}`decode_alignment` function:
+values is to use the {func}`decode_alleles` function:
 
 ```{code-cell}
-a = sc2ts.decode_alignment(ds.alignment["SRR11597146"])
+a = sc2ts.decode_alleles(ds.alignment["SRR11597146"])
 a
 ```
 This is a numpy string array, which can still be processed quite efficiently.
@@ -179,7 +179,6 @@ easily and handled correctly by downstream utilities.
 :::{warning}
 It is important to take this into account when translating the integer encoded data into
 strings, because -1 is interpreted as the last element of the list in Python. Please
-use the {func}`decode_alignment` function
-
+use the {func}`decode_alleles` function to avoid this tripwire.
 :::
 
diff --git a/docs/api.md b/docs/api.md
@@ -35,7 +35,7 @@ notebooks.
 ```{eval-rst}
 .. autosummary::
    Dataset
-   decode_alignment
+   decode_alleles
    mask_ambiguous
    mask_flanking_deletions
 ```
@@ -44,7 +44,7 @@ notebooks.
 .. autoclass:: Dataset
    :members:
 
-.. autofunction:: decode_alignment
+.. autofunction:: decode_alleles
 
 .. autofunction:: mask_ambiguous
 
diff --git a/sc2ts/cli.py b/sc2ts/cli.py
@@ -157,7 +157,7 @@ def import_alignments(dataset, fastas, initialise, progress, verbose):
             position=1,
         )
         for k, v in a_bar:
-            alignments[k] = jit.encode_alignment(v)
+            alignments[k] = jit.encode_alleles(v)
         sc2ts.Dataset.append_alignments(dataset, alignments)
 
 
diff --git a/sc2ts/dataset.py b/sc2ts/dataset.py
@@ -414,7 +414,7 @@ def write_fasta(self, out, sample_id=None):
 
         for sid in sample_id:
             h = self.alignment[sid]
-            a = decode_alignment(h)
+            a = decode_alleles(h)
             print(f">{sid}", file=out)
             # FIXME this is probably a terrible way to write a large numpy string to
             # a file
diff --git a/sc2ts/inference.py b/sc2ts/inference.py
@@ -1227,10 +1227,10 @@ def make_tsb(ts, num_alleles, mirror_coordinates=False):
     ts = tree_ops.insert_vestigial_root_edge(ts)
 
     # Convert arrays for numba compatibility
-    ancestral_state = jit.encode_alignment(
+    ancestral_state = jit.encode_alleles(
         np.asarray(ts.sites_ancestral_state, dtype="U1")
     )
-    derived_state = jit.encode_alignment(
+    derived_state = jit.encode_alleles(
         np.asarray(ts.mutations_derived_state, dtype="U1")
     )
 
diff --git a/sc2ts/jit.py b/sc2ts/jit.py
@@ -203,7 +203,7 @@ def count(ts):
 
 # FIXME make cache optional.
 @numba.njit(cache=True)
-def encode_alignment(h):
+def encode_alleles(h):
     # Just so numba knows this is a constant string.
     alleles = "ACGT-RYSWKMBDHV."
     n = h.shape[0]
diff --git a/tests/sc2ts_fixtures.py b/tests/sc2ts_fixtures.py
@@ -47,7 +47,7 @@ def encoded_alignments(path):
     fr = data_import.FastaReader(path)
     alignments = {}
     for k, v in fr.items():
-        alignments[k] = jit.encode_alignment(v[1:])
+        alignments[k] = jit.encode_alleles(v[1:])
     return alignments
 
 
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -550,14 +550,14 @@ class TestEncodeAlignment:
     )
     def test_examples(self, hap, expected):
         h = np.array(list(hap), dtype="U1")
-        a = jit.encode_alignment(h)
+        a = jit.encode_alleles(h)
         nt.assert_array_equal(a, expected)
 
     @pytest.mark.parametrize("hap", "acgtXZxz")
     def test_other_error(self, hap):
         h = np.array(list(hap), dtype="U1")
         with pytest.raises(ValueError, match="not recognised"):
-            jit.encode_alignment(h)
+            jit.encode_alleles(h)
 
 
 class TestDecodeAlleles:
@@ -609,6 +609,6 @@ class TestMaskFlankingDeletions:
         ],
     )
     def test_examples(self, nucs, expected):
-        a = jit.encode_alignment(np.array(list(nucs), dtype="U1"))
-        b = jit.encode_alignment(np.array(list(expected), dtype="U1"))
+        a = jit.encode_alleles(np.array(list(nucs), dtype="U1"))
+        b = jit.encode_alleles(np.array(list(expected), dtype="U1"))
         nt.assert_array_equal(sc2ts.mask_flanking_deletions(a), b)

Original file line number	Diff line number	Diff line change
`@@ -157,7 +157,7 @@ def import_alignments(dataset, fastas, initialise, progress, verbose):`
`157`	`157`	`position=1,`
`158`	`158`	`)`
`159`	`159`	`for k, v in a_bar:`
`160`		`- alignments[k] = jit.encode_alignment(v)`
	`160`	`+ alignments[k] = jit.encode_alleles(v)`
`161`	`161`	`sc2ts.Dataset.append_alignments(dataset, alignments)`
`162`	`162`
`163`	`163`
Original file line number	Diff line number	Diff line change
`@@ -1227,10 +1227,10 @@ def make_tsb(ts, num_alleles, mirror_coordinates=False):`
`1227`	`1227`	`ts = tree_ops.insert_vestigial_root_edge(ts)`
`1228`	`1228`
`1229`	`1229`	`# Convert arrays for numba compatibility`
`1230`		`- ancestral_state = jit.encode_alignment(`
	`1230`	`+ ancestral_state = jit.encode_alleles(`
`1231`	`1231`	`np.asarray(ts.sites_ancestral_state, dtype="U1")`
`1232`	`1232`	`)`
`1233`		`- derived_state = jit.encode_alignment(`
	`1233`	`+ derived_state = jit.encode_alleles(`
`1234`	`1234`	`np.asarray(ts.mutations_derived_state, dtype="U1")`
`1235`	`1235`	`)`
`1236`	`1236`