Skip to content

Commit 6de784e

Browse files
committed
feat!: lift over interval, not position (#66)
Lift over an interval rather than a single position. This guards against several downstream problems, such as not knowing whether the span between the start and end also lifts over cleanly, and it saves us a lookup.
1 parent 8d692a1 commit 6de784e

File tree

5 files changed

+94
-41
lines changed

5 files changed

+94
-41
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ c = Converter(Assembly.HG19, Assembly.HG38)
3434
Call ``convert_coordinate()``:
3535

3636
```python3
37-
c.convert_coordinate("chr7", 140453136, Strand.POSITIVE)
37+
c.convert_coordinate("chr7", 140453136, 140453137, Strand.POSITIVE)
38+
# returns [LiftoverResult(chrom='chr7', start=140753336, end=140753337, strand=<Strand.POSITIVE: '+'>)]
3839
```
3940

4041
## Development

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "agct"
3-
version = "0.1.1"
3+
version = "0.2.0"
44
authors = [
55
{name = "James Stevenson"},
66
{name = "Kori Kuzma"},

rust/src/lib.rs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,13 @@ impl Converter {
4040
}
4141

4242
/// Perform liftover
43-
pub fn lift(&self, chrom: &str, pos: u64, strand: &str) -> PyResult<Vec<Vec<String>>> {
43+
pub fn lift(
44+
&self,
45+
chrom: &str,
46+
start: u64,
47+
end: u64,
48+
strand: &str,
49+
) -> PyResult<Vec<Vec<String>>> {
4450
let parsed_strand = if strand == "+" {
4551
Strand::Positive
4652
} else if strand == "-" {
@@ -52,15 +58,14 @@ impl Converter {
5258
)));
5359
};
5460
// build the interval endpoints from the caller-supplied `start` and `end`
55-
let start = Coordinate::new(chrom, parsed_strand.clone(), pos);
56-
let end = Coordinate::new(chrom, parsed_strand.clone(), pos + 1);
61+
let start_coordinate = Coordinate::new(chrom, parsed_strand.clone(), start);
62+
let end_coordinate = Coordinate::new(chrom, parsed_strand.clone(), end);
5763

58-
let Ok(interval) = Interval::try_new(start, end) else {
64+
let Ok(interval) = Interval::try_new(start_coordinate.clone(), end_coordinate.clone())
65+
else {
5966
return Err(ChainfileError::new_err(format!(
6067
"Chainfile yielded invalid interval from coordinates: \"{}\" (\"{}\", \"{}\")",
61-
&chrom,
62-
pos,
63-
pos + 1
68+
&chrom, start_coordinate, end_coordinate
6469
)));
6570
};
6671
if let Some(liftover_result) = self.machine.liftover(interval.clone()) {
@@ -70,14 +75,15 @@ impl Converter {
7075
vec![
7176
r.query().contig().to_string(),
7277
r.query().start().position().to_string(),
78+
r.query().end().position().to_string(),
7379
r.query().strand().to_string(),
7480
]
7581
})
7682
.collect())
7783
} else {
7884
Err(NoLiftoverError::new_err(format!(
79-
"No liftover available for \"{}\" on \"{}\"",
80-
chrom, pos
85+
"No liftover available for \"{}\" on [\"{}\",\"{}\"]",
86+
chrom, start_coordinate, end_coordinate
8187
)))
8288
}
8389
}

src/agct/converter.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ class LiftoverResult(NamedTuple):
2727
"""Declare structure of liftover response"""
2828

2929
chrom: str
30-
position: int
30+
start: int
31+
end: int
3132
strand: Strand
3233

3334

@@ -121,7 +122,7 @@ def _download_data(version: str, file: Path) -> None: # noqa: ARG001
121122
return _download_data
122123

123124
def convert_coordinate(
124-
self, chrom: str, pos: int, strand: Strand = Strand.POSITIVE
125+
self, chrom: str, start: int, end: int, strand: Strand = Strand.POSITIVE
125126
) -> list[LiftoverResult]:
126127
"""Perform liftover for given params
127128
@@ -130,40 +131,50 @@ def convert_coordinate(
130131
.. code-block:: pycon
131132
132133
>>> from agct import Converter, Strand, Assembly
133-
134134
>>> c = Converter(Assembly.HG19, Assembly.HG38)
135-
>>> c.convert_coordinate("chr7", 140453136, Strand.POSITIVE)
136-
[LiftoverResult(chrom='chr7', position=140753336, strand=<Strand.POSITIVE: '+'>)]
137-
135+
>>> c.convert_coordinate("chr7", 140453136, 140453137, Strand.POSITIVE)
136+
[LiftoverResult(chrom='chr7', start=140753336, end=140753337, strand=<Strand.POSITIVE: '+'>)]
138137
139138
:param chrom: chromosome name as given in chainfile. Usually e.g. ``"chr7"``.
140-
:param pos: query position
139+
:param start: start position of coordinate interval (inter-residue)
140+
:param end: end position of coordinate interval (inter-residue)
141141
:param strand: query strand (``"+"`` by default).
142142
:return: list of coordinate matches (possibly empty)
143+
:raise ValueError: if ``start`` > ``end`` and strandedness is positive, or
144+
``start`` < ``end`` and strandedness is negative
143145
"""
146+
if start < end and strand == Strand.NEGATIVE:
147+
msg = f"`start` must be less than `end` on the negative strand: {start=}, {end=}"
148+
raise ValueError(msg)
149+
if start > end and strand == Strand.POSITIVE:
150+
msg = f"`end` must be less than `start` on the positive strand: {start=}, {end=}"
151+
raise ValueError(msg)
144152
try:
145-
results = self._converter.lift(chrom, pos, strand)
153+
results = self._converter.lift(chrom, start, end, strand)
146154
except _core.NoLiftoverError:
147155
results = []
148156
except _core.ChainfileError:
149157
_logger.exception(
150-
"Encountered internal error while converting coordinates - is the chainfile invalid? (%s, %s, %s)",
158+
"Encountered internal error while converting coordinates - is the chainfile invalid? (%s, [%s, %s], %s)",
151159
chrom,
152-
pos,
160+
start,
161+
end,
153162
strand,
154163
)
155164
results = []
156165
formatted_results: list[LiftoverResult] = []
157166
for result in results:
158167
try:
159-
pos = int(result[1])
168+
lifted_over_start, lifted_over_end = int(result[1]), int(result[2])
160169
except ValueError:
161170
_logger.exception("Got invalid position value in %s", result)
162171
continue
163172
try:
164-
strand = Strand(result[2])
173+
strand = Strand(result[3])
165174
except ValueError:
166175
_logger.exception("Got invalid Strand value in %s", result)
167176
continue
168-
formatted_results.append(LiftoverResult(result[0], pos, strand))
177+
formatted_results.append(
178+
LiftoverResult(result[0], lifted_over_start, lifted_over_end, strand)
179+
)
169180
return formatted_results

tests/test_liftover.py

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,84 @@
11
"""Run liftover tests."""
22

3+
import re
4+
5+
import pytest
6+
37
from agct import Assembly, Converter, LiftoverResult, Strand
48

59

610
def test_hg19_to_hg38():
711
"""Test hg19 to hg38 lifter."""
812
converter = Converter(Assembly.HG19, Assembly.HG38)
913

10-
result = converter.convert_coordinate("chr7", 140439611)
14+
result = converter.convert_coordinate("chr7", 140439611, 140439611)
1115
assert len(result) == 1
12-
assert result[0] == LiftoverResult("chr7", 140739811, Strand.POSITIVE)
16+
assert result[0] == LiftoverResult("chr7", 140739811, 140739811, Strand.POSITIVE)
1317

14-
result = converter.convert_coordinate("chr7", 140439746)
18+
result = converter.convert_coordinate("chr7", 140439746, 140439746)
1519
assert len(result) == 1
16-
assert result[0] == LiftoverResult("chr7", 140739946, Strand.POSITIVE)
20+
assert result[0] == LiftoverResult("chr7", 140739946, 140739946, Strand.POSITIVE)
1721

18-
result = converter.convert_coordinate("chr7", 140439703)
22+
result = converter.convert_coordinate("chr7", 140439703, 140439703)
1923
assert len(result) == 1
20-
assert result[0] == LiftoverResult("chr7", 140739903, Strand.POSITIVE)
24+
assert result[0] == LiftoverResult("chr7", 140739903, 140739903, Strand.POSITIVE)
2125

22-
result = converter.convert_coordinate("chr7", 140453136)
26+
result = converter.convert_coordinate("chr7", 140453136, 140453136)
2327
assert len(result) == 1
24-
assert result[0] == LiftoverResult("chr7", 140753336, Strand.POSITIVE)
28+
assert result[0] == LiftoverResult("chr7", 140753336, 140753336, Strand.POSITIVE)
2529

26-
result = converter.convert_coordinate("chr1", 206072707)
30+
result = converter.convert_coordinate("chr1", 206072707, 206072708)
2731
assert len(result) == 1
28-
assert result[0] == LiftoverResult("chr1", 206268644, Strand.NEGATIVE)
32+
assert result[0] == LiftoverResult("chr1", 206268644, 206268643, Strand.NEGATIVE)
2933

3034
# coordinate exceeds bounds
31-
result = converter.convert_coordinate("chr7", 14040053136)
35+
result = converter.convert_coordinate("chr7", 14040053136, 14040053136)
3236
assert result == []
3337

3438

3539
def test_hg38_to_hg19():
3640
"""Test hg38 to hg19 lifter."""
3741
converter = Converter(Assembly.HG38, Assembly.HG19)
3842

39-
result = converter.convert_coordinate("chr7", 140739811)
43+
result = converter.convert_coordinate("chr7", 140739811, 140739811)
44+
assert len(result) == 1
45+
assert result[0] == LiftoverResult("chr7", 140439611, 140439611, Strand.POSITIVE)
46+
47+
result = converter.convert_coordinate("chr7", 140759820, 140759820)
48+
assert len(result) == 1
49+
assert result[0] == LiftoverResult("chr7", 140459620, 140459620, Strand.POSITIVE)
50+
51+
result = converter.convert_coordinate("chr7", 60878240, 60878240)
4052
assert len(result) == 1
41-
assert result[0] == LiftoverResult("chr7", 140439611, Strand.POSITIVE)
53+
assert result[0] == LiftoverResult("chr7", 61646115, 61646115, Strand.POSITIVE)
4254

43-
result = converter.convert_coordinate("chr7", 140759820)
55+
result = converter.convert_coordinate("chr7", 60878240, 60878240)
4456
assert len(result) == 1
45-
assert result[0] == LiftoverResult("chr7", 140459620, Strand.POSITIVE)
57+
assert result[0] == LiftoverResult("chr7", 61646115, 61646115, Strand.POSITIVE)
4658

47-
result = converter.convert_coordinate("chr7", 60878240)
59+
result = converter.convert_coordinate("chr7", 60878240, 60878245)
4860
assert len(result) == 1
49-
assert result[0] == LiftoverResult("chr7", 61646115, Strand.POSITIVE)
61+
assert result[0] == LiftoverResult("chr7", 61646115, 61646120, Strand.POSITIVE)
62+
63+
64+
def test_interval_input():
65+
"""Test that invalid intervals raise errors"""
66+
converter = Converter(Assembly.HG38, Assembly.HG19)
67+
68+
with pytest.raises(
69+
ValueError,
70+
match=re.escape(
71+
"`end` must be less than `start` on the positive strand: start=140739811, end=140739809"
72+
),
73+
):
74+
converter.convert_coordinate("chr7", 140739811, 140739809)
75+
76+
with pytest.raises(
77+
ValueError,
78+
match=re.escape(
79+
"`start` must be less than `end` on the negative strand: start=206268644, end=206268645"
80+
),
81+
):
82+
converter.convert_coordinate(
83+
"chr1", 206268644, 206268645, strand=Strand.NEGATIVE
84+
)

0 commit comments

Comments
 (0)