Skip to content

Commit ba3a960

Browse files
authored
Add spliceinto! function. (#338)
This function inserts a sequence into a biosequence, and optionally deletes part of the original sequence. The naming difference from `Base.splice!` reflects its slightly different API.
1 parent 65249bd commit ba3a960

File tree

8 files changed

+180
-6
lines changed

8 files changed

+180
-6
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
55
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
66

7+
## [3.5.0]
8+
* Add `spliceinto!`. This function inserts a sequence into a biosequence,
9+
and optionally deletes part of the original sequence. The naming difference
10+
from `Base.splice!` reflects its slightly different API.
11+
* Optimise various methods
12+
713
## [3.4.0]
814
* Deprecate functions `n_ambiguous`, `n_gaps` and `n_certain`. Instead, use the
915
equivalent methods `count(f, seq)` with the appropriate function `f`.

Project.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "BioSequences"
22
uuid = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
33
authors = ["Sabrina Jaye Ward <[email protected]>", "Jakob Nissen <[email protected]>"]
4-
version = "3.4.2"
4+
version = "3.5.0"
55

66
[deps]
77
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
@@ -11,19 +11,22 @@ Twiddle = "7200193e-83a8-5a55-b20d-5d36d44a0795"
1111

1212
[compat]
1313
BioSymbols = "5.1.2"
14+
LinearAlgebra = "1.10"
1415
PrecompileTools = "1"
1516
Random = "1.5"
1617
StableRNGs = "0.1, 1.0"
18+
StatsBase = "0.34.5"
19+
Test = "1.10"
1720
Twiddle = "1.1.1"
21+
YAML = "0.4.14"
1822
julia = "1.10"
1923

2024
[extras]
21-
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
2225
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
2326
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
2427
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
2528
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2629
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
2730

2831
[targets]
29-
test = ["Documenter", "Test", "StatsBase", "YAML", "LinearAlgebra", "StableRNGs"]
32+
test = ["Test", "StatsBase", "YAML", "LinearAlgebra", "StableRNGs"]

docs/Project.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,8 @@ BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
33
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
44
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
55

6+
[sources]
7+
BioSequences = {path = ".."}
8+
69
[compat]
710
Documenter = "1"

docs/src/transforms.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ deleteat!(::BioSequences.BioSequence, ::Integer)
6666
append!(::BioSequences.BioSequence, ::BioSequences.BioSequence)
6767
resize!(::BioSequences.LongSequence, ::Integer)
6868
empty!(::BioSequences.BioSequence)
69+
spliceinto!(::BioSequence, ::Integer, ::Any)
70+
spliceinto!(::BioSequence, ::UnitRange, ::Any)
6971
```
7072

7173
Here are some examples:

src/BioSequences.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ export
132132
ungap,
133133
ungap!,
134134
join!,
135+
spliceinto!,
135136

136137
###
137138
### LongSequence

src/biosequence/transformations.jl

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,97 @@ end
4444
insert!(seq::BioSequence, i, x)
4545
4646
Insert a biological symbol `x` into a biological sequence `seq`, at the given
47-
index `i`.
47+
index `i`. Returns the mutated `seq`.
48+
49+
# Examples
50+
```jldoctest
51+
julia> seq = dna"ATGCA"
52+
5nt DNA Sequence:
53+
ATGCA
54+
55+
julia> insert!(seq, 3, 'A')
56+
6nt DNA Sequence:
57+
ATAGCA
58+
```
4859
"""
4960
function Base.insert!(seq::BioSequence, i::Integer, x)
61+
i == length(seq) + 1 && return push!(seq, x)
5062
checkbounds(seq, i)
5163
resize!(seq, length(seq) + 1)
5264
copyto!(seq, i + 1, seq, i, lastindex(seq) - i)
5365
@inbounds seq[i] = x
5466
return seq
5567
end
5668

69+
"""
70+
spliceinto!(seq::BioSequence, i::Integer, x)
71+
72+
Insert the sequence `x` into a biological sequence `seq`, at the given index `i`.
73+
After splicing, the `seq`'s symbols at indices `i:i+length(x)-1` are equal to `x`,
74+
and the symbols that were previously there are moved to the right.
75+
76+
# Examples
77+
```jldoctest
78+
julia> seq = dna"TAGTGCA";
79+
80+
julia> spliceinto!(seq, 3, "CAGGA")
81+
12nt DNA Sequence:
82+
TACAGGAGTGCA
83+
```
84+
"""
85+
function spliceinto!(seq::BioSequence, i::Integer, x)
86+
oldlen = length(seq)
87+
i == oldlen + 1 && return append!(seq, x)
88+
@boundscheck checkbounds(seq, i)
89+
resize!(seq, oldlen + length(x))
90+
copyto!(seq, i + length(x), seq, i, oldlen - i + 1)
91+
copyto!(seq, i, x, 1, length(x))
92+
return seq
93+
end
94+
95+
"""
96+
spliceinto!(seq::BioSequence, span::UnitRange, x)
97+
98+
Delete the symbols at indices `span` in `seq`, and then copy `x` into the
99+
first deleted position, then return `seq`.
100+
101+
This is equivalent to `deleteat!(seq, span); spliceinto!(seq, first(span), x)`,
102+
but is more efficient.
103+
`span` must be nonempty, or this function will throw an `ArgumentError`. To handle
104+
potentially empty spans, check if the span is empty, and if so use `spliceinto!(seq, first(span), x)`.
105+
106+
# Examples
107+
```jldoctest
108+
julia> seq = dna"TAGTGCA";
109+
110+
julia> spliceinto!(seq, 3:5, "CAGGA")
111+
9nt DNA Sequence:
112+
TACAGGACA
113+
```
114+
"""
115+
function spliceinto!(seq::BioSequence, span::UnitRange, x)
116+
isempty(span) && throw(ArgumentError("span cannot be empty"))
117+
@boundscheck checkbounds(seq, span)
118+
oldlen = length(seq)
119+
xlen = length(x)
120+
if length(span) == xlen
121+
# Same lengths: Just copy in x
122+
copyto!(seq, first(span), x, 1, length(span))
123+
elseif length(span) < xlen
124+
# x is longer. Resize and shift to make room for more symbols,
125+
# then copy in x
126+
resize!(seq, oldlen + xlen - length(span))
127+
copyto!(seq, first(span) + xlen, seq, last(span) + 1, oldlen - last(span))
128+
copyto!(seq, first(span), x, 1, xlen)
129+
else
130+
# Span is longer. Delete the rightmost bases (to cause the smallest possible shift),
131+
# then copy in x
132+
deleteat!(seq, first(span) + xlen:last(span))
133+
copyto!(seq, first(span), x, 1, xlen)
134+
end
135+
return seq
136+
end
137+
57138
"""
58139
deleteat!(seq::BioSequence, range::UnitRange{<:Integer})
59140
@@ -89,7 +170,7 @@ end
89170
Add a biological sequence `other` onto the end of biological sequence `seq`.
90171
Modifies and returns `seq`.
91172
"""
92-
function Base.append!(seq::BioSequence, other::BioSequence)
173+
function Base.append!(seq::BioSequence, other)
93174
resize!(seq, length(seq) + length(other))
94175
copyto!(seq, lastindex(seq) - length(other) + 1, other, 1, length(other))
95176
return seq

test/longsequences/mutability.jl

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,88 @@
122122
seq = dna"ACGT"
123123
@test insert!(seq, 2, DNA_G) == dna"AGCGT"
124124
@test insert!(seq, 5, DNA_A) == dna"AGCGAT"
125+
@test insert!(seq, 1, 'G') == dna"GAGCGAT"
126+
@test insert!(seq, length(seq) + 1, 'C') == dna"GAGCGATC"
125127
@test_throws BoundsError insert!(seq, 10, DNA_T)
126128
end
127129

130+
@testset "spliceinto!" begin
131+
@testset "spliceinto integer" begin
132+
function manual_spliceinto(seq, i, x)
133+
s = typeof(seq)(undef, length(seq) + length(x))
134+
copyto!(s, 1, seq, 1, i-1)
135+
copyto!(s, i, x, 1, length(x))
136+
copyto!(s, i + length(x), seq, i, length(seq)-i+1)
137+
return s
138+
end
139+
140+
seq = dna"TAGTGCA"
141+
@test spliceinto!(seq, 2, "CA") === seq
142+
@test seq == dna"TCAAGTGCA"
143+
144+
str = "ATGTGCTCGTGTCGTGATAGTGAGTAGTAGTCGTAGTAGTGATTGCTGTAGTA"
145+
seq = LongDNA{2}(str)
146+
147+
@test_throws BoundsError spliceinto!(seq, 0, "CA")
148+
@test_throws BoundsError spliceinto!(seq, -1, "CA")
149+
@test_throws BoundsError spliceinto!(seq, length(seq) + 2, "CA")
150+
151+
for (i, s) in Any[
152+
(1, ""),
153+
(1, "CAC"),
154+
(1, "ATGCTGCTGATGTGATGA"),
155+
(2, dna"ATGTCGA"),
156+
(15, rna"AUGUCGUAGUAACCAACA"),
157+
(16, dna"ATGTCGTGATGATGTAGTGTCGTA"),
158+
(18, b"ATGCTGTGATGATGTCC"),
159+
(length(seq), dna"TAGCGGAGA"),
160+
(length(seq) + 1, "AGCGGGAGA"),
161+
]
162+
copy!(seq, str)
163+
cp = copy(seq)
164+
@test spliceinto!(seq, i, s) == manual_spliceinto(cp, i, s)
165+
end
166+
end
167+
168+
@testset "spliceinto span" begin
169+
function manual_spliceinto(seq, span, x)
170+
deleteat!(seq, span)
171+
spliceinto!(seq, first(span), x)
172+
return seq
173+
end
174+
175+
seq = dna"ATGTCGTGA"
176+
@test spliceinto!(seq, 2:2, "CA") === seq
177+
@test seq == dna"ACAGTCGTGA"
178+
179+
str = "ATGTGCTCGTGTCGTGATAGTGAGTAGTAGTCGTAGTAGTGATTGCTGTAGTA"
180+
seq = LongDNA{2}(str)
181+
182+
@test_throws ArgumentError spliceinto!(seq, 0:-1, "CA")
183+
@test_throws ArgumentError spliceinto!(seq, 1:0, "CA")
184+
@test_throws ArgumentError spliceinto!(seq, lastindex(seq):5, "CA")
185+
186+
@test_throws BoundsError spliceinto!(seq, 0:0, "CA")
187+
@test_throws BoundsError spliceinto!(seq, 0:1, "CA")
188+
@test_throws BoundsError spliceinto!(seq, 4:100, "CA")
189+
@test_throws BoundsError spliceinto!(seq, length(seq):length(seq)+1, "CA")
190+
191+
for (i, s) in Any[
192+
(1:1, ""),
193+
(4:6, "T"),
194+
(4:9, "ATGCGTA"),
195+
(3:6, "AGCA"),
196+
(30:35, "ATGTCGTAG"),
197+
(15:20, rna"UAGC"),
198+
(9:40, dna"ATGTCGTGATGAA")
199+
]
200+
copy!(seq, str)
201+
cp = copy(seq)
202+
@test spliceinto!(seq, i, s) == manual_spliceinto(cp, i, s)
203+
end
204+
end
205+
end
206+
128207
@testset "deleteat!" begin
129208
seq = dna"ACGT"
130209
@test deleteat!(seq, 1) == dna"CGT"

test/runtests.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
module TestBioSequences
22

33
using Test
4-
using Documenter
54

65
using Random
76
using StableRNGs

0 commit comments

Comments
 (0)