Skip to content

Commit c417262

Browse files
committed
Refactor eachregion to be O(n log n) not O(n^2)
Since we removed the ordering restriction on annotations to improve the semantics of annotation modification, each `annotations(str)` call became `O(n)`. That is fine for a one-off, but when it is called in a loop, as `eachregion` does, the total cost becomes `O(n m)` (for `n` annotations and `m` regions). That's pretty underwhelming. We can improve this to `O(n log n)` by pre-sorting the list of annotations and working with the sorted list instead. A bit more complexity is needed to do this while preserving the semantics, but it can be worth it for long strings. With a 100,000 char string with 20,000 annotations, print time goes from ~0.4s to ~0.01s on my machine.
1 parent f7af623 commit c417262

File tree

2 files changed

+57
-22
lines changed

2 files changed

+57
-22
lines changed

docs/src/internals.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ StyledStrings.Legacy.legacy_color
1616
StyledStrings.Legacy.load_env_colors!
1717
StyledStrings.ansi_4bit_color_code
1818
StyledStrings.eachregion
19+
StyledStrings.annotation_events
1920
StyledStrings.face!
2021
StyledStrings.getface
2122
StyledStrings.loadface!

src/regioniterator.jl

Lines changed: 56 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,37 +36,71 @@ julia> collect(StyledStrings.eachregion(Base.AnnotatedString(
3636
("there", [:face => :italic])
3737
```
3838
"""
39-
function eachregion(s::AnnotatedString, subregion::UnitRange{Int}=firstindex(s):lastindex(s))
    # `&&` binds tighter than `||`, so the two emptiness checks must be grouped
    # explicitly for an empty `s` to take the early return as well.
    (isempty(s) || isempty(subregion)) &&
        return RegionIterator(s.string, UnitRange{Int}[], Vector{Pair{Symbol, Any}}[])
    events = annotation_events(s, subregion)
    # No annotation touches `subregion`: it is a single unannotated region.
    isempty(events) &&
        return RegionIterator(s.string, [subregion], [Pair{Symbol, Any}[]])
    annotvals = last.(annotations(s))
    regions = Vector{UnitRange{Int}}()
    annots = Vector{Vector{Pair{Symbol, Any}}}()
    pos = first(events).pos
    if pos > first(subregion)
        # Unannotated stretch before the first event. `prevind` (rather than
        # `pos-1`) keeps the end of the range on a valid character index, in
        # line with how the regions in the loop below are closed.
        push!(regions, first(subregion):prevind(s, pos))
        push!(annots, Pair{Symbol, Any}[])
    end
    # Indices (into `annotvals`) of the annotations active at `pos`, kept
    # sorted so that each region lists its annotations in annotation order.
    activelist = Int[]
    for event in events
        if event.pos != pos
            # The position advanced: close the region spanning the previous
            # position up to just before this event, with the active set as
            # it stood over that region.
            push!(regions, pos:prevind(s, event.pos))
            push!(annots, annotvals[activelist])
            pos = event.pos
        end
        if event.active
            insert!(activelist, searchsortedfirst(activelist, event.index), event.index)
        else
            deleteat!(activelist, searchsortedfirst(activelist, event.index))
        end
    end
    if last(events).pos < nextind(s, last(subregion))
        # Unannotated stretch after the final event.
        push!(regions, last(events).pos:last(subregion))
        push!(annots, Pair{Symbol, Any}[])
    end
    return RegionIterator(s.string, regions, annots)
end
6571

66-
function eachregion(s::SubString{<:AnnotatedString}, pos::UnitRange{Int}=firstindex(s):lastindex(s))
    # An empty substring yields an iterator with no regions.
    isempty(s) &&
        return RegionIterator(s.string, UnitRange{Int}[], Vector{Pair{Symbol, Any}}[])
    # Shift the requested range by the substring's offset and delegate to the
    # method for the parent string.
    shift = s.offset
    return eachregion(s.string, (first(pos) + shift):(last(pos) + shift))
end
79+
80+
"""
    annotation_events(string::AbstractString, annots::Vector{Tuple{UnitRange{Int64}, Pair{Symbol, Any}}}, subregion::UnitRange{Int})
    annotation_events(string::AnnotatedString, subregion::UnitRange{Int})

Collect the annotation "change events" of `annots` that fall within `subregion`,
using `string` to step between character indices. When `string` is an
`AnnotatedString`, its own annotations are used as `annots`.

Every annotation whose region overlaps `subregion` contributes two events: an
activation at the start of the overlap, and a deactivation at the index
following the end of the overlap. Annotations with no overlap contribute nothing.

The events are returned sorted by position, each as a `@NamedTuple{pos::Int,
active::Bool, index::Int}`: `pos` is where the event happens, `active` tells
whether the annotation is switched on (`true`) or off (`false`) there, and
`index` is the position of the annotation within `annots`.
"""
function annotation_events(s::AbstractString, annots::Vector{Tuple{UnitRange{Int64}, Pair{Symbol, Any}}}, subregion::UnitRange{Int})
    Event = NamedTuple{(:pos, :active, :index), Tuple{Int, Bool, Int}}
    changes = Event[]
    lo, hi = first(subregion), last(subregion)
    for (idx, (span, _)) in enumerate(annots)
        # Clamp the annotation's span to the subregion of interest.
        from = max(lo, first(span))
        upto = min(hi, last(span))
        from > upto && continue # Empty overlap: currently can't handle empty regions
        push!(changes, (pos=from, active=true, index=idx))
        push!(changes, (pos=nextind(s, upto), active=false, index=idx))
    end
    return sort!(changes, by=ev -> ev.pos)
end
104+
105+
# Convenience method: take the annotations from the annotated string itself and
# resolve indices against its underlying plain string.
function annotation_events(s::AnnotatedString, subregion::UnitRange{Int})
    return annotation_events(s.string, annotations(s), subregion)
end

0 commit comments

Comments
 (0)