3
3
# When updating any part of the parsing code in this file, it is highly
4
4
# recommended to also check and/or update `../test/fuzz.jl`.
5
5
6
+ """
7
+ StyledMarkup
8
+
9
+ A sub-module of `StyledStrings` that specifically deals with parsing styled
10
+ markup strings. To this end, two entrypoints are provided:
11
+
12
+ - The [`styled""`](@ref @styled_str) string macro, which is generally preferred.
13
+ - The [`styled`](@ref styled) function, which allows for use with runtime-provided strings,
14
+ when needed.
15
+
16
+ Overall, this module essentially functions as a state machine with a few extra
17
+ niceties (like detailed error reporting) sprinkled on top. The overall design
18
+ can be largely summed up with the following diagram:
19
+
20
+ ```text
21
+ ╭String─────────╮
22
+ │ Styled markup │
23
+ ╰──────┬────────╯
24
+ │╭╴[module]
25
+ ││
26
+ ╭┴┴State─╮
27
+ ╰┬───────╯
28
+ │
29
+ ╭╴run_state_machine!╶╮
30
+ │ ╭─────┼─╼ escaped!
31
+ │ Apply rules: │ │
32
+ │ "\\\\ " ▶──────╯ ╭───┼─╼[interpolated!] ──▶ readexpr!, addpart!
33
+ │ "\$ " ▶────────╯ │
34
+ │ "{" ▶────────────┼─╼ begin_style! ──▶ read_annotation!
35
+ │ "}" ▶─────╮ │ ├─╼ read_inlineface! [readexpr!]
36
+ │ ╰──────┼─╼ end_style! ╰─╼ read_face_or_keyval!
37
+ │ addpart!(...) │
38
+ ╰╌╌╌╌╌┬╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯
39
+ │
40
+ ▼
41
+ Result
42
+ ```
43
+
44
+ Of course, as usual, the devil is in the details.
45
+ """
6
46
module StyledMarkup
7
47
8
48
using Base: AnnotatedString, annotations, annotatedstring
9
49
using .. StyledStrings: Face, SimpleColor
10
50
11
51
export @styled_str , styled
12
52
53
+ """
54
+ State
55
+
56
+ A struct representing the parser state (if you squint, a state monad even).
57
+
58
+ To create the initial state, use the constructor:
59
+ State(content::AbstractString, mod::Union{Module, Nothing}=nothing) -> State
60
+
61
+ Its fields are as follows:
62
+ - `content::String`, the (unescaped) input string
63
+ - `bytes::Vector{UInt8}`, the codeunits of `content`. This is a `Vector{UInt8}` instead of a
64
+ `CodeUnits{UInt8}` because we need to be able to modify the array, for instance when erasing
65
+ escape characters.
66
+ - `s::Iterators.Stateful`, an `(index, char)` iterator of `content`
67
+ - `mod::Union{Module, Nothing}`, the (optional) context with which to evaluate inline
68
+ expressions. This should be provided iff the styled markup comes from a macro invocation.
69
+ - `parts::Vector{Any}`, the result of the parsing, a list of elements that when passed to
70
+ `annotatedstring` produce the styled markup string. The types of its values are highly diverse,
71
+ hence the `Any` element type.
72
+ - `active_styles::Vector{Vector{Tuple{Int, Int, Union{Symbol, Expr, Pair{Symbol, Any}}}}}`,
73
+ A list of batches of styles that have yet to be applied to any content. Entries of a batch
74
+ consist of `(source_position, start_position, style)` tuples, where `style` may be just
75
+ a symbol (referring to a face), a `Pair{Symbol, Any}` annotation, or an `Expr` that evaluates
76
+ to a valid annotation (when `mod` is set).
77
+ - `pending_styles::Vector{Tuple{UnitRange{Int}, Union{Symbol, Expr, Pair{Symbol, Any}}}}`,
78
+ A list of styles that have been terminated, and so are known to occur over a certain range,
79
+ but have yet to be applied.
80
+ - `offset::Ref{Int}`, a record of the drift between the `content` index and the index in the resulting
81
+ styled string, as markup structures are absorbed.
82
+ - `point::Ref{Int}`, the current index in `content`.
83
+ - `escape::Ref{Bool}`, whether the last character seen was an escape character.
84
+ - `interpolated::Ref{Bool}`, whether any interpolated values have been seen. Knowing whether or not
85
+ anything needs to be evaluated allows the resulting string to be computed at macroexpansion time,
86
+ when possible.
87
+ - `errors::Vector`, any errors raised during parsing. We collect them instead of immediately throwing
88
+ so that we can list as many issues as possible at once, instead of forcing the author of the invalid
89
+ styled markup to resolve each issue one at a time. This is expected to be populated by invocations of
90
+ `styerr!`.
91
+ """
13
92
struct State
14
- content:: String # the (unescaped) input string
15
- bytes:: Vector{UInt8} # bytes of `content`
16
- s:: Iterators.Stateful # (index, char) interator of `content`
17
- mod:: Union{Module, Nothing} # the context to evaluate in (when part of a macro)
18
- parts:: Vector{Any} # the final result
19
- active_styles:: Vector { # unterminated batches of styles, [(source_pos, start, style), ...]
20
- Vector{Tuple{Int, Int, Union{Symbol, Expr, Pair{Symbol, Any}}}}}
21
- pending_styles:: Vector { # terminated styles that have yet to be applied, [(range, style), ...]
22
- Tuple{UnitRange{Int}, Union{Symbol, Expr, Pair{Symbol, Any}}}}
23
- offset:: Ref{Int} # drift in the `content` index as structures are absorbed
24
- point:: Ref{Int} # current index in `content`
25
- escape:: Ref{Bool} # whether the last char was an escape char
26
- interpolated:: Ref{Bool} # whether any string interpolation occurs
27
- errors:: Vector # any errors raised during parsing
93
+ content:: String
94
+ bytes:: Vector{UInt8}
95
+ s:: Iterators.Stateful
96
+ mod:: Union{Module, Nothing}
97
+ parts:: Vector{Any}
98
+ active_styles:: Vector {Vector{Tuple{Int, Int, Union{Symbol, Expr, Pair{Symbol, Any}}}}}
99
+ pending_styles:: Vector {Tuple{UnitRange{Int}, Union{Symbol, Expr, Pair{Symbol, Any}}}}
100
+ offset:: Ref{Int}
101
+ point:: Ref{Int}
102
+ escape:: Ref{Bool}
103
+ interpolated:: Ref{Bool}
104
+ errors:: Vector
28
105
end
29
106
30
107
function State (content:: AbstractString , mod:: Union{Module, Nothing} = nothing )
@@ -48,15 +125,39 @@ const VALID_WEIGHTS = ("thin", "extralight", "light", "semilight", "normal",
48
125
const VALID_SLANTS = (" italic" , " oblique" , " normal" )
49
126
const VALID_UNDERLINE_STYLES = (" straight" , " double" , " curly" , " dotted" , " dashed" )
50
127
128
+ """
129
+ isnextchar(state::State, char::Char) -> Bool
130
+ isnextchar(state::State, chars::NTuple{N, Char}) -> Bool
131
+
132
+ Check if `state` has a next character, and if so whether it is `char` or one of `chars`.
133
+ """
134
+ function isnextchar end
135
+
51
136
# Single-character form: false when the iterator is exhausted, otherwise
# compare the character of the peeked `(index, char)` pair against `c`.
function isnextchar(state::State, c::Char)
    isempty(state.s) && return false
    return last(peek(state.s)) == c
end

# Tuple form: false when the iterator is exhausted, otherwise test whether
# the peeked character is any member of `cs`.
function isnextchar(state::State, cs::NTuple{N, Char}) where {N}
    isempty(state.s) && return false
    return last(peek(state.s)) in cs
end
56
141
142
+ """
143
+ ismacro(state::State) -> Bool
144
+
145
+ Check whether `state` is indicated to come from a macro invocation,
146
+ according to whether `state.mod` is set or not.
147
+
148
+ While this function is rather trivial, it clarifies the intent when used instead
149
+ of just checking `state.mod`.
150
+ """
57
151
# A state is treated as macro-originated exactly when a module is attached.
ismacro(state::State) = state.mod !== nothing
58
152
59
- function styerr! (state:: State , message, position:: Union{Nothing, Int} = nothing , hint:: String = " around here" )
153
+ """
154
+ styerr!(state::State, message::AbstractString, position::Union{Nothing, Int}=nothing, hint::String="around here")
155
+
156
+ Register an error in `state` based on erroneous content at or around `position`
157
+ (if known, and with a certain `hint` as to the location), with the nature of the
158
+ error given by `message`.
159
+ """
160
+ function styerr! (state:: State , message:: AbstractString , position:: Union{Nothing, Int} = nothing , hint:: String = " around here" )
60
161
if ! isnothing (position) && position < 0
61
162
position = prevind (
62
163
state. content,
@@ -71,9 +172,25 @@ function styerr!(state::State, message, position::Union{Nothing, Int}=nothing, h
71
172
nothing
72
173
end
73
174
175
+ """
176
+ hygienic_eval(state::State, expr)
177
+
178
+ Evaluate `expr` within the scope of `state`'s module.
179
+ This replicates part of the behind-the-scenes behaviour of macro expansion; we
180
+ just need to manually invoke it due to the particularities around dealing with
181
+ code from a foreign module that we parse ourselves.
182
+ """
74
183
# Wrap `expr` in a hygienic-scope expression tagged with this module, then
# evaluate the wrapped expression inside `state.mod`.
function hygienic_eval(state::State, expr)
    scoped = Expr(:var"hygienic-scope", expr, @__MODULE__)
    return Core.eval(state.mod, scoped)
end
76
185
186
+ """
187
+ addpart!(state::State, stop::Int)
188
+
189
+ Create a new part from `state.point` to `stop`, applying all pending styles.
190
+
191
+ This consumes all the content between `state.point` and `stop`, and shifts
192
+ `state.point` to be the index after `stop`.
193
+ """
77
194
function addpart! (state:: State , stop:: Int )
78
195
if state. point[] > stop+ state. offset[]+ ncodeunits (state. content[stop])- 1
79
196
return state. point[] = nextind (state. content, stop) + state. offset[]
@@ -115,6 +232,12 @@ function addpart!(state::State, stop::Int)
115
232
state. point[] = nextind (state. content, stop) + state. offset[]
116
233
end
117
234
235
+ """
236
+ addpart!(state::State, start::Int, expr, stop::Int)
237
+
238
+ Create a new part based on (the eventual evaluation of) `expr`, running from
239
+ `start` to `stop`, taking the currently active styles into account.
240
+ """
118
241
function addpart! (state:: State , start:: Int , expr, stop:: Int )
119
242
if state. point[] < start
120
243
addpart! (state, start)
@@ -152,6 +275,11 @@ function addpart!(state::State, start::Int, expr, stop::Int)
152
275
end
153
276
end
154
277
278
+ """
279
+ escaped!(state::State, i::Int, char::Char)
280
+
281
+ Parse the escaped character `char`, at index `i`, into `state`.
282
+ """
155
283
function escaped! (state:: State , i:: Int , char:: Char )
156
284
if char in (' {' , ' }' , ' \\ ' ) || (char == ' $' && ismacro (state))
157
285
deleteat! (state. bytes, i + state. offset[] - 1 )
@@ -172,6 +300,11 @@ function escaped!(state::State, i::Int, char::Char)
172
300
state. escape[] = false
173
301
end
174
302
303
+ """
304
+ interpolated!(state::State, i::Int, _)
305
+
306
+ Interpolate the expression starting at `i`, and add it as a part to `state`.
307
+ """
175
308
function interpolated! (state:: State , i:: Int , _)
176
309
expr, nexti = readexpr! (state, i + ncodeunits (' $' ))
177
310
deleteat! (state. bytes, i + state. offset[])
@@ -181,6 +314,12 @@ function interpolated!(state::State, i::Int, _)
181
314
state. interpolated[] = true
182
315
end
183
316
317
+ """
318
+ readexpr!(state::State, pos::Int = first(popfirst!(state.s)) + 1)
319
+
320
+ Read the expression starting at `pos` in `state.content`, and consume `state.s`
321
+ as appropriate to align the iterator to the end of the expression.
322
+ """
184
323
function readexpr! (state:: State , pos:: Int = first (popfirst! (state. s)) + 1 )
185
324
if isempty (state. s)
186
325
styerr! (state,
@@ -198,13 +337,23 @@ function readexpr!(state::State, pos::Int = first(popfirst!(state.s)) + 1)
198
337
expr, nextpos
199
338
end
200
339
340
+ """
341
+ skipwhitespace!(state::State)
201
342
343
+ Skip forwards past all space, tab, newline, and carriage return characters in `state.s`.
344
+ """
202
345
# Drop leading whitespace from `state.s`, one character at a time, until the
# next character is not whitespace or the iterator is exhausted.
function skipwhitespace!(state::State)
    blanks = (' ', '\t', '\n', '\r')
    while isnextchar(state, blanks)
        popfirst!(state.s)
    end
    return nothing
end
207
350
351
+ """
352
+ begin_style!(state::State, i::Int, char::Char)
353
+
354
+ Parse the style declaration beginning at `i` (`char`) with `read_annotation!`,
355
+ and register it in the active styles list.
356
+ """
208
357
function begin_style! (state:: State , i:: Int , char:: Char )
209
358
hasvalue = false
210
359
newstyles = Vector{Tuple{Int, Int, Union{Symbol, Expr, Pair{Symbol, Any}}}}()
@@ -220,16 +369,35 @@ function begin_style!(state::State, i::Int, char::Char)
220
369
end
221
370
end
222
371
372
+ """
373
+ end_style!(state::State, i::Int, char::Char)
374
+
375
+ Close off the most recent active style in `state`, making it a pending style.
376
+ """
223
377
# Pop the most recent batch of active styles and queue each entry as a
# pending style ending at the closing brace, then remove that brace from
# `state.bytes` and shrink the offset by its width.
function end_style!(state::State, i::Int, char::Char)
    # Position of the closing '}' within `state.bytes`; the offset is only
    # updated after the loop, so this value is the same for every entry.
    closing = i + state.offset[]
    for (_, start, annot) in pop!(state.active_styles)
        pushfirst!(state.pending_styles, (start:closing, annot))
    end
    deleteat!(state.bytes, closing)
    state.offset[] -= ncodeunits('}')
end
231
384
232
- function read_annotation! (state:: State , i:: Int , char:: Char , newstyles)
385
+ """
386
+ read_annotation!(state::State, i::Int, char::Char, newstyles::Vector) -> Bool
387
+
388
+ Read the annotations at `i` (`char`), and push the style read to `newstyles`.
389
+
390
+ This skips whitespace and checks what the next character in `state.s` is,
391
+ detects the form of the annotation, and parses it using the appropriate
392
+ specialised function like so:
393
+ - `:`, end of annotation, do nothing
394
+ - `(`, inline face declaration, use `read_inlineface!`
395
+ - otherwise, use `read_face_or_keyval!`
396
+
397
+ After parsing the annotation, returns a boolean value signifying whether there
398
+ is an immediately subsequent annotation to be read.
399
+ """
400
+ function read_annotation! (state:: State , i:: Int , char:: Char , newstyles:: Vector )
233
401
skipwhitespace! (state)
234
402
if isempty (state. s)
235
403
isempty (newstyles) &&
@@ -262,6 +430,12 @@ function read_annotation!(state::State, i::Int, char::Char, newstyles)
262
430
end
263
431
end
264
432
433
+ """
434
+ read_inlineface!(state::State, i::Int, char::Char, newstyles)
435
+
436
+ Read an inline face declaration from `state`, at position `i` (`char`), and add
437
+ it to `newstyles`.
438
+ """
265
439
function read_inlineface! (state:: State , i:: Int , char:: Char , newstyles)
266
440
# Substructure parsing helper functions
267
441
function readalph! (state, lastchar)
@@ -529,6 +703,12 @@ function read_inlineface!(state::State, i::Int, char::Char, newstyles)
529
703
end ))
530
704
end
531
705
706
+ """
707
+ read_face_or_keyval!(state::State, i::Int, char::Char, newstyles)
708
+
709
+ Read an inline face or key-value pair from `state` at position `i` (`char`), and
710
+ add it to `newstyles`.
711
+ """
532
712
function read_face_or_keyval! (state:: State , i:: Int , char:: Char , newstyles)
533
713
function read_curlywrapped! (state)
534
714
popfirst! (state. s) # first '{'
@@ -612,6 +792,15 @@ function read_face_or_keyval!(state::State, i::Int, char::Char, newstyles)
612
792
end
613
793
end
614
794
795
+ """
796
+ run_state_machine!(state::State)
797
+
798
+ Iterate through `state.s`, applying the parsing rules for the top-level of
799
+ syntax and calling the relevant specialised functions.
800
+
801
+ Upon completion, `state.s` should be fully consumed and `state.parts` fully
802
+ populated (along with `state.errors`).
803
+ """
615
804
function run_state_machine! (state:: State )
616
805
# Run the state machine
617
806
for (i, char) in state. s
0 commit comments