Skip to content

Commit 490a6b7

Browse files
committed
Refactor out main element switch into functions
There are limits to how large functions /should/ be, and lexnext has already blown past them. Now I'm considering object lexing, and it's past time I separate out the largest chunks into helper functions.
1 parent b7a59be commit 490a6b7

File tree

1 file changed

+89
-75
lines changed

1 file changed

+89
-75
lines changed

src/lexer.jl

Lines changed: 89 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -85,87 +85,14 @@ function lexnext(state::LexerState, bytes::DenseVector{UInt8}, start::UInt32)
8585
elseif newlines > 2 && K"item" state.ctx
8686
Token(settag(K">item", tag(state.ctx)), start - 0x1, start - 0x1), start
8787
elseif newlines != 0
88-
nextelem = if K"table" state.ctx
89-
if chr == UInt8('|')
90-
if ischarat(bytes, pos + 0x1, '-')
91-
lend = lineend(bytes, pos)
92-
Token(K"table_row[1]", pos, lend - 0x1), lend
93-
else
94-
Token(K"<table_row", pos, pos), pos + 0x1
95-
end
96-
else
97-
Token(K">table", start - 0x1, start - 0x1), start
98-
end
99-
elseif K"item" state.ctx && tag(state.ctx) > (pos - linestart)
100-
Token(settag(K">item", tag(state.ctx)), start - 0x1, start - 0x1), start
101-
elseif K"clock" state.ctx
102-
Token(K">clock", start - 0x1, start - 0x1), start
103-
elseif chr == UInt8('*') && pos == linestart && ischarat(bytes, skipchars(bytes, pos, '*'), ' ')
104-
lex_heading(state, bytes, pos)
105-
elseif chr == UInt8(':')
106-
if length(bytes) > pos && iswhitespace(bytes, pos + 0x1) || islineend(bytes, pos + 0x1)
107-
lex_fixedwidth(state, bytes, pos)
108-
else
109-
lex_drawer(state, bytes, pos)
110-
end
111-
elseif chr == UInt8('[') && pos == linestart
112-
fndef = lex_footnotedef(state, bytes, pos)
113-
if fndef != NONE_TOKEN && K"footnote_definition" state.ctx
114-
Token(K">footnote_definition", start - 0x1, start - 0x1), start
115-
else
116-
fndef
117-
end
118-
elseif chr == UInt8('|') && K"table" state.restriction
119-
Token(K"<table", pos, pos), pos
120-
elseif chr == UInt8('#') && ischarat(bytes, pos + 0x1, '+')
121-
lex_hashplus(state, bytes, pos)
122-
elseif chr == UInt8('c') && hasprefix(bytes, pos + 0x1, "lock:")
123-
lex_clock(state, bytes, pos)
124-
elseif chr == UInt8('%') && ischarat(bytes, pos + 0x1, '%')
125-
lex_diarysexp(state, bytes, pos)
126-
elseif chr == UInt8('#') && (length(bytes) > pos && iswhitespace(bytes, pos + 0x1) || islineend(bytes, pos + 0x1))
127-
lex_comment(state, bytes, pos)
128-
elseif chr == UInt8('-') && ischarat(bytes, pos + 0x1, '-')
129-
lex_hrule(state, bytes, pos)
130-
elseif chr == UInt8('\\') && hasprefix(bytes, pos + 0x1, "begin{")
131-
lex_latexenv(state, bytes, pos)
132-
elseif K"heading" state.lastelement
133-
lex_planning(state, bytes, pos)
134-
else
135-
if K"item" state.restriction
136-
lex_item(state, bytes, linestart)
137-
else
138-
NONE_TOKEN
139-
end
140-
end
88+
nextelem = lexnext_element(state, bytes, start, linestart, pos, chr)
14189
if nextelem == NONE_TOKEN && K"paragraph" state.ctx && K"paragraph" state.restriction && linestart < length(bytes)
14290
Token(K"<paragraph", linestart, linestart), linestart
14391
else
14492
nextelem
14593
end
14694
else # No newlines
147-
if K"table" state.ctx && islineend(bytes, pos + 0x1)
148-
if K"table_cell" state.ctx
149-
Token(K">table_cell", pos, pos), pos
150-
elseif K"table_row" state.ctx
151-
Token(K">table_row", pos, pos), pos + 0x1
152-
else
153-
NONE_TOKEN
154-
end
155-
elseif K"table_row" state.ctx
156-
if K"table_cell" state.ctx
157-
cellend = min(length(bytes) % UInt32, nextchar(bytes, pos, ('|', '\n', '\r')))
158-
cellend -= (bytes[cellend] (UInt8('\n'), UInt8('\r'))) % UInt32
159-
Token(K">table_cell", cellend, cellend), cellend
160-
else
161-
if bytes[pos] == UInt8('|')
162-
pos += 0x1
163-
end
164-
Token(K"<table_cell", pos, pos), pos
165-
end
166-
else
167-
NONE_TOKEN
168-
end
95+
lexnext_object(state, bytes, start, linestart, pos, chr)
16996
end
17097
if next != NONE_TOKEN
17198
if K"paragraph" state.ctx
@@ -182,6 +109,93 @@ function lexnext(state::LexerState, bytes::DenseVector{UInt8}, start::UInt32)
182109
end
183110
end
184111

112+
function lexnext_element(state::LexerState, bytes::DenseVector{UInt8},
113+
start::UInt32, linestart::UInt32, pos::UInt32, chr::UInt8)
114+
if K"table" state.ctx
115+
if chr == UInt8('|')
116+
if ischarat(bytes, pos + 0x1, '-')
117+
lend = lineend(bytes, pos)
118+
Token(K"table_row[1]", pos, lend - 0x1), lend
119+
else
120+
Token(K"<table_row", pos, pos), pos + 0x1
121+
end
122+
else
123+
Token(K">table", start - 0x1, start - 0x1), start
124+
end
125+
elseif K"item" state.ctx && tag(state.ctx) > (pos - linestart)
126+
Token(settag(K">item", tag(state.ctx)), start - 0x1, start - 0x1), start
127+
elseif K"clock" state.ctx
128+
Token(K">clock", start - 0x1, start - 0x1), start
129+
elseif chr == UInt8('*') && pos == linestart && ischarat(bytes, skipchars(bytes, pos, '*'), ' ')
130+
lex_heading(state, bytes, pos)
131+
elseif chr == UInt8(':')
132+
if length(bytes) > pos && iswhitespace(bytes, pos + 0x1) || islineend(bytes, pos + 0x1)
133+
lex_fixedwidth(state, bytes, pos)
134+
else
135+
lex_drawer(state, bytes, pos)
136+
end
137+
elseif chr == UInt8('[') && pos == linestart
138+
fndef = lex_footnotedef(state, bytes, pos)
139+
if fndef != NONE_TOKEN && K"footnote_definition" state.ctx
140+
Token(K">footnote_definition", start - 0x1, start - 0x1), start
141+
else
142+
fndef
143+
end
144+
elseif chr == UInt8('|') && K"table" state.restriction
145+
Token(K"<table", pos, pos), pos
146+
elseif chr == UInt8('#')
147+
if ischarat(bytes, pos + 0x1, '+')
148+
lex_hashplus(state, bytes, pos)
149+
elseif length(bytes) > pos && iswhitespace(bytes, pos + 0x1) || islineend(bytes, pos + 0x1)
150+
lex_comment(state, bytes, pos)
151+
else
152+
NONE_TOKEN
153+
end
154+
elseif chr == UInt8('c') && hasprefix(bytes, pos + 0x1, "lock:")
155+
lex_clock(state, bytes, pos)
156+
elseif chr == UInt8('%') && ischarat(bytes, pos + 0x1, '%')
157+
lex_diarysexp(state, bytes, pos)
158+
elseif chr == UInt8('-') && ischarat(bytes, pos + 0x1, '-')
159+
lex_hrule(state, bytes, pos)
160+
elseif chr == UInt8('\\') && hasprefix(bytes, pos + 0x1, "begin{")
161+
lex_latexenv(state, bytes, pos)
162+
elseif K"heading" state.lastelement
163+
lex_planning(state, bytes, pos)
164+
else
165+
if K"item" state.restriction
166+
lex_item(state, bytes, linestart)
167+
else
168+
NONE_TOKEN
169+
end
170+
end
171+
end
172+
173+
function lexnext_object(state::LexerState, bytes::DenseVector{UInt8},
174+
start::UInt32, linestart::UInt32, pos::UInt32, chr::UInt8)
175+
if K"table" state.ctx && islineend(bytes, pos + 0x1)
176+
if K"table_cell" state.ctx
177+
Token(K">table_cell", pos, pos), pos
178+
elseif K"table_row" state.ctx
179+
Token(K">table_row", pos, pos), pos + 0x1
180+
else
181+
NONE_TOKEN
182+
end
183+
elseif K"table_row" state.ctx
184+
if K"table_cell" state.ctx
185+
cellend = min(length(bytes) % UInt32, nextchar(bytes, pos, ('|', '\n', '\r')))
186+
cellend -= (bytes[cellend] (UInt8('\n'), UInt8('\r'))) % UInt32
187+
Token(K">table_cell", cellend, cellend), cellend
188+
else
189+
if bytes[pos] == UInt8('|')
190+
pos += 0x1
191+
end
192+
Token(K"<table_cell", pos, pos), pos
193+
end
194+
else
195+
NONE_TOKEN
196+
end
197+
end
198+
185199

186200
# Greater element lexing
187201

0 commit comments

Comments
 (0)