Skip to content

Commit 454c568

Browse files
committed
refactor: keep working on wip docstring parser
The parser is mostly done. This commit will be amended/fixed up once the docstring parser is completely done, so more details can be found in the accompanying PR.
1 parent 93e3c59 commit 454c568

File tree

2 files changed

+290
-4
lines changed

2 files changed

+290
-4
lines changed

docs/typlodocus/extractor.typ

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@
1818

1919
if line.starts-with("///") {
2020
in-comment = true
21-
current-comment += line.slice(3).trim() + "\n"
21+
current-comment += line.slice(3) + "\n"
2222
} else if in-comment {
2323
if line.starts-with(regex(`#let\s+`.text)) {
24-
let function = parser.parse-function-signature(lines.slice(i))
2524
comments.push((
26-
comment: parser.parse-docstring(current-comment),
27-
signature: function,
25+
comment: parser.parse-docstring-alt(current-comment),
26+
signature: parser.parse-function-signature(lines.slice(i)),
2827
))
2928
}
3029

docs/typlodocus/parser.typ

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
// TODO: see if we can perform documentation diagnostics:
2+
// (1) the diagnostics would include:
3+
// - default argument type validation (would have to look into how to do
4+
// this with custom types but it's feasible,)
5+
// - argument name validation (feasible considering we preprocess the
6+
// function signature prior to the docstring, though it would
7+
// require further rework,)
8+
// (2) the diagnostics should panic such that querying fails and the error
9+
// is reported during the doc building process (to better diagnose
10+
// issues whenever this happens in CI, it would be best if the errors
11+
// were also written to some log file or alternatively replaced the
12+
// query operation, and the Python script reported the actual error
13+
// and wrote to file if a specific json schema in the expected query
14+
// results is detected.)
15+
116
#let parse-function-signature(lines) = {
217
let ident = lines
318
.first()
@@ -150,6 +165,278 @@
150165
)
151166
}
152167

168+
// TODO: replace the `symbol` key in the corresponding dictionaries with the
169+
// `token` key.
170+
// TODO: see if repetitive code blocks at different stages of the parsing
171+
// process can be refactored into a set of functions that handle such
172+
// functionality in an isolated manner.
173+
#let parse-docstring-alt(string) = {
174+
let indent-ws = 0
175+
let example-fields = (
176+
symbol: (
177+
open: "```example",
178+
close: "```",
179+
),
180+
inside: false,
181+
)
182+
let parameter-fields = (
183+
symbol: (
184+
param: "-",
185+
param-list-open: "(",
186+
param-list-close: ")",
187+
),
188+
inside: false,
189+
inside-param-list: false,
190+
re: (
191+
line: regex(
192+
// TODO: rework the part of the regex parsing the optional type list to
193+
// make it accept only comma-optional separated sequences.
194+
`^-(\s+)((?:\.{2})?[[:alnum:]_-]+)((?:\s+)(?:\()([[:alnum:][:blank:]-_\.,]+)(?:\)))?((?:\s+=\s+)([^:]+))?(:\s+(.+))?`.text,
195+
),
196+
parameter-list: regex(`([[:alnum:]-_\.]+)(?:,){0,1}`.text),
197+
default-param: regex(`\)(\s+=\s+([^:]+))?:(?:\s*)?(.*)?`.text),
198+
),
199+
indent-ws: 0,
200+
)
201+
let result-fields = (
202+
symbol: "->",
203+
inside: false,
204+
re: regex(`^->(\s+)\(?([[:alnum:]-_]+)\)?((?:\s+)(.+))?`.text),
205+
indent-ws: 0,
206+
)
207+
// NOTE: this matches the empty string on non-matching haystacks, so it's
208+
// never `none`.
209+
let comment-ws-re = regex(`^([[:blank:]]*).*`.text)
210+
let arguments = ()
211+
let result = ()
212+
let text = ()
213+
214+
for line in string.split("\n") {
215+
if not (example-fields.inside or parameter-fields.inside or result-fields.inside) {
216+
indent-ws = line.match(comment-ws-re).captures.first().len()
217+
line = line.slice(indent-ws)
218+
219+
if line.len() == 0 and text.last().len() == 0 {
220+
continue
221+
} else if line.starts-with(example-fields.symbol.open) {
222+
example-fields.inside = true
223+
text.push(line.trim(at: end))
224+
} else if line.starts-with(result-fields.symbol) {
225+
let re-result = line.match(result-fields.re)
226+
if re-result != none {
227+
result-fields.indent-ws = re-result.captures.first().len()
228+
result-fields.inside = true
229+
230+
result.push((
231+
type: re-result.captures.at(1),
232+
text: if re-result.captures.at(3) != none {
233+
(re-result.captures.at(3).trim(at: end),)
234+
} else {
235+
()
236+
},
237+
))
238+
239+
continue
240+
}
241+
} else if line.starts-with(parameter-fields.symbol.param) {
242+
let param = line.match(parameter-fields.re.line)
243+
if param != none {
244+
parameter-fields.indent-ws = param.captures.first().len()
245+
parameter-fields.inside = true
246+
247+
arguments.push((
248+
name: param.captures.at(1),
249+
types: if param.captures.at(3) != none {
250+
param
251+
.captures
252+
.at(3)
253+
.matches(parameter-fields.re.parameter-list)
254+
.map(it => it.captures.first())
255+
} else {
256+
()
257+
},
258+
default-value: if param.captures.at(5) != none { param.captures.at(5) } else { none },
259+
text: if param.captures.at(7) != none {
260+
(param.captures.at(7).trim(at: end),)
261+
} else {
262+
()
263+
},
264+
))
265+
266+
continue
267+
}
268+
} else {
269+
text.push(line.trim(at: end))
270+
}
271+
} else if example-fields.inside {
272+
let tmp-ws = line.match(comment-ws-re).captures.first().len()
273+
line = line.slice(tmp-ws)
274+
275+
if line.starts-with(example-fields.symbol.close) { example-fields.inside = false }
276+
text.push(line)
277+
} else if parameter-fields.inside {
278+
let tmp-ws = line.match(comment-ws-re).captures.first().len()
279+
line = line.slice(tmp-ws)
280+
281+
if tmp-ws < indent-ws + parameter-fields.indent-ws + parameter-fields.symbol.param.len() {
282+
parameter-fields.inside = false
283+
indent-ws = tmp-ws
284+
285+
if line.starts-with(result-fields.symbol) {
286+
let re-result = line.match(result-fields.re)
287+
if re-result != none {
288+
result-fields.indent-ws = re-result.captures.first().len()
289+
result-fields.inside = true
290+
291+
result.push((
292+
type: re-result.captures.at(1),
293+
text: if re-result.captures.at(3) != none {
294+
(re-result.captures.at(3).trim(at: end),)
295+
} else {
296+
()
297+
},
298+
))
299+
}
300+
} else if line.starts-with(parameter-fields.symbol.param) {
301+
let param = line.match(parameter-fields.re.line)
302+
if param != none {
303+
parameter-fields.indent-ws = param.captures.first().len()
304+
parameter-fields.inside = true
305+
306+
arguments.push((
307+
name: param.captures.at(1),
308+
types: if param.captures.at(3) != none {
309+
param
310+
.captures
311+
.at(3)
312+
.matches(parameter-fields.re.parameter-list)
313+
.map(it => it.captures.first())
314+
} else {
315+
()
316+
},
317+
default-value: if param.captures.at(5) != none { param.captures.at(5) } else { none },
318+
text: if param.captures.at(7) != none {
319+
(param.captures.at(7).trim(at: end),)
320+
} else {
321+
()
322+
},
323+
))
324+
}
325+
} else {
326+
if text.last().len() == 0 and line.len() == 0 { continue }
327+
text.push(line.trim(at: end))
328+
}
329+
} else {
330+
if parameter-fields.inside-param-list {
331+
if line.starts-with(parameter-fields.symbol.param-list-close) {
332+
parameter-fields.inside-param-list = false
333+
334+
let result = line.match(parameter-fields.re.default-param)
335+
if result != none { arguments.last().text.push(result.captures.at(1)) }
336+
} else {
337+
let result = line.match(parameter-fields.re.parameter-list)
338+
if result != none { arguments.last().types.push(result.captures.first()) }
339+
}
340+
} else if arguments.last().types.len() == 0 {
341+
assert.eq(
342+
line,
343+
parameter-fields.symbol.param-list-open,
344+
message: ```
345+
Multiline parameter lists must contain a single parenthesis as the opening token.
346+
```.text,
347+
)
348+
parameter-fields.inside-param-list = true
349+
} else {
350+
arguments.last().text.push(line.trim(at: end))
351+
}
352+
}
353+
} else if result-fields.inside {
354+
let tmp-ws = line.match(comment-ws-re).captures.first().len()
355+
line = line.slice(tmp-ws)
356+
357+
if tmp-ws < indent-ws + result-fields.indent-ws + result-fields.symbol.len() {
358+
result-fields.inside = false
359+
indent-ws = tmp-ws
360+
361+
if line.starts-with(result-fields.symbol) {
362+
let re-result = line.match(result-fields.re)
363+
if re-result != none {
364+
result-fields.indent-ws = re-result.captures.first().len()
365+
result-fields.inside = true
366+
367+
result.push((
368+
type: re-result.captures.at(1),
369+
text: if re-result.captures.at(3) != none {
370+
(re-result.captures.at(3).trim(at: end),)
371+
} else {
372+
()
373+
},
374+
))
375+
}
376+
} else if line.starts-with(parameter-fields.symbol.param) {
377+
let param = line.match(parameter-fields.re.line)
378+
if param != none {
379+
parameter-fields.indent-ws = param.captures.first().len()
380+
parameter-fields.inside = true
381+
382+
arguments.push((
383+
name: param.captures.at(1),
384+
types: if param.captures.at(3) != none {
385+
param
386+
.captures
387+
.at(3)
388+
.matches(parameter-fields.re.parameter-list)
389+
.map(it => it.captures.first())
390+
} else {
391+
()
392+
},
393+
default-value: if param.captures.at(5) != none { param.captures.at(5) } else { none },
394+
text: if param.captures.at(7) != none {
395+
(param.captures.at(7).trim(at: end),)
396+
} else {
397+
()
398+
},
399+
))
400+
}
401+
} else if line.starts-with(example-fields.symbol.open) {
402+
example-fields.inside = true
403+
text.push(line.trim(at: end))
404+
} else {
405+
if text.last().len() == 0 and line.len() == 0 { continue }
406+
text.push(line.trim(at: end))
407+
}
408+
} else {
409+
result.last().text.push(line.trim(at: end))
410+
}
411+
}
412+
}
413+
414+
return (
415+
raw: string,
416+
text: text.join("\n", default: "").trim(),
417+
arguments: arguments.map(it => (
418+
..it,
419+
text: it
420+
.text
421+
.join(
422+
"\n",
423+
default: "",
424+
)
425+
.trim(),
426+
)),
427+
result: result.map(it => (
428+
..it,
429+
text: it
430+
.text
431+
.join(
432+
"\n",
433+
default: "",
434+
)
435+
.trim(),
436+
)),
437+
)
438+
}
439+
153440
#let parse-docstring(string) = {
154441
let argument-re = regex("-\s+(\.*[_a-zA-Z]+[-\w]*)\s+(\\(.*?\\))?(\s*=\s*.*?)?:(.*)")
155442
let result-re = regex("->\s+\(?([_a-zA-Z]+[-\w]*)\)?\s*(.*)")

0 commit comments

Comments
 (0)