Skip to content

Commit 67faaf8

Browse files
committed
New detector: missing-space-before-default-role.
closes #39
1 parent 964f486 commit 67faaf8

File tree

3 files changed

+62
-14
lines changed

3 files changed

+62
-14
lines changed

sphinxlint.py

Lines changed: 54 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -212,20 +212,33 @@ def match_size(re_match):
212212
return re_match.end() - re_match.start()
213213

214214

215-
def clean_paragraph(paragraph):
216-
paragraph = escape2null(paragraph)
215+
def _clean_heuristic(paragraph, regex):
    """Strip every match of *regex* from *paragraph*, shortest match first.

    Matches are searched with ``overlapped=True`` and the shortest one
    (as measured by ``match_size``) is removed; the search then restarts
    on the shortened paragraph until nothing matches any more.

    Removing the shortest candidate first is a heuristic: it is taken as
    the most "credible" match. For example, to remove `(.*)` from
    `(abc def ghi (jkl)`, removing everything would swallow a lone `(`,
    while the credible action is to remove `(jkl)` and leave the lone
    `(` behind.

    Returns the paragraph with all matches removed.
    """
    while True:
        matches = regex.finditer(paragraph, overlapped=True)
        shortest = min(matches, key=match_size, default=None)
        if shortest is None:
            # Nothing left to remove.
            break
        before = paragraph[: shortest.start()]
        after = paragraph[shortest.end() :]
        paragraph = before + after
    return paragraph
231+
232+
233+
def clean_paragraph(paragraph):
    """Strip well-formed markup so detectors only see the suspicious rest.

    Removed, in this order: inline literals, inline internal targets
    (both through ``_clean_heuristic``), then roles matching
    ``normal_role_re``.

    The text goes through ``escape2null`` first; any remaining ``\\x00``
    is turned back into a backslash before returning.
    """
    cleaned = escape2null(paragraph)
    for well_formed_re in (inline_literal_re, inline_internal_target_re):
        cleaned = _clean_heuristic(cleaned, well_formed_re)
    cleaned = normal_role_re.sub("", cleaned)
    return cleaned.replace("\x00", "\\")
231244

@@ -353,7 +366,7 @@ def paragraphs(lines):
353366
)
354367

355368

356-
def inline_markup_gen(start_string, end_string):
369+
def inline_markup_gen(start_string, end_string, extra_allowed_before=""):
357370
"""Generate a regex matching an inline markup.
358371
359372
inline_markup_gen('**', '**') generates a regex matching strong
@@ -363,8 +376,10 @@ def inline_markup_gen(start_string, end_string):
363376
unicode_allowed_before = r"[\p{Ps}\p{Pi}\p{Pf}\p{Pd}\p{Po}]"
364377
ascii_allowed_after = r"""[-.,:;!?/'")\]}>]"""
365378
unicode_allowed_after = r"[\p{Pe}\p{Pi}\p{Pf}\p{Pd}\p{Po}]"
379+
if extra_allowed_before:
380+
extra_allowed_before = "|" + extra_allowed_before
366381
return re.compile(
367-
fr"""
382+
rf"""
368383
(?<!\x00) # Both inline markup start-string and end-string must not be preceded by
369384
# an unescaped backslash
370385
@@ -373,6 +388,7 @@ def inline_markup_gen(start_string, end_string):
373388
\s| # or be immediately preceded by whitespace,
374389
{ascii_allowed_before}| # one of the ASCII characters
375390
{unicode_allowed_before} # or a similar non-ASCII punctuation character.
391+
{extra_allowed_before}
376392
)
377393
378394
(?P<inline_markup>
@@ -401,6 +417,7 @@ def inline_markup_gen(start_string, end_string):
401417

402418
# https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
403419
interpreted_text_re = inline_markup_gen("`", "`")
420+
inline_internal_target_re = inline_markup_gen("_`", "`")
404421
inline_literal_re = inline_markup_gen("``", "``")
405422
normal_role_re = re.compile(
406423
f":{SIMPLENAME}:{interpreted_text_re.pattern}", flags=re.VERBOSE | re.DOTALL
@@ -587,6 +604,29 @@ def check_missing_space_before_role(file, lines, options=None):
587604
yield lno, "missing space before role"
588605

589606

607+
@checker(".rst")
def check_missing_space_before_default_role(file, lines, options=None):
    """Search for missing spaces before default role.

    Bad:  the`sum`
    Good: the `sum`

    Yields ``(line number, message)`` tuples, one per default role that
    is glued to the preceding text.
    """
    # Same pattern as interpreted text, except that the start-string may
    # additionally follow any character other than "_" (presumably so that
    # inline internal targets, written _`like this`, are not reported).
    # The pattern does not depend on the paragraph, so build it once per
    # call instead of once per paragraph.
    glued_role_re = inline_markup_gen("`", "`", extra_allowed_before="[^_]")
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            # NOTE(review): this stops checking the rest of the file, not
            # just this paragraph; kept as is pending confirmation that
            # `continue` would not report false positives inside tables.
            return  # we don't handle tables yet.
        # Drop everything that is well formed; whatever still looks like
        # interpreted text afterwards is missing its leading space.
        paragraph = clean_paragraph(paragraph)
        paragraph = interpreted_text_re.sub("", paragraph)
        for role in glued_role_re.finditer(paragraph):
            error_offset = paragraph[: role.start()].count("\n")
            # Show up to three characters of leading context. Clamp at 0:
            # a negative start index would wrap around to the end of the
            # paragraph and produce an empty or misleading context.
            context_start = max(role.start() - 3, 0)
            context = paragraph[context_start : role.end()]
            yield (
                paragraph_lno + error_offset,
                f"missing space before default role: {context!r}.",
            )
628+
629+
590630
@checker(".rst")
591631
def check_missing_colon_in_role(file, lines, options=None):
592632
"""Search for missing colons in roles.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Lines containing`'RESTART'` mean that the user execution process has been
2+
re-started. This occurs when the user execution process has crashed,
3+
when one requests a restart on the Shell menu, or when one runs code
4+
in an editor window.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Oh yes, the _`Norwegian Blue`. What's, um, what's wrong with it?
2+
3+
4+
Even when there's _`many`, _`of`, _`them`.

0 commit comments

Comments
 (0)