@@ -212,20 +212,33 @@ def match_size(re_match):
212212 return re_match .end () - re_match .start ()
213213
214214
def _clean_heuristic(paragraph, regex):
    """Strip every occurrence of `regex` from `paragraph`, shortest first.

    The shortest match is considered the most "credible" one, so it is
    removed first, and the search is then restarted on the shortened
    text until nothing matches anymore.

    Example: when removing `(.*)` from `(abc def ghi (jkl)`, dropping
    the longest match would eat the whole text (including the lone
    `(`), whereas dropping the shortest one only removes `(jkl)` and
    leaves the unbalanced `(` in place.

    `regex` must provide `finditer(text, overlapped=True)`; the cleaned
    paragraph is returned.
    """
    text = paragraph

    def shortest_match():
        # Overlapping matches are needed so that a short match nested in
        # a longer one is also a candidate.
        return min(
            regex.finditer(text, overlapped=True), key=match_size, default=None
        )

    found = shortest_match()
    while found is not None:
        begin, end = found.span()
        text = text[:begin] + text[end:]
        found = shortest_match()
    return text
231+
232+
def clean_paragraph(paragraph):
    """Drop well-formed constructs so detectors only see the bad ones.

    Removed, in this order: inline literals, inline internal targets,
    then roles. Escapes are first encoded with `escape2null` and the
    null bytes are turned back into backslashes before returning.
    """
    cleaned = escape2null(paragraph)
    # Order matters: literals are removed before internal targets.
    for well_formed_re in (inline_literal_re, inline_internal_target_re):
        cleaned = _clean_heuristic(cleaned, well_formed_re)
    without_roles = normal_role_re.sub("", cleaned)
    return without_roles.replace("\x00", "\\")
231244
@@ -353,7 +366,7 @@ def paragraphs(lines):
353366)
354367
355368
356- def inline_markup_gen (start_string , end_string ):
369+ def inline_markup_gen (start_string , end_string , extra_allowed_before = "" ):
357370 """Generate a regex matching an inline markup.
358371
359372 inline_markup_gen('**', '**') generates a regex matching strong
@@ -363,8 +376,10 @@ def inline_markup_gen(start_string, end_string):
363376 unicode_allowed_before = r"[\p{Ps}\p{Pi}\p{Pf}\p{Pd}\p{Po}]"
364377 ascii_allowed_after = r"""[-.,:;!?/'")\]}>]"""
365378 unicode_allowed_after = r"[\p{Pe}\p{Pi}\p{Pf}\p{Pd}\p{Po}]"
379+ if extra_allowed_before :
380+ extra_allowed_before = "|" + extra_allowed_before
366381 return re .compile (
367- fr """
382+ rf """
368383 (?<!\x00) # Both inline markup start-string and end-string must not be preceded by
369384 # an unescaped backslash
370385
@@ -373,6 +388,7 @@ def inline_markup_gen(start_string, end_string):
373388 \s| # or be immediately preceded by whitespace,
374389 { ascii_allowed_before } | # one of the ASCII characters
375390 { unicode_allowed_before } # or a similar non-ASCII punctuation character.
391+ { extra_allowed_before }
376392 )
377393
378394 (?P<inline_markup>
@@ -401,6 +417,7 @@ def inline_markup_gen(start_string, end_string):
401417
402418# https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
403419interpreted_text_re = inline_markup_gen ("`" , "`" )
420+ inline_internal_target_re = inline_markup_gen ("_`" , "`" )
404421inline_literal_re = inline_markup_gen ("``" , "``" )
405422normal_role_re = re .compile (
406423 f":{ SIMPLENAME } :{ interpreted_text_re .pattern } " , flags = re .VERBOSE | re .DOTALL
@@ -587,6 +604,29 @@ def check_missing_space_before_role(file, lines, options=None):
587604 yield lno , "missing space before role"
588605
589606
@checker(".rst")
def check_missing_space_before_default_role(file, lines, options=None):
    """Search for missing spaces before default role.

    Bad:  the`sum`
    Good: the `sum`

    Each paragraph is first stripped of well-formed constructs (via
    `clean_paragraph`) and of well-formed interpreted text; whatever
    still looks like interpreted text once the "allowed before"
    constraint is relaxed is reported.

    Yields `(line_number, message)` tuples.
    """
    # Same pattern as interpreted text, but additionally accepting any
    # character other than `_` right before the opening backquote.
    # Built once: it does not depend on the paragraph being checked.
    glued_default_role_re = inline_markup_gen(
        "`", "`", extra_allowed_before="[^_]"
    )
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            # NOTE(review): this stops checking the rest of the file, not
            # only this paragraph -- confirm this is intended.
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        paragraph = interpreted_text_re.sub("", paragraph)
        for role in glued_default_role_re.finditer(paragraph):
            error_offset = paragraph[: role.start()].count("\n")
            # Clamp to 0: a negative start would be interpreted as an
            # offset from the end of the paragraph and yield a wrong
            # (usually empty) context for matches near the beginning.
            context_start = max(0, role.start() - 3)
            context = paragraph[context_start : role.end()]
            yield (
                paragraph_lno + error_offset,
                f"missing space before default role: {context!r}.",
            )
629+
590630@checker (".rst" )
591631def check_missing_colon_in_role (file , lines , options = None ):
592632 """Search for missing colons in roles.
0 commit comments