diff --git a/docs/conf.py b/docs/conf.py index 290eac7f..e468b853 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,6 +52,7 @@ ".*_NodeType", ".*Literal.*", ".*_Result", + ".*_State", "EnvType", "Path", "Ellipsis", diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index 93989eb5..c98323c0 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -6,17 +6,15 @@ class _Result: - __slots__ = ("lines", "ok", "pos", "str") + __slots__ = ("ok", "pos", "str") def __init__(self) -> None: self.ok = False self.pos = 0 - self.lines = 0 self.str = "" def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: - lines = 0 start = pos result = _Result() @@ -80,7 +78,6 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: return result result.str = unescapeAll(string[start:pos]) - result.lines = lines result.pos = pos result.ok = True return result diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py index f002c7c4..a38ff0d9 100644 --- a/markdown_it/helpers/parse_link_title.py +++ b/markdown_it/helpers/parse_link_title.py @@ -3,58 +3,73 @@ from ..common.utils import charCodeAt, unescapeAll -class _Result: - __slots__ = ("lines", "ok", "pos", "str") +class _State: + __slots__ = ("can_continue", "marker", "ok", "pos", "str") def __init__(self) -> None: self.ok = False + """if `true`, this is a valid link title""" + self.can_continue = False + """if `true`, this link can be continued on the next line""" self.pos = 0 - self.lines = 0 + """if `ok`, it's the position of the first character after the closing marker""" self.str = "" + """if `ok`, it's the unescaped title""" + self.marker = 0 + """expected closing marker character code""" def __str__(self) -> str: return self.str -def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result: - lines = 0 - start = pos - result = _Result() +def parseLinkTitle( + string: str, start: int, maximum: int, prev_state: _State | None = None +) -> _State: + """Parse link title within `str` in [start, max] range, + or continue previous parsing if `prev_state` is defined (equal to result of last execution). + """ + pos = start + state = _State() - if pos >= maximum: - return result + if prev_state is not None: + # this is a continuation of a previous parseLinkTitle call on the next line, + # used in reference links only + state.str = prev_state.str + state.marker = prev_state.marker + else: + if pos >= maximum: + return state - marker = charCodeAt(string, pos) + marker = charCodeAt(string, pos) - # /* " */ /* ' */ /* ( */ - if marker != 0x22 and marker != 0x27 and marker != 0x28: - return result + # /* " */ /* ' */ /* ( */ + if marker != 0x22 and marker != 0x27 and marker != 0x28: + return state - pos += 1 + start += 1 + pos += 1 + + # if opening marker is "(", switch it to closing marker ")" + if marker == 0x28: + marker = 0x29 - # if opening marker is "(", switch it to closing marker ")" - if marker == 0x28: - marker = 0x29 + state.marker = marker while pos < maximum: code = charCodeAt(string, pos) - if code == marker: - title = string[start + 1 : pos] - title = unescapeAll(title) - result.pos = pos + 1 - result.lines = lines - result.str = title - result.ok = True - return result - elif code == 0x28 and marker == 0x29: # /* ( */ /* ) */ - return result - elif code == 0x0A: - lines += 1 + if code == state.marker: + state.pos = pos + 1 + state.str += unescapeAll(string[start:pos]) + state.ok = True + return state + elif code == 0x28 and state.marker == 0x29: # /* ( */ /* ) */ + return state elif code == 0x5C and pos + 1 < maximum: # /* \ */ pos += 1 - if charCodeAt(string, pos) == 0x0A: - lines += 1 pos += 1 - return result + # no closing marker found, but this link title may continue on the next line (for references) + state.can_continue = True + state.str += unescapeAll(string[start:pos]) + return state diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index b77944b2..ad94d409 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -11,7 +11,6 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent ) - lines = 0 pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] nextLine = startLine + 1 @@ -22,51 +21,9 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> if state.src[pos] != "[": return False - # Simple check to quickly interrupt scan on [link](url) at the start of line. - # Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54 - while pos < maximum: - # /* ] */ /* \ */ /* : */ - if state.src[pos] == "]" and state.src[pos - 1] != "\\": - if pos + 1 == maximum: - return False - if state.src[pos + 1] != ":": - return False - break - pos += 1 - - endLine = state.lineMax - - # jump line-by-line until empty one or EOF - terminatorRules = state.md.block.ruler.getRules("reference") + string = state.src[pos : maximum + 1] - oldParentType = state.parentType - state.parentType = "reference" - - while nextLine < endLine and not state.isEmpty(nextLine): - # this would be a code block normally, but after paragraph - # it's considered a lazy continuation regardless of what's there - if state.sCount[nextLine] - state.blkIndent > 3: - nextLine += 1 - continue - - # quirk for blockquotes, this line should already be checked by that rule - if state.sCount[nextLine] < 0: - nextLine += 1 - continue - - # Some tags can terminate paragraph without empty line. - terminate = False - for terminatorRule in terminatorRules: - if terminatorRule(state, nextLine, endLine, True): - terminate = True - break - - if terminate: - break - - nextLine += 1 - - string = state.getLines(startLine, nextLine, state.blkIndent, False).strip() + # string = state.getLines(startLine, nextLine, state.blkIndent, False).strip() maximum = len(string) labelEnd = None @@ -79,11 +36,20 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> labelEnd = pos break elif ch == 0x0A: # /* \n */ - lines += 1 + if (lineContent := getNextLine(state, nextLine)) is not None: + string += lineContent + maximum = len(string) + nextLine += 1 elif ch == 0x5C: # /* \ */ pos += 1 - if pos < maximum and charCodeAt(string, pos) == 0x0A: - lines += 1 + if ( + pos < maximum + and charCodeAt(string, pos) == 0x0A + and (lineContent := getNextLine(state, nextLine)) is not None + ): + string += lineContent + maximum = len(string) + nextLine += 1 pos += 1 if ( @@ -97,7 +63,10 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> while pos < maximum: ch = charCodeAt(string, pos) if ch == 0x0A: - lines += 1 + if (lineContent := getNextLine(state, nextLine)) is not None: + string += lineContent + maximum = len(string) + nextLine += 1 elif isSpace(ch): pass else: @@ -106,20 +75,19 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> # [label]: destination 'title' # ^^^^^^^^^^^ parse this - res = state.md.helpers.parseLinkDestination(string, pos, maximum) - if not res.ok: + destRes = state.md.helpers.parseLinkDestination(string, pos, maximum) + if not destRes.ok: return False - href = state.md.normalizeLink(res.str) + href = state.md.normalizeLink(destRes.str) if not state.md.validateLink(href): return False - pos = res.pos - lines += res.lines + pos = destRes.pos # save cursor state, we could require to rollback later destEndPos = pos - destEndLineNo = lines + destEndLineNo = nextLine # [label]: destination 'title' # ^^^ skipping those spaces @@ -127,7 +95,10 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> while pos < maximum: ch = charCodeAt(string, pos) if ch == 0x0A: - lines += 1 + if (lineContent := getNextLine(state, nextLine)) is not None: + string += lineContent + maximum = len(string) + nextLine += 1 elif isSpace(ch): pass else: @@ -136,15 +107,23 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> # [label]: destination 'title' # ^^^^^^^ parse this - res = state.md.helpers.parseLinkTitle(string, pos, maximum) - if pos < maximum and start != pos and res.ok: - title = res.str - pos = res.pos - lines += res.lines + titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, None) + while titleRes.can_continue: + if (lineContent := getNextLine(state, nextLine)) is None: + break + string += lineContent + pos = maximum + maximum = len(string) + nextLine += 1 + titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, titleRes) + + if pos < maximum and start != pos and titleRes.ok: + title = titleRes.str + pos = titleRes.pos else: title = "" pos = destEndPos - lines = destEndLineNo + nextLine = destEndLineNo # skip trailing spaces until the rest of the line while pos < maximum: @@ -158,7 +137,7 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> # but it could still be a valid reference if we roll back title = "" pos = destEndPos - lines = destEndLineNo + nextLine = destEndLineNo while pos < maximum: ch = charCodeAt(string, pos) if not isSpace(ch): @@ -181,7 +160,7 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> if "references" not in state.env: state.env["references"] = {} - state.line = startLine + lines + 1 + state.line = nextLine # note, this is not part of markdown-it JS, but is useful for renderers if state.md.options.get("inline_definitions", False): @@ -210,6 +189,47 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> } ) - state.parentType = oldParentType - return True + + +def getNextLine(state: StateBlock, nextLine: int) -> None | str: + endLine = state.lineMax + + if nextLine >= endLine or state.isEmpty(nextLine): + # empty line or end of input + return None + + isContinuation = False + + # this would be a code block normally, but after paragraph + # it's considered a lazy continuation regardless of what's there + if state.is_code_block(nextLine): + isContinuation = True + + # quirk for blockquotes, this line should already be checked by that rule + if state.sCount[nextLine] < 0: + isContinuation = True + + if not isContinuation: + terminatorRules = state.md.block.ruler.getRules("reference") + oldParentType = state.parentType + state.parentType = "reference" + + # Some tags can terminate paragraph without empty line. + terminate = False + for terminatorRule in terminatorRules: + if terminatorRule(state, nextLine, endLine, True): + terminate = True + break + + state.parentType = oldParentType + + if terminate: + # terminated by another block + return None + + pos = state.bMarks[nextLine] + state.tShift[nextLine] + maximum = state.eMarks[nextLine] + + # max + 1 explicitly includes the newline + return state.src[pos : maximum + 1] diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index b4a32a9f..005105b1 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -66,7 +66,7 @@ def image(state: StateInline, silent: bool) -> bool: # [link]( "title" ) # ^^^^^^^ parsing link title - res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax) + res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax, None) if pos < max and start != pos and res.ok: title = res.str pos = res.pos diff --git a/tests/test_port/fixtures/commonmark_extras.md b/tests/test_port/fixtures/commonmark_extras.md index 5d13d859..f0b31dbd 100644 --- a/tests/test_port/fixtures/commonmark_extras.md +++ b/tests/test_port/fixtures/commonmark_extras.md @@ -49,6 +49,40 @@ Reference labels: support ligatures (equivalent according to unicode case foldin

fffifl

. +Reference can be interrupted by other rules +. +[foo]: /url 'title + - - - +' + +[foo] +. +

[foo]: /url 'title

+
+

'

+

[foo]

+. + +Escape character in link reference title doesn't escape newlines +. +[foo]: /url " +hello +\ +\ +\ +world +" + +[foo] +. +

foo

+. Issue #35. `<` should work as punctuation .