Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions lib/rdoc/markdown.kpeg
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,20 @@
end
end

# Backslash-escapes every character in +text+ that RDoc markup would otherwise
# interpret: <tt>*</tt>, <tt>+</tt>, <tt><</tt>, <tt>_</tt> and the backslash itself.
# Brackets and braces are intentionally left alone for now so that rdoc-styled
# links written inside markdown keep working.

def rdoc_escape(text)
  text.gsub(/[*+<\\_]/) do |special|
    "\\#{special}"
  end
end

# Backslash-escapes square brackets (and backslashes) inside a link url.
# The url ends up wrapped in <tt>[]</tt> in RDoc format, so a bare bracket
# inside it has to be escaped.

def rdoc_link_url_escape(text)
  text.gsub(/[\[\]\\]/) do |special|
    "\\#{special}"
  end
end

##
# :category: Extensions
#
Expand Down Expand Up @@ -969,11 +983,11 @@ Space = @Spacechar+ { " " }

Str = @StartList:a
< @NormalChar+ > { a = text }
( StrChunk:c { a << c } )* { a }
( StrChunk:c { a << c } )* { rdoc_escape(a) }

StrChunk = < (@NormalChar | /_+/ &Alphanumeric)+ > { text }

EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { text }
EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { rdoc_escape(text) }

Entity = ( HexEntity | DecEntity | CharEntity ):a { a }

Expand All @@ -988,7 +1002,7 @@ TerminalEndline = @Sp @Newline @Eof
LineBreak = " " @NormalEndline { RDoc::Markup::HardBreak.new }

Symbol = < @SpecialChar >
{ text }
{ rdoc_escape(text) }

# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
Expand Down Expand Up @@ -1053,7 +1067,7 @@ ReferenceLinkSingle = Label:content < (Spnl "[]")? >
{ link_to content, content, text }

ExplicitLink = ExplicitLinkWithLabel:a
{ "{#{a[:label]}}[#{a[:link]}]" }
{ "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" }

ExplicitLinkWithLabel = Label:label "(" @Sp Source:link Spnl Title @Sp ")"
{ { label: label, link: link } }
Expand Down Expand Up @@ -1163,12 +1177,12 @@ Newline = %literals.Newline
Spacechar = %literals.Spacechar

HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";"
{ [text.to_i(16)].pack 'U' }
{ rdoc_escape([text.to_i(16)].pack('U')) }
DecEntity = "&#" < /[0-9]+/ > ";"
{ [text.to_i].pack 'U' }
{ rdoc_escape([text.to_i].pack('U')) }
CharEntity = "&" </[A-Za-z0-9]+/ > ";"
{ if entity = HTML_ENTITIES[text] then
entity.pack 'U*'
rdoc_escape(entity.pack('U*'))
else
"&#{text};"
end
Expand Down
56 changes: 35 additions & 21 deletions lib/rdoc/markdown.rb
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,20 @@ def emphasis text
end
end

# Backslash-escapes every character in +text+ that RDoc markup would otherwise
# interpret: <tt>*</tt>, <tt>+</tt>, <tt><</tt>, <tt>_</tt> and the backslash itself.
# Brackets and braces are intentionally left alone for now so that rdoc-styled
# links written inside markdown keep working.

def rdoc_escape(text)
  text.gsub(/[*+<\\_]/) do |special|
    "\\#{special}"
  end
end

# Backslash-escapes square brackets (and backslashes) inside a link url.
# The url ends up wrapped in <tt>[]</tt> in RDoc format, so a bare bracket
# inside it has to be escaped.

def rdoc_link_url_escape(text)
  text.gsub(/[\[\]\\]/) do |special|
    "\\#{special}"
  end
end

##
# :category: Extensions
#
Expand Down Expand Up @@ -9731,7 +9745,7 @@ def _Space
return _tmp
end

# Str = @StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { a }
# Str = @StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { rdoc_escape(a) }
def _Str

_save = self.pos
Expand Down Expand Up @@ -9792,7 +9806,7 @@ def _Str
self.pos = _save
break
end
@result = begin; a ; end
@result = begin; rdoc_escape(a) ; end
_tmp = true
unless _tmp
self.pos = _save
Expand Down Expand Up @@ -9894,7 +9908,7 @@ def _StrChunk
return _tmp
end

# EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { text }
# EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { rdoc_escape(text) }
def _EscapedChar

_save = self.pos
Expand All @@ -9921,7 +9935,7 @@ def _EscapedChar
self.pos = _save
break
end
@result = begin; text ; end
@result = begin; rdoc_escape(text) ; end
_tmp = true
unless _tmp
self.pos = _save
Expand Down Expand Up @@ -10122,7 +10136,7 @@ def _LineBreak
return _tmp
end

# Symbol = < @SpecialChar > { text }
# Symbol = < @SpecialChar > { rdoc_escape(text) }
def _Symbol

_save = self.pos
Expand All @@ -10136,7 +10150,7 @@ def _Symbol
self.pos = _save
break
end
@result = begin; text ; end
@result = begin; rdoc_escape(text) ; end
_tmp = true
unless _tmp
self.pos = _save
Expand Down Expand Up @@ -11189,7 +11203,7 @@ def _ReferenceLinkSingle
return _tmp
end

# ExplicitLink = ExplicitLinkWithLabel:a { "{#{a[:label]}}[#{a[:link]}]" }
# ExplicitLink = ExplicitLinkWithLabel:a { "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" }
def _ExplicitLink

_save = self.pos
Expand All @@ -11200,7 +11214,7 @@ def _ExplicitLink
self.pos = _save
break
end
@result = begin; "{#{a[:label]}}[#{a[:link]}]" ; end
@result = begin; "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" ; end
_tmp = true
unless _tmp
self.pos = _save
Expand Down Expand Up @@ -14615,7 +14629,7 @@ def _Spacechar
return _tmp
end

# HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";" { [text.to_i(16)].pack 'U' }
# HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";" { rdoc_escape([text.to_i(16)].pack('U')) }
def _HexEntity

_save = self.pos
Expand All @@ -14639,7 +14653,7 @@ def _HexEntity
self.pos = _save
break
end
@result = begin; [text.to_i(16)].pack 'U' ; end
@result = begin; rdoc_escape([text.to_i(16)].pack('U')) ; end
_tmp = true
unless _tmp
self.pos = _save
Expand All @@ -14651,7 +14665,7 @@ def _HexEntity
return _tmp
end

# DecEntity = "&#" < /[0-9]+/ > ";" { [text.to_i].pack 'U' }
# DecEntity = "&#" < /[0-9]+/ > ";" { rdoc_escape([text.to_i].pack('U')) }
def _DecEntity

_save = self.pos
Expand All @@ -14675,7 +14689,7 @@ def _DecEntity
self.pos = _save
break
end
@result = begin; [text.to_i].pack 'U' ; end
@result = begin; rdoc_escape([text.to_i].pack('U')) ; end
_tmp = true
unless _tmp
self.pos = _save
Expand All @@ -14687,7 +14701,7 @@ def _DecEntity
return _tmp
end

# CharEntity = "&" < /[A-Za-z0-9]+/ > ";" { if entity = HTML_ENTITIES[text] then entity.pack 'U*' else "&#{text};" end }
# CharEntity = "&" < /[A-Za-z0-9]+/ > ";" { if entity = HTML_ENTITIES[text] then rdoc_escape(entity.pack('U*')) else "&#{text};" end }
def _CharEntity

_save = self.pos
Expand All @@ -14712,7 +14726,7 @@ def _CharEntity
break
end
@result = begin; if entity = HTML_ENTITIES[text] then
entity.pack 'U*'
rdoc_escape(entity.pack('U*'))
else
"&#{text};"
end
Expand Down Expand Up @@ -16563,15 +16577,15 @@ def _DefinitionListDefinition
Rules[:_Inlines] = rule_info("Inlines", "(!@Endline Inline:i { i } | @Endline:c !(&{ github? } Ticks3 /[^`\\n]*$/) &Inline { c })+:chunks @Endline? { chunks }")
Rules[:_Inline] = rule_info("Inline", "(Str | @Endline | UlOrStarLine | @Space | Strong | Emph | Strike | Image | Link | NoteReference | InlineNote | Code | RawHtml | Entity | EscapedChar | Symbol)")
Rules[:_Space] = rule_info("Space", "@Spacechar+ { \" \" }")
Rules[:_Str] = rule_info("Str", "@StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { a }")
Rules[:_Str] = rule_info("Str", "@StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { rdoc_escape(a) }")
Rules[:_StrChunk] = rule_info("StrChunk", "< (@NormalChar | /_+/ &Alphanumeric)+ > { text }")
Rules[:_EscapedChar] = rule_info("EscapedChar", "\"\\\\\" !@Newline < /[:\\\\`|*_{}\\[\\]()\#+.!><-]/ > { text }")
Rules[:_EscapedChar] = rule_info("EscapedChar", "\"\\\\\" !@Newline < /[:\\\\`|*_{}\\[\\]()\#+.!><-]/ > { rdoc_escape(text) }")
Rules[:_Entity] = rule_info("Entity", "(HexEntity | DecEntity | CharEntity):a { a }")
Rules[:_Endline] = rule_info("Endline", "(@LineBreak | @TerminalEndline | @NormalEndline)")
Rules[:_NormalEndline] = rule_info("NormalEndline", "@Sp @Newline !@BlankLine !\">\" !AtxStart !(Line /={1,}|-{1,}/ @Newline) { \"\\n\" }")
Rules[:_TerminalEndline] = rule_info("TerminalEndline", "@Sp @Newline @Eof")
Rules[:_LineBreak] = rule_info("LineBreak", "\" \" @NormalEndline { RDoc::Markup::HardBreak.new }")
Rules[:_Symbol] = rule_info("Symbol", "< @SpecialChar > { text }")
Rules[:_Symbol] = rule_info("Symbol", "< @SpecialChar > { rdoc_escape(text) }")
Rules[:_UlOrStarLine] = rule_info("UlOrStarLine", "(UlLine | StarLine):a { a }")
Rules[:_StarLine] = rule_info("StarLine", "(< /\\*{4,}/ > { text } | < @Spacechar /\\*+/ &@Spacechar > { text })")
Rules[:_UlLine] = rule_info("UlLine", "(< /_{4,}/ > { text } | < @Spacechar /_+/ &@Spacechar > { text })")
Expand All @@ -16588,7 +16602,7 @@ def _DefinitionListDefinition
Rules[:_ReferenceLink] = rule_info("ReferenceLink", "(ReferenceLinkDouble | ReferenceLinkSingle)")
Rules[:_ReferenceLinkDouble] = rule_info("ReferenceLinkDouble", "Label:content < Spnl > !\"[]\" Label:label { link_to content, label, text }")
Rules[:_ReferenceLinkSingle] = rule_info("ReferenceLinkSingle", "Label:content < (Spnl \"[]\")? > { link_to content, content, text }")
Rules[:_ExplicitLink] = rule_info("ExplicitLink", "ExplicitLinkWithLabel:a { \"{\#{a[:label]}}[\#{a[:link]}]\" }")
Rules[:_ExplicitLink] = rule_info("ExplicitLink", "ExplicitLinkWithLabel:a { \"{\#{a[:label]}}[\#{rdoc_link_url_escape(a[:link])}]\" }")
Rules[:_ExplicitLinkWithLabel] = rule_info("ExplicitLinkWithLabel", "Label:label \"(\" @Sp Source:link Spnl Title @Sp \")\" { { label: label, link: link } }")
Rules[:_Source] = rule_info("Source", "(\"<\" < SourceContents > \">\" | < SourceContents >) { text }")
Rules[:_SourceContents] = rule_info("SourceContents", "((!\"(\" !\")\" !\">\" Nonspacechar)+ | \"(\" SourceContents \")\")*")
Expand Down Expand Up @@ -16631,9 +16645,9 @@ def _DefinitionListDefinition
Rules[:_BOM] = rule_info("BOM", "%literals.BOM")
Rules[:_Newline] = rule_info("Newline", "%literals.Newline")
Rules[:_Spacechar] = rule_info("Spacechar", "%literals.Spacechar")
Rules[:_HexEntity] = rule_info("HexEntity", "/&\#x/i < /[0-9a-fA-F]+/ > \";\" { [text.to_i(16)].pack 'U' }")
Rules[:_DecEntity] = rule_info("DecEntity", "\"&\#\" < /[0-9]+/ > \";\" { [text.to_i].pack 'U' }")
Rules[:_CharEntity] = rule_info("CharEntity", "\"&\" < /[A-Za-z0-9]+/ > \";\" { if entity = HTML_ENTITIES[text] then entity.pack 'U*' else \"&\#{text};\" end }")
Rules[:_HexEntity] = rule_info("HexEntity", "/&\#x/i < /[0-9a-fA-F]+/ > \";\" { rdoc_escape([text.to_i(16)].pack('U')) }")
Rules[:_DecEntity] = rule_info("DecEntity", "\"&\#\" < /[0-9]+/ > \";\" { rdoc_escape([text.to_i].pack('U')) }")
Rules[:_CharEntity] = rule_info("CharEntity", "\"&\" < /[A-Za-z0-9]+/ > \";\" { if entity = HTML_ENTITIES[text] then rdoc_escape(entity.pack('U*')) else \"&\#{text};\" end }")
Rules[:_NonindentSpace] = rule_info("NonindentSpace", "/ {0,3}/")
Rules[:_Indent] = rule_info("Indent", "/\\t| /")
Rules[:_IndentedLine] = rule_info("IndentedLine", "Indent Line")
Expand Down
3 changes: 2 additions & 1 deletion lib/rdoc/markup/inline_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,10 @@ def scan_token
# Returns nil if no valid URL part is found.
# URL part is enclosed in square brackets and may contain escaped brackets.
# Example: <tt>[http://example.com/?q=\[\]]</tt> represents <tt>http://example.com/?q=[]</tt>.
# If we're accepting rdoc-style links in markdown, url may include <tt>*+<_</tt> with backslash escape.

def read_tidylink_url
# NOTE(review): the two assignments below look like the removed and the added
# line of a diff hunk shown together. The second pattern additionally accepts
# backslash-escaped <tt>*</tt>, <tt>+</tt>, <tt><</tt> and <tt>_</tt>.
# Presumably only one of them belongs in the real method -- confirm against
# the actual file before relying on this listing.
bracketed_url = strscan(/\[([^\s\[\]\\]|\\[\[\]\\])+\]/)
bracketed_url = strscan(/\[([^\s\[\]\\]|\\[\[\]\\*+<_])+\]/)
# Strip the surrounding brackets, then drop the backslash in front of every
# escaped character. Yields nil when nothing was scanned.
bracketed_url[1...-1].gsub(/\\(.)/, '\1') if bracketed_url
end
end
16 changes: 16 additions & 0 deletions test/rdoc/markup/to_html_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,22 @@ def test_convert_TIDYLINK_multiple
assert_equal expected, result
end

def test_convert_TIDYLINK_url_unescape
  # markdown: [{label}](http://example.com/foo?q=bar+baz[])
  # Backslash-escaped braces in the label and backslash-escaped brackets in the
  # url must come out unescaped in the generated HTML.
  html = @to.convert('{\{label\}}[http://example.com/_foo?q=bar+baz\[\]]')

  assert_equal "\n<p><a href=\"http://example.com/_foo?q=bar+baz[]\">{label}</a></p>\n", html
end

def test_convert_TIDYLINK_rdoc_in_markdown_url_unescape
  # markdown: {label}[http://example.com/?q=<+_*]
  # The text above is plain text in markdown, so <, +, _ and * arrive here
  # backslash-escaped. As long as rdoc-style links are accepted in markdown,
  # those escapes have to be allowed inside the [url] part.
  html = @to.convert('{label}[http://example.com/?q=\<\+\_\*]')

  assert_equal "\n<p><a href=\"http://example.com/?q=&lt;+_*\">label</a></p>\n", html
end

def test_convert_TIDYLINK_with_code_label
result = @to.convert '{Link to +Foo+}[https://example.com]'

Expand Down
57 changes: 53 additions & 4 deletions test/rdoc/rdoc_markdown_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -480,11 +480,11 @@ def test_parse_emphasis_underscore
end

def test_parse_emphasis_underscore_embedded
# NOTE(review): the two +parse+ lines below, and the two +para+ lines further
# down, look like the removed and the added line of a diff hunk shown together.
# Presumably only the second line of each pair belongs in the real test --
# confirm against the actual file.
doc = parse "foo_bar bar_baz\n"
doc = parse "foo_bar bar_baz _em1_ *em2*\n"

expected =
doc(
para("foo_bar bar_baz"))
para("foo\\_bar bar\\_baz _em1_ _em2_"))

assert_equal expected, doc
end
Expand All @@ -494,15 +494,64 @@ def test_parse_emphasis_underscore_in_word

expected =
doc(
para("it foo_bar_baz"))
para("it foo\\_bar\\_baz"))

assert_equal expected, doc
end

def test_rdoc_code_escaped_in_normal_text
  # Plus signs in ordinary markdown text must come out backslash-escaped;
  # a backslash that was itself escaped in markdown is escaped again.
  parsed = parse("+notcode+ \\+notcode+ \\\\+notcode+")

  assert_equal doc(para("\\+notcode\\+ \\+notcode\\+ \\\\\\+notcode\\+")), parsed
end

def test_escape_character_entities
  # Hex, decimal and named entities that decode to <, *, + or _ must be
  # backslash-escaped after decoding.
  parsed = parse("&#x3C;tt>&#x2A;\\</tt> &#60;tt>&#43;\\</tt> &lt;tt>&lowbar;\\</tt>")

  assert_equal doc(para("\\<tt>\\*\\</tt> \\<tt>\\+\\</tt> \\<tt>\\_\\</tt>")), parsed
end

def test_rdoc_escape_in_markdown_styling
  # Text inside markdown emphasis and strong is escaped, while the content of a
  # markdown code span is passed through untouched.
  parsed = parse("_a \\_b\\_ c_ **+d+** `_1+2*3`")

  assert_equal doc(para("<em>a \\_b\\_ c</em> <b>\\+d\\+</b> <code>_1+2*3</code>")), parsed
end

def test_rdoc_heading_escaped_inside_markdown
  # A line starting with "= " in markdown input is expected to stay an ordinary paragraph.
  parsed = parse("= notheading\n")

  assert_equal doc(para("= notheading")), parsed
end

def test_rdoc_code_escaped_inside_markdown
  # Plus signs inside markdown strike-through text are backslash-escaped too.
  parsed = parse("~~+notcode+~~")

  assert_equal doc(para("<del>\\+notcode\\+</del>")), parsed
end

def test_no_rdoc_escape_inside_markdown_code
  # The content of a markdown code span must not receive any backslash escapes.
  parsed = parse("`+foo+`")

  assert_equal doc(para("<code>+foo+</code>")), parsed
end

def test_rdoc_format_escaped_inside_markdown_link
  # In a markdown link the label text is escaped (except inside its code span)
  # and brackets in the url are backslash-escaped.
  parsed = parse("[Link +to+ `tap{ +1+ }`](http://example.com/?q=[])")

  assert_equal doc(para("{Link \\+to\\+ <code>tap{ +1+ }</code>}[http://example.com/?q=\\[\\]]")), parsed
end

def test_lt_escape
  # A "<" escaped with a backslash in markdown stays backslash-escaped in the
  # output, while the unescaped <b> ... </b> tags are kept as written.
  parsed = parse("\\<b>`a`\\</b> <b>\\</b>`b`</b>")

  assert_equal doc(para("\\<b><code>a</code>\\</b> <b>\\</b><code>b</code></b>")), parsed
end

def test_parse_escape
# A markdown-escaped backtick is expected to come out as a bare backtick.
assert_equal doc(para("Backtick: `")), parse("Backtick: \\`")

# NOTE(review): the two "Backslash" assertions below expect different results
# for the same input; they look like the removed and the added line of a diff
# hunk shown together. Presumably only the second one belongs in the real
# test -- confirm against the actual file.
assert_equal doc(para("Backslash: \\")), parse("Backslash: \\\\")
# Unescaped as markdown and then escaped as RDoc
assert_equal doc(para("Backslash: \\\\")), parse("Backslash: \\\\")

# A markdown-escaped colon is expected to come out as a bare colon.
assert_equal doc(para("Colon: :")), parse("Colon: \\:")
end
Expand Down
Loading
Loading