ruby · tompng · Jan 5, 2026
diff --git a/lib/rdoc/markdown.kpeg b/lib/rdoc/markdown.kpeg
@@ -303,6 +303,20 @@
     end
   end
 
+  # Escapes characters that have special meaning in RDoc format by prefixing each with a backslash.
+  # Brackets and braces are left unescaped so that rdoc-styled links written in markdown keep working for now.
+
+  def rdoc_escape(text)
+    text.gsub(/[*+<\\_]/) {|s| "\\#{s}" }
+  end
+
+  # Escapes brackets and backslashes in a link URL by prefixing each with a backslash.
+  # Brackets need escaping because the link URL will be surrounded by `[]` in RDoc format.
+
+  def rdoc_link_url_escape(text)
+    text.gsub(/[\[\]\\]/) {|s| "\\#{s}" }
+  end
+
   ##
   # :category: Extensions
   #
@@ -969,11 +983,11 @@ Space = @Spacechar+ { " " }
 
 Str = @StartList:a
       < @NormalChar+ > { a = text }
-      ( StrChunk:c { a << c } )* { a }
+      ( StrChunk:c { a << c } )* { rdoc_escape(a) }
 
 StrChunk = < (@NormalChar | /_+/ &Alphanumeric)+ > { text }
 
-EscapedChar =   "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { text }
+EscapedChar =   "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { rdoc_escape(text) }
 
 Entity =    ( HexEntity | DecEntity | CharEntity ):a { a }
 
@@ -988,7 +1002,7 @@ TerminalEndline = @Sp @Newline @Eof
 LineBreak = "  " @NormalEndline { RDoc::Markup::HardBreak.new }
 
 Symbol = < @SpecialChar >
-         { text }
+         { rdoc_escape(text) }
 
 # This keeps the parser from getting bogged down on long strings of '*' or '_',
 # or strings of '*' or '_' with space on each side:
@@ -1053,7 +1067,7 @@ ReferenceLinkSingle = Label:content < (Spnl "[]")? >
                       { link_to content, content, text }
 
 ExplicitLink = ExplicitLinkWithLabel:a
-               { "{#{a[:label]}}[#{a[:link]}]" }
+               { "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" }
 
 ExplicitLinkWithLabel = Label:label "(" @Sp Source:link Spnl Title @Sp ")"
                         { { label: label, link: link } }
@@ -1163,12 +1177,12 @@ Newline           = %literals.Newline
 Spacechar         = %literals.Spacechar
 
 HexEntity  = /&#x/i < /[0-9a-fA-F]+/ > ";"
-             { [text.to_i(16)].pack 'U' }
+             { rdoc_escape([text.to_i(16)].pack('U')) }
 DecEntity  = "&#"   < /[0-9]+/       > ";"
-             { [text.to_i].pack 'U' }
+             { rdoc_escape([text.to_i].pack('U')) }
 CharEntity = "&"    </[A-Za-z0-9]+/  > ";"
              { if entity = HTML_ENTITIES[text] then
-                 entity.pack 'U*'
+                 rdoc_escape(entity.pack('U*'))
                else
                  "&#{text};"
                end

diff --git a/lib/rdoc/markdown.rb b/lib/rdoc/markdown.rb
@@ -688,6 +688,20 @@ def emphasis text
     end
   end
 
+  # Escapes characters that have special meaning in RDoc format by prefixing each with a backslash.
+  # Brackets and braces are left unescaped so that rdoc-styled links written in markdown keep working for now.
+
+  def rdoc_escape(text)
+    text.gsub(/[*+<\\_]/) {|s| "\\#{s}" }
+  end
+
+  # Escapes brackets and backslashes in a link URL by prefixing each with a backslash.
+  # Brackets need escaping because the link URL will be surrounded by `[]` in RDoc format.
+
+  def rdoc_link_url_escape(text)
+    text.gsub(/[\[\]\\]/) {|s| "\\#{s}" }
+  end
+
   ##
   # :category: Extensions
   #
@@ -9731,7 +9745,7 @@ def _Space
     return _tmp
   end
 
-  # Str = @StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { a }
+  # Str = @StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { rdoc_escape(a) }
   def _Str
 
     _save = self.pos
@@ -9792,7 +9806,7 @@ def _Str
         self.pos = _save
         break
       end
-      @result = begin;  a ; end
+      @result = begin;  rdoc_escape(a) ; end
       _tmp = true
       unless _tmp
         self.pos = _save
@@ -9894,7 +9908,7 @@ def _StrChunk
     return _tmp
   end
 
-  # EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { text }
+  # EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { rdoc_escape(text) }
   def _EscapedChar
 
     _save = self.pos
@@ -9921,7 +9935,7 @@ def _EscapedChar
         self.pos = _save
         break
       end
-      @result = begin;  text ; end
+      @result = begin;  rdoc_escape(text) ; end
       _tmp = true
       unless _tmp
         self.pos = _save
@@ -10122,7 +10136,7 @@ def _LineBreak
     return _tmp
   end
 
-  # Symbol = < @SpecialChar > { text }
+  # Symbol = < @SpecialChar > { rdoc_escape(text) }
   def _Symbol
 
     _save = self.pos
@@ -10136,7 +10150,7 @@ def _Symbol
         self.pos = _save
         break
       end
-      @result = begin;  text ; end
+      @result = begin;  rdoc_escape(text) ; end
       _tmp = true
       unless _tmp
         self.pos = _save
@@ -11189,7 +11203,7 @@ def _ReferenceLinkSingle
     return _tmp
   end
 
-  # ExplicitLink = ExplicitLinkWithLabel:a { "{#{a[:label]}}[#{a[:link]}]" }
+  # ExplicitLink = ExplicitLinkWithLabel:a { "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" }
   def _ExplicitLink
 
     _save = self.pos
@@ -11200,7 +11214,7 @@ def _ExplicitLink
         self.pos = _save
         break
       end
-      @result = begin;  "{#{a[:label]}}[#{a[:link]}]" ; end
+      @result = begin;  "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" ; end
       _tmp = true
       unless _tmp
         self.pos = _save
@@ -14615,7 +14629,7 @@ def _Spacechar
     return _tmp
   end
 
-  # HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";" { [text.to_i(16)].pack 'U' }
+  # HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";" { rdoc_escape([text.to_i(16)].pack('U')) }
   def _HexEntity
 
     _save = self.pos
@@ -14639,7 +14653,7 @@ def _HexEntity
         self.pos = _save
         break
       end
-      @result = begin;  [text.to_i(16)].pack 'U' ; end
+      @result = begin;  rdoc_escape([text.to_i(16)].pack('U')) ; end
       _tmp = true
       unless _tmp
         self.pos = _save
@@ -14651,7 +14665,7 @@ def _HexEntity
     return _tmp
   end
 
-  # DecEntity = "&#" < /[0-9]+/ > ";" { [text.to_i].pack 'U' }
+  # DecEntity = "&#" < /[0-9]+/ > ";" { rdoc_escape([text.to_i].pack('U')) }
   def _DecEntity
 
     _save = self.pos
@@ -14675,7 +14689,7 @@ def _DecEntity
         self.pos = _save
         break
       end
-      @result = begin;  [text.to_i].pack 'U' ; end
+      @result = begin;  rdoc_escape([text.to_i].pack('U')) ; end
       _tmp = true
       unless _tmp
         self.pos = _save
@@ -14687,7 +14701,7 @@ def _DecEntity
     return _tmp
   end
 
-  # CharEntity = "&" < /[A-Za-z0-9]+/ > ";" { if entity = HTML_ENTITIES[text] then                  entity.pack 'U*'                else                  "&#{text};"                end              }
+  # CharEntity = "&" < /[A-Za-z0-9]+/ > ";" { if entity = HTML_ENTITIES[text] then                  rdoc_escape(entity.pack('U*'))                else                  "&#{text};"                end              }
   def _CharEntity
 
     _save = self.pos
@@ -14712,7 +14726,7 @@ def _CharEntity
         break
       end
       @result = begin;  if entity = HTML_ENTITIES[text] then
-                 entity.pack 'U*'
+                 rdoc_escape(entity.pack('U*'))
                else
                  "&#{text};"
                end
@@ -16563,15 +16577,15 @@ def _DefinitionListDefinition
   Rules[:_Inlines] = rule_info("Inlines", "(!@Endline Inline:i { i } | @Endline:c !(&{ github? } Ticks3 /[^`\\n]*$/) &Inline { c })+:chunks @Endline? { chunks }")
   Rules[:_Inline] = rule_info("Inline", "(Str | @Endline | UlOrStarLine | @Space | Strong | Emph | Strike | Image | Link | NoteReference | InlineNote | Code | RawHtml | Entity | EscapedChar | Symbol)")
   Rules[:_Space] = rule_info("Space", "@Spacechar+ { \" \" }")
-  Rules[:_Str] = rule_info("Str", "@StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { a }")
+  Rules[:_Str] = rule_info("Str", "@StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { rdoc_escape(a) }")
   Rules[:_StrChunk] = rule_info("StrChunk", "< (@NormalChar | /_+/ &Alphanumeric)+ > { text }")
-  Rules[:_EscapedChar] = rule_info("EscapedChar", "\"\\\\\" !@Newline < /[:\\\\`|*_{}\\[\\]()\#+.!><-]/ > { text }")
+  Rules[:_EscapedChar] = rule_info("EscapedChar", "\"\\\\\" !@Newline < /[:\\\\`|*_{}\\[\\]()\#+.!><-]/ > { rdoc_escape(text) }")
   Rules[:_Entity] = rule_info("Entity", "(HexEntity | DecEntity | CharEntity):a { a }")
   Rules[:_Endline] = rule_info("Endline", "(@LineBreak | @TerminalEndline | @NormalEndline)")
   Rules[:_NormalEndline] = rule_info("NormalEndline", "@Sp @Newline !@BlankLine !\">\" !AtxStart !(Line /={1,}|-{1,}/ @Newline) { \"\\n\" }")
   Rules[:_TerminalEndline] = rule_info("TerminalEndline", "@Sp @Newline @Eof")
   Rules[:_LineBreak] = rule_info("LineBreak", "\"  \" @NormalEndline { RDoc::Markup::HardBreak.new }")
-  Rules[:_Symbol] = rule_info("Symbol", "< @SpecialChar > { text }")
+  Rules[:_Symbol] = rule_info("Symbol", "< @SpecialChar > { rdoc_escape(text) }")
   Rules[:_UlOrStarLine] = rule_info("UlOrStarLine", "(UlLine | StarLine):a { a }")
   Rules[:_StarLine] = rule_info("StarLine", "(< /\\*{4,}/ > { text } | < @Spacechar /\\*+/ &@Spacechar > { text })")
   Rules[:_UlLine] = rule_info("UlLine", "(< /_{4,}/ > { text } | < @Spacechar /_+/ &@Spacechar > { text })")
@@ -16588,7 +16602,7 @@ def _DefinitionListDefinition
   Rules[:_ReferenceLink] = rule_info("ReferenceLink", "(ReferenceLinkDouble | ReferenceLinkSingle)")
   Rules[:_ReferenceLinkDouble] = rule_info("ReferenceLinkDouble", "Label:content < Spnl > !\"[]\" Label:label { link_to content, label, text }")
   Rules[:_ReferenceLinkSingle] = rule_info("ReferenceLinkSingle", "Label:content < (Spnl \"[]\")? > { link_to content, content, text }")
-  Rules[:_ExplicitLink] = rule_info("ExplicitLink", "ExplicitLinkWithLabel:a { \"{\#{a[:label]}}[\#{a[:link]}]\" }")
+  Rules[:_ExplicitLink] = rule_info("ExplicitLink", "ExplicitLinkWithLabel:a { \"{\#{a[:label]}}[\#{rdoc_link_url_escape(a[:link])}]\" }")
   Rules[:_ExplicitLinkWithLabel] = rule_info("ExplicitLinkWithLabel", "Label:label \"(\" @Sp Source:link Spnl Title @Sp \")\" { { label: label, link: link } }")
   Rules[:_Source] = rule_info("Source", "(\"<\" < SourceContents > \">\" | < SourceContents >) { text }")
   Rules[:_SourceContents] = rule_info("SourceContents", "((!\"(\" !\")\" !\">\" Nonspacechar)+ | \"(\" SourceContents \")\")*")
@@ -16631,9 +16645,9 @@ def _DefinitionListDefinition
   Rules[:_BOM] = rule_info("BOM", "%literals.BOM")
   Rules[:_Newline] = rule_info("Newline", "%literals.Newline")
   Rules[:_Spacechar] = rule_info("Spacechar", "%literals.Spacechar")
-  Rules[:_HexEntity] = rule_info("HexEntity", "/&\#x/i < /[0-9a-fA-F]+/ > \";\" { [text.to_i(16)].pack 'U' }")
-  Rules[:_DecEntity] = rule_info("DecEntity", "\"&\#\" < /[0-9]+/ > \";\" { [text.to_i].pack 'U' }")
-  Rules[:_CharEntity] = rule_info("CharEntity", "\"&\" < /[A-Za-z0-9]+/ > \";\" { if entity = HTML_ENTITIES[text] then                  entity.pack 'U*'                else                  \"&\#{text};\"                end              }")
+  Rules[:_HexEntity] = rule_info("HexEntity", "/&\#x/i < /[0-9a-fA-F]+/ > \";\" { rdoc_escape([text.to_i(16)].pack('U')) }")
+  Rules[:_DecEntity] = rule_info("DecEntity", "\"&\#\" < /[0-9]+/ > \";\" { rdoc_escape([text.to_i].pack('U')) }")
+  Rules[:_CharEntity] = rule_info("CharEntity", "\"&\" < /[A-Za-z0-9]+/ > \";\" { if entity = HTML_ENTITIES[text] then                  rdoc_escape(entity.pack('U*'))                else                  \"&\#{text};\"                end              }")
   Rules[:_NonindentSpace] = rule_info("NonindentSpace", "/ {0,3}/")
   Rules[:_Indent] = rule_info("Indent", "/\\t|    /")
   Rules[:_IndentedLine] = rule_info("IndentedLine", "Indent Line")

diff --git a/lib/rdoc/markup/inline_parser.rb b/lib/rdoc/markup/inline_parser.rb
@@ -303,9 +303,10 @@ def scan_token
   # Returns nil if no valid URL part is found.
   # URL part is enclosed in square brackets and may contain escaped brackets.
   # Example: <tt>[http://example.com/?q=\[\]]</tt> represents <tt>http://example.com/?q=[]</tt>.
+  # As long as rdoc-style links are accepted in markdown, the URL may also contain any of <tt>*+<_</tt> escaped with a backslash.
 
   def read_tidylink_url
-    bracketed_url = strscan(/\[([^\s\[\]\\]|\\[\[\]\\])+\]/)
+    bracketed_url = strscan(/\[([^\s\[\]\\]|\\[\[\]\\*+<_])+\]/)
     bracketed_url[1...-1].gsub(/\\(.)/, '\1') if bracketed_url
   end
 end
diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb
@@ -736,6 +736,22 @@ def test_convert_TIDYLINK_multiple
     assert_equal expected, result
   end
 
+  def test_convert_TIDYLINK_url_unescape
+    # markdown: [{label}](http://example.com/_foo?q=bar+baz[])
+    result = @to.convert '{\{label\}}[http://example.com/_foo?q=bar+baz\[\]]'
+    expected = "\n<p><a href=\"http://example.com/_foo?q=bar+baz[]\">{label}</a></p>\n"
+    assert_equal expected, result
+  end
+
+  def test_convert_TIDYLINK_rdoc_in_markdown_url_unescape
+    # markdown: {label}[http://example.com/?q=<+_*]
+    # The above text is plain text in markdown, so <+_* are escaped in HTML.
+    # If we're accepting rdoc-style links in markdown, these escapes should be allowed in the [url] part.
+    result = @to.convert '{label}[http://example.com/?q=\<\+\_\*]'
+    expected = "\n<p><a href=\"http://example.com/?q=&lt;+_*\">label</a></p>\n"
+    assert_equal expected, result
+  end
+
   def test_convert_TIDYLINK_with_code_label
     result = @to.convert '{Link to +Foo+}[https://example.com]'
 

diff --git a/test/rdoc/rdoc_markdown_test.rb b/test/rdoc/rdoc_markdown_test.rb
@@ -480,11 +480,11 @@ def test_parse_emphasis_underscore
   end
 
   def test_parse_emphasis_underscore_embedded
-    doc = parse "foo_bar bar_baz\n"
+    doc = parse "foo_bar bar_baz _em1_ *em2*\n"
 
     expected =
       doc(
-        para("foo_bar bar_baz"))
+        para("foo\\_bar bar\\_baz _em1_ _em2_"))
 
     assert_equal expected, doc
   end
@@ -494,15 +494,64 @@ def test_parse_emphasis_underscore_in_word
 
     expected =
       doc(
-        para("it foo_bar_baz"))
+        para("it foo\\_bar\\_baz"))
 
     assert_equal expected, doc
   end
 
+  def test_rdoc_code_escaped_in_normal_text
+    doc = parse "+notcode+ \\+notcode+ \\\\+notcode+"
+    expected = doc(para("\\+notcode\\+ \\+notcode\\+ \\\\\\+notcode\\+"))
+    assert_equal expected, doc
+  end
+
+  def test_escape_character_entities
+    doc = parse "&#x3C;tt>&#x2A;\\</tt> &#60;tt>&#43;\\</tt> &lt;tt>&lowbar;\\</tt>"
+    expected = doc(para("\\<tt>\\*\\</tt> \\<tt>\\+\\</tt> \\<tt>\\_\\</tt>"))
+    assert_equal expected, doc
+  end
+
+  def test_rdoc_escape_in_markdown_styling
+    doc = parse "_a \\_b\\_ c_ **+d+** `_1+2*3`"
+    expected = doc(para("<em>a \\_b\\_ c</em> <b>\\+d\\+</b> <code>_1+2*3</code>"))
+    assert_equal expected, doc
+  end
+
+  def test_rdoc_heading_escaped_inside_markdown
+    doc = parse "= notheading\n"
+    expected = doc(para("= notheading"))
+    assert_equal expected, doc
+  end
+
+  def test_rdoc_code_escaped_inside_markdown
+    doc = parse "~~+notcode+~~"
+    expected = doc(para("<del>\\+notcode\\+</del>"))
+    assert_equal expected, doc
+  end
+
+  def test_no_rdoc_escape_inside_markdown_code
+    doc = parse "`+foo+`"
+    expected = doc(para("<code>+foo+</code>"))
+    assert_equal expected, doc
+  end
+
+  def test_rdoc_format_escaped_inside_markdown_link
+    doc = parse "[Link +to+ `tap{ +1+ }`](http://example.com/?q=[])"
+    expected = doc(para("{Link \\+to\\+ <code>tap{ +1+ }</code>}[http://example.com/?q=\\[\\]]"))
+    assert_equal expected, doc
+  end
+
+  def test_lt_escape
+    doc = parse "\\<b>`a`\\</b> <b>\\</b>`b`</b>"
+    expected = doc(para("\\<b><code>a</code>\\</b> <b>\\</b><code>b</code></b>"))
+    assert_equal expected, doc
+  end
+
   def test_parse_escape
     assert_equal doc(para("Backtick: `")), parse("Backtick: \\`")
 
-    assert_equal doc(para("Backslash: \\")), parse("Backslash: \\\\")
+    # Unescaped as markdown and then escaped as RDoc
+    assert_equal doc(para("Backslash: \\\\")), parse("Backslash: \\\\")
 
     assert_equal doc(para("Colon: :")), parse("Colon: \\:")
   end