Skip to content

Commit 81752fe

Browse files
authored
Merge pull request #4 from nulab/fix/html-comment-stackoverflow
[fix]Experimental measures to avoid stack overflow using HTML comment…
2 parents ab4a8a3 + 31c1b94 commit 81752fe

File tree

2 files changed

+38
-19
lines changed
  • flexmark-core-test/src/test/java/com/vladsch/flexmark/core/test/util/parser
  • flexmark/src/main/java/com/vladsch/flexmark/ast/util

2 files changed

+38
-19
lines changed

flexmark-core-test/src/test/java/com/vladsch/flexmark/core/test/util/parser/ParserTest.java

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -636,6 +636,25 @@ public void test_escapeOrderedListDotOnlyNoNumbered() {
636636
assertEquals("2\\)abc", unEscape("2\\)abc", parser));
637637
}
638638

639+
@Test
640+
public void stackOverflowReproductionTest() {
641+
// Reproduces a potential StackOverflowError: a blockquote line opens an HTML comment ("<!--") that is never closed, followed by 100 long blockquote lines
642+
// Content: "> 0<!-- ...\n" + (("> " + "あ".repeat(150) + "\n") * 100)
643+
644+
StringBuilder contentBuilder = new StringBuilder("> 0<!-- ...\n");
645+
String repeatedLine = "> " + "あ".repeat(150) + "\n";
646+
for (int i = 0; i < 100; i++) {
647+
contentBuilder.append(repeatedLine);
648+
}
649+
String content = contentBuilder.toString();
650+
651+
Parser parser = Parser.builder().build();
652+
Document document = parser.parse(content);
653+
654+
// Reaching this assertion means parse() returned without throwing StackOverflowError
655+
assertNotNull(document);
656+
}
657+
639658
private String firstText(Node n) {
640659
while (!(n instanceof Text)) {
641660
assertThat(n, notNullValue());

flexmark/src/main/java/com/vladsch/flexmark/ast/util/Parsing.java

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ public class Parsing {
5656
final private static String ST_ADDITIONAL_CHARS_SET_IDI = "[\u001f]";
5757
final private static String ST_ADDITIONAL_CHARS_SET_NO_IDI = "";
5858

59-
final public static String ST_HTMLCOMMENT = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->";
59+
final public static String ST_HTMLCOMMENT = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*+-->";
6060
final public static String ST_PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
6161
final public static String ST_CDATA = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>";
6262
final public static String ST_SINGLEQUOTEDVALUE = "'[^']*'";
@@ -127,31 +127,31 @@ public class Parsing {
127127
final public Pattern LINE_END = ST_LINE_END;
128128

129129
// IntelliJDummyIdentifier dependent
130-
final private static String ST_TAGNAME_IDI = "[A-Za-z" + ST_ADDITIONAL_CHARS_IDI + "][A-Za-z0-9" + ST_ADDITIONAL_CHARS_IDI + "-]*";
131-
final private static String ST_TAGNAME_NO_IDI = "[A-Za-z" + ST_ADDITIONAL_CHARS_NO_IDI + "][A-Za-z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + "-]*";
130+
final private static String ST_TAGNAME_IDI = "[A-Za-z" + ST_ADDITIONAL_CHARS_IDI + "][A-Za-z0-9" + ST_ADDITIONAL_CHARS_IDI + "-]*+";
131+
final private static String ST_TAGNAME_NO_IDI = "[A-Za-z" + ST_ADDITIONAL_CHARS_NO_IDI + "][A-Za-z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + "-]*+";
132132

133-
final private static String ST_UNQUOTEDVALUE_IDI = "[^\"'=<>{}`" + ST_EXCLUDED_0_TO_SPACE_IDI + "]+";
134-
final private static String ST_UNQUOTEDVALUE_NO_IDI = "[^\"'=<>{}`" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]+";
133+
final private static String ST_UNQUOTEDVALUE_IDI = "[^\"'=<>{}`" + ST_EXCLUDED_0_TO_SPACE_IDI + "]++";
134+
final private static String ST_UNQUOTEDVALUE_NO_IDI = "[^\"'=<>{}`" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]++";
135135

136-
final private static String ST_ATTRIBUTENAME_IDI = "[a-zA-Z" + ST_ADDITIONAL_CHARS_IDI + "_:][a-zA-Z0-9" + ST_ADDITIONAL_CHARS_IDI + ":._-]*";
137-
final private static String ST_ATTRIBUTENAME_NO_IDI = "[a-zA-Z" + ST_ADDITIONAL_CHARS_NO_IDI + "_:][a-zA-Z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + ":._-]*";
136+
final private static String ST_ATTRIBUTENAME_IDI = "[a-zA-Z" + ST_ADDITIONAL_CHARS_IDI + "_:][a-zA-Z0-9" + ST_ADDITIONAL_CHARS_IDI + ":._-]*+";
137+
final private static String ST_ATTRIBUTENAME_NO_IDI = "[a-zA-Z" + ST_ADDITIONAL_CHARS_NO_IDI + "_:][a-zA-Z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + ":._-]*+";
138138

139139
final private static String ST_ATTRIBUTEVALUE_IDI = "(?:" + ST_UNQUOTEDVALUE_IDI + "|" + ST_SINGLEQUOTEDVALUE + "|" + ST_DOUBLEQUOTEDVALUE + ")";
140140
final private static String ST_ATTRIBUTEVALUE_NO_IDI = "(?:" + ST_UNQUOTEDVALUE_NO_IDI + "|" + ST_SINGLEQUOTEDVALUE + "|" + ST_DOUBLEQUOTEDVALUE + ")";
141141

142-
final private static String ST_ATTRIBUTEVALUESPEC_IDI = "(?:" + "\\s*=" + "\\s*" + ST_ATTRIBUTEVALUE_IDI + ")";
143-
final private static String ST_ATTRIBUTEVALUESPEC_NO_IDI = "(?:" + "\\s*=" + "\\s*" + ST_ATTRIBUTEVALUE_NO_IDI + ")";
142+
final private static String ST_ATTRIBUTEVALUESPEC_IDI = "(?:" + "\\s*+=" + "\\s*+" + ST_ATTRIBUTEVALUE_IDI + ")";
143+
final private static String ST_ATTRIBUTEVALUESPEC_NO_IDI = "(?:" + "\\s*+=" + "\\s*+" + ST_ATTRIBUTEVALUE_NO_IDI + ")";
144144

145-
final private static String ST_CLOSETAG_IDI = "</" + ST_TAGNAME_IDI + "\\s*[>]";
146-
final private static String ST_CLOSETAG_NO_IDI = "</" + ST_TAGNAME_NO_IDI + "\\s*[>]";
147-
final private static String ST_NS_CLOSETAG_IDI = "</" + XML_NAMESPACE + ST_TAGNAME_IDI + "\\s*[>]";
148-
final private static String ST_NS_CLOSETAG_NO_IDI = "</" + XML_NAMESPACE + ST_TAGNAME_NO_IDI + "\\s*[>]";
145+
final private static String ST_CLOSETAG_IDI = "</" + ST_TAGNAME_IDI + "\\s*+[>]";
146+
final private static String ST_CLOSETAG_NO_IDI = "</" + ST_TAGNAME_NO_IDI + "\\s*+[>]";
147+
final private static String ST_NS_CLOSETAG_IDI = "</" + XML_NAMESPACE + ST_TAGNAME_IDI + "\\s*+[>]";
148+
final private static String ST_NS_CLOSETAG_NO_IDI = "</" + XML_NAMESPACE + ST_TAGNAME_NO_IDI + "\\s*+[>]";
149149

150150
final private static String ST_ATTRIBUTE_IDI = "(?:" + "\\s+" + ST_ATTRIBUTENAME_IDI + ST_ATTRIBUTEVALUESPEC_IDI + "?)";
151151
final private static String ST_ATTRIBUTE_NO_IDI = "(?:" + "\\s+" + ST_ATTRIBUTENAME_NO_IDI + ST_ATTRIBUTEVALUESPEC_NO_IDI + "?)";
152152

153-
final private static String ST_DECLARATION_IDI = "<![A-Z" + ST_ADDITIONAL_CHARS_IDI + "]+\\s+[^>]*>";
154-
final private static String ST_DECLARATION_NO_IDI = "<![A-Z" + ST_ADDITIONAL_CHARS_NO_IDI + "]+\\s+[^>]*>";
153+
final private static String ST_DECLARATION_IDI = "<![A-Z" + ST_ADDITIONAL_CHARS_IDI + "]+\\s+[^>]*+>";
154+
final private static String ST_DECLARATION_NO_IDI = "<![A-Z" + ST_ADDITIONAL_CHARS_NO_IDI + "]+\\s+[^>]*+>";
155155

156156
final private static String ST_ENTITY_IDI = "&(?:#x[a-f0-9" + ST_ADDITIONAL_CHARS_IDI + "]{1,8}|#[0-9]{1,8}|[a-z" + ST_ADDITIONAL_CHARS_IDI + "][a-z0-9" + ST_ADDITIONAL_CHARS_IDI + "]{1,31});";
157157
final private static String ST_ENTITY_NO_IDI = "&(?:#x[a-f0-9" + ST_ADDITIONAL_CHARS_NO_IDI + "]{1,8}|#[0-9]{1,8}|[a-z" + ST_ADDITIONAL_CHARS_NO_IDI + "][a-z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + "]{1,31});";
@@ -177,10 +177,10 @@ public class Parsing {
177177
final private static String ST_IN_PARENS_W_SP_IDI = "\\((" + ST_REG_CHAR_SP_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
178178
final private static String ST_IN_PARENS_W_SP_NO_IDI = "\\((" + ST_REG_CHAR_SP_NO_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
179179

180-
final private static String ST_OPENTAG_IDI = "<" + ST_TAGNAME_IDI + ST_ATTRIBUTE_IDI + "*" + "\\s*/?>";
181-
final private static String ST_OPENTAG_NO_IDI = "<" + ST_TAGNAME_NO_IDI + ST_ATTRIBUTE_NO_IDI + "*" + "\\s*/?>";
182-
final private static String ST_NS_OPENTAG_IDI = "<" + XML_NAMESPACE + ST_TAGNAME_IDI + ST_ATTRIBUTE_IDI + "*" + "\\s*/?>";
183-
final private static String ST_NS_OPENTAG_NO_IDI = "<" + XML_NAMESPACE + ST_TAGNAME_NO_IDI + ST_ATTRIBUTE_NO_IDI + "*" + "\\s*/?>";
180+
final private static String ST_OPENTAG_IDI = "<" + ST_TAGNAME_IDI + ST_ATTRIBUTE_IDI + "*+" + "\\s*+/?>";
181+
final private static String ST_OPENTAG_NO_IDI = "<" + ST_TAGNAME_NO_IDI + ST_ATTRIBUTE_NO_IDI + "*+" + "\\s*+/?>";
182+
final private static String ST_NS_OPENTAG_IDI = "<" + XML_NAMESPACE + ST_TAGNAME_IDI + ST_ATTRIBUTE_IDI + "*+" + "\\s*+/?>";
183+
final private static String ST_NS_OPENTAG_NO_IDI = "<" + XML_NAMESPACE + ST_TAGNAME_NO_IDI + ST_ATTRIBUTE_NO_IDI + "*+" + "\\s*+/?>";
184184

185185
final private static String ST_REG_CHAR_PARENS_IDI = "[^\\\\" + ST_EXCLUDED_0_TO_SPACE_IDI + "]";
186186
final private static String ST_REG_CHAR_PARENS_NO_IDI = "[^\\\\" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]";

0 commit comments

Comments
 (0)