Skip to content

Commit 2b52abd

Browse files
authored
Merge pull request #1 from nulab/fix/stack-over-flow
Fix/stack over flow
2 parents bcfe84a + 90a23fd commit 2b52abd

File tree

5 files changed

+148
-10
lines changed

5 files changed

+148
-10
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,9 @@ target/
3838
/assets/comms/
3939
/lib/
4040
/mvn-dependency-tree.txt
41+
42+
# local
43+
.vscode/
44+
.DS_Store
45+
.devcontainer/
46+
.claude/

flexmark-core-test/src/test/java/com/vladsch/flexmark/core/test/util/parser/SpecialInputTest.java

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
package com.vladsch.flexmark.core.test.util.parser;
22

3+
import org.jetbrains.annotations.NotNull;
4+
import org.jetbrains.annotations.Nullable;
5+
import org.junit.Test;
6+
37
import com.vladsch.flexmark.html.HtmlRenderer;
48
import com.vladsch.flexmark.parser.Parser;
5-
import com.vladsch.flexmark.test.util.*;
9+
import com.vladsch.flexmark.test.util.FlexmarkSpecExampleRenderer;
10+
import com.vladsch.flexmark.test.util.RenderingTestCase;
11+
import com.vladsch.flexmark.test.util.SpecExampleRenderer;
12+
import com.vladsch.flexmark.test.util.Strings;
13+
import com.vladsch.flexmark.test.util.TestUtils;
614
import com.vladsch.flexmark.test.util.spec.SpecExample;
715
import com.vladsch.flexmark.util.data.DataHolder;
816
import com.vladsch.flexmark.util.data.DataSet;
917
import com.vladsch.flexmark.util.data.MutableDataSet;
10-
import org.jetbrains.annotations.NotNull;
11-
import org.jetbrains.annotations.Nullable;
12-
import org.junit.Test;
1318

1419
final public class SpecialInputTest extends RenderingTestCase {
1520
final private static DataHolder OPTIONS = new MutableDataSet()
@@ -121,7 +126,19 @@ public void linkLabelLength() {
121126

122127
@Test
123128
public void manyUnderscores() {
124-
assertRendering(Strings.repeat("_", 1000), "<hr />");
129+
assertRendering(Strings.repeat("_", 100000), "<hr />");
130+
}
131+
132+
@Test
133+
public void uncompletedHugeHtmlTag() {
134+
assertRendering("<" + Strings.repeat("a", 1000000), "<p>&lt;" + Strings.repeat("a", 1000000) + "</p>\n");
135+
}
136+
137+
@Test
138+
public void hugeDataUrlInLink() {
139+
String url = "data:image/jpeg;base64," + "A".repeat(5000);
140+
String markdown = "![alt text][image]\n\n[image]: <" + url + ">";
141+
assertRendering(markdown, "<p><img src=\"" + url + "\" alt=\"alt text\" /></p>");
125142
}
126143

127144
@Nullable

flexmark/src/main/java/com/vladsch/flexmark/ast/util/Parsing.java

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import com.vladsch.flexmark.util.data.DataHolder;
55
import com.vladsch.flexmark.util.format.TableFormatOptions;
66
import com.vladsch.flexmark.util.misc.CharPredicate;
7+
import com.vladsch.flexmark.util.sequence.BasedSequence;
78
import com.vladsch.flexmark.util.sequence.Escaping;
89
import com.vladsch.flexmark.util.sequence.SequenceUtils;
910
import org.jetbrains.annotations.NotNull;
@@ -23,7 +24,8 @@ public class Parsing {
2324
// final public static String XML_NAMESPACE_CHAR = XML_NAME_SPACE_START + "|-|.|[0-9]";
2425
final public static String XML_NAMESPACE_START = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]"; // excluded [#x10000-#xEFFFF]
2526
final public static String XML_NAMESPACE_CHAR = XML_NAMESPACE_START + "|[.0-9\u00B7\u0300-\u036F\u203F-\u2040-]";
26-
final public static String XML_NAMESPACE = "(?:(?:" + XML_NAMESPACE_START + ")(?:" + XML_NAMESPACE_CHAR + ")*:)?";
27+
// NOTE: Set name length limit to avoid stack overflow on pathological cases
28+
final public static String XML_NAMESPACE = "(?:(?:" + XML_NAMESPACE_START + ")(?:" + XML_NAMESPACE_CHAR + "){0,200}:)?";
2729

2830
// save options for others to use when only parsing instance is available
2931
final public DataHolder options;
@@ -507,4 +509,69 @@ public static boolean isLetter(CharSequence s, int index) {
507509
public static boolean isSpaceOrTab(CharSequence s, int index) {
508510
return CharPredicate.SPACE_TAB.test(SequenceUtils.safeCharAt(s, index));
509511
}
512+
513+
private static final String ESCAPABLE_CHARS = "\"#$%&'()*+,./:;<=>?@[]\\^_`{|}~-";
514+
/**
515+
* Parse angled link destination without regex catastrophic backtracking.
516+
* Replaces regex-based parsing to prevent StackOverflowError on large URLs.
517+
*
518+
* @param input the input sequence
519+
* @param startIndex starting position
520+
* @param spaceInLinkUrl whether spaces are allowed in URLs
521+
* @return parsed result or null if no match
522+
*/
523+
@Nullable
524+
public static BasedSequence parseAngledLinkDestination(@NotNull BasedSequence input, int startIndex, boolean spaceInLinkUrl) {
525+
if (startIndex >= input.length() || input.charAt(startIndex) != '<') {
526+
return null;
527+
}
528+
529+
int pos = startIndex + 1;
530+
531+
while (pos < input.length()) {
532+
int nextPos = pos + 1;
533+
char c = input.charAt(pos);
534+
535+
// End of angled destination
536+
if (c == '>') {
537+
return input.subSequence(startIndex, pos + 1);
538+
}
539+
540+
// Invalid characters
541+
if (c == '<' || c == '\0' || c == '\t' || c == '\n' || c == '\r') {
542+
return null;
543+
}
544+
545+
// Space handling
546+
if (c == ' ') {
547+
if (!spaceInLinkUrl) {
548+
return null;
549+
}
550+
// Check lookahead for space followed by quote
551+
if (nextPos < input.length()) {
552+
char next = input.charAt(nextPos);
553+
if (next == '"' || next == '\'' || next == '(') {
554+
return null;
555+
}
556+
}
557+
}
558+
559+
// Escape sequence handling
560+
if (c == '\\') {
561+
if (nextPos >= input.length()) {
562+
return null; // Incomplete escape at end
563+
}
564+
565+
char escapedChar = input.charAt(nextPos);
566+
if (ESCAPABLE_CHARS.indexOf(escapedChar) > 0) {
567+
pos++; // Skip the escaped character
568+
}
569+
}
570+
571+
pos++;
572+
}
573+
574+
// No closing '>' found
575+
return null;
576+
}
510577
}

flexmark/src/main/java/com/vladsch/flexmark/parser/core/ThematicBreakParser.java

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,56 @@
1212
import java.util.Arrays;
1313
import java.util.HashSet;
1414
import java.util.Set;
15-
import java.util.regex.Pattern;
1615

1716
public class ThematicBreakParser extends AbstractBlockParser {
1817

19-
static Pattern PATTERN = Pattern.compile("^(?:(?:\\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$");
18+
/**
19+
* Checks if the given input matches the thematic break pattern without using regex
20+
* to avoid StackOverflowError with very long sequences.
21+
*
22+
* Pattern: ^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$
23+
* - At least 3 occurrences of *, _, or - characters
24+
* - Characters can be separated by spaces or tabs
25+
* - Only one type of character allowed per line
26+
* - Line can end with spaces or tabs
27+
*/
28+
private static boolean matchesThematicBreak(BasedSequence input) {
29+
int length = input.length();
30+
if (length == 0) return false;
31+
32+
// Skip leading whitespace
33+
int pos = 0;
34+
while (pos < length && isWhitespace(input.charAt(pos))) {
35+
pos++;
36+
}
37+
38+
if (pos >= length) return false;
39+
40+
char patternChar = input.charAt(pos);
41+
if (patternChar != '*' && patternChar != '_' && patternChar != '-') {
42+
return false;
43+
}
44+
45+
int charCount = 0;
46+
while (pos < length) {
47+
char c = input.charAt(pos);
48+
if (c == patternChar) {
49+
charCount++;
50+
} else if (isWhitespace(c)) {
51+
// Whitespace is allowed between pattern characters
52+
} else {
53+
// Invalid character found
54+
return false;
55+
}
56+
pos++;
57+
}
58+
59+
return charCount >= 3;
60+
}
61+
62+
private static boolean isWhitespace(char c) {
63+
return c == ' ' || c == '\t';
64+
}
2065

2166
final private ThematicBreak block = new ThematicBreak();
2267

@@ -96,7 +141,7 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar
96141
}
97142
BasedSequence line = state.getLine();
98143
final BasedSequence input = line.subSequence(state.getNextNonSpaceIndex(), line.length());
99-
if (PATTERN.matcher(input).matches()) {
144+
if (matchesThematicBreak(input)) {
100145
return BlockStart.of(new ThematicBreakParser(line.subSequence(state.getIndex()))).atIndex(line.length());
101146
} else {
102147
return BlockStart.none();

flexmark/src/main/java/com/vladsch/flexmark/parser/internal/InlineParserImpl.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import com.vladsch.flexmark.ast.*;
44
import com.vladsch.flexmark.ast.util.ReferenceRepository;
55
import com.vladsch.flexmark.ast.util.TextNodeConverter;
6+
import com.vladsch.flexmark.ast.util.Parsing;
67
import com.vladsch.flexmark.parser.*;
78
import com.vladsch.flexmark.parser.block.CharacterNodeFactory;
89
import com.vladsch.flexmark.parser.block.ParagraphPreProcessor;
@@ -1181,8 +1182,10 @@ protected static void collapseLinkRefChildren(Node node, Function<LinkRefDerived
11811182
*/
11821183
@Override
11831184
public BasedSequence parseLinkDestination() {
1184-
BasedSequence res = match(myParsing.LINK_DESTINATION_ANGLES);
1185+
// NOTE: Use linear parser to prevent StackOverflowError on large URLs
1186+
BasedSequence res = Parsing.parseAngledLinkDestination(input, index, options.spaceInLinkUrls);
11851187
if (res != null) {
1188+
index += res.length();
11861189
return res;
11871190
} else {
11881191
if (linkDestinationParser != null) {

0 commit comments

Comments
 (0)