Skip to content

Commit a308cf9

Browse files
committed
Use hand-rolled scanner for thematic break.
Keep track of the last position where a thematic break failed to match on a line, to avoid rescanning unnecessarily. See commonmark#284.
1 parent fbb5d9a commit a308cf9

File tree

5 files changed

+735
-914
lines changed

5 files changed

+735
-914
lines changed

src/blocks.c

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
105105
parser->column = 0;
106106
parser->first_nonspace = 0;
107107
parser->first_nonspace_column = 0;
108+
parser->thematic_break_kill_pos = 0;
108109
parser->indent = 0;
109110
parser->blank = false;
110111
parser->partially_consumed_tab = false;
@@ -615,6 +616,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
615616
}
616617
}
617618

619+
// Check for thematic break. On failure, return 0 and update
620+
// thematic_break_kill_pos with the index at which the
621+
// parse fails. On success, return length of match.
622+
// "...three or more hyphens, asterisks,
623+
// or underscores on a line by themselves. If you wish, you may use
624+
// spaces between the hyphens or asterisks."
625+
static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
626+
bufsize_t offset) {
627+
bufsize_t i;
628+
char c;
629+
char nextc = '\0';
630+
int count;
631+
i = offset;
632+
c = peek_at(input, i);
633+
if (!(c == '*' || c == '_' || c == '-')) {
634+
parser->thematic_break_kill_pos = i;
635+
return 0;
636+
}
637+
count = 1;
638+
while ((nextc = peek_at(input, ++i))) {
639+
if (nextc == c) {
640+
count++;
641+
} else if (nextc != ' ' && nextc != '\t') {
642+
break;
643+
}
644+
}
645+
if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
646+
return (i - offset) + 1;
647+
} else {
648+
parser->thematic_break_kill_pos = i;
649+
return 0;
650+
}
651+
}
652+
618653
// Find first nonspace character from current offset, setting
619654
// parser->first_nonspace, parser->first_nonspace_column,
620655
// parser->indent, and parser->blank. Does not advance parser->offset.
@@ -948,7 +983,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
948983
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
949984
} else if (!indented &&
950985
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
951-
(matched = scan_thematic_break(input, parser->first_nonspace))) {
986+
(parser->thematic_break_kill_pos <= parser->first_nonspace) &&
987+
(matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
952988
// it's only now that we know the line is not part of a setext heading:
953989
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
954990
parser->first_nonspace + 1);
@@ -1171,6 +1207,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
11711207
parser->column = 0;
11721208
parser->first_nonspace = 0;
11731209
parser->first_nonspace_column = 0;
1210+
parser->thematic_break_kill_pos = 0;
11741211
parser->indent = 0;
11751212
parser->blank = false;
11761213
parser->partially_consumed_tab = false;

src/parser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ struct cmark_parser {
2222
bufsize_t column;
2323
bufsize_t first_nonspace;
2424
bufsize_t first_nonspace_column;
25+
bufsize_t thematic_break_kill_pos;
2526
int indent;
2627
bool blank;
2728
bool partially_consumed_tab;

0 commit comments

Comments
 (0)