@@ -36,6 +36,10 @@ static bool S_last_line_blank(const cmark_node *node) {
36
36
return (node -> flags & CMARK_NODE__LAST_LINE_BLANK ) != 0 ;
37
37
}
38
38
39
+ static bool S_last_line_checked (const cmark_node * node ) {
40
+ return (node -> flags & CMARK_NODE__LAST_LINE_CHECKED ) != 0 ;
41
+ }
42
+
39
43
// Returns the value stored in node->type, cast to cmark_node_type.
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
  const cmark_node_type kind = (cmark_node_type)node->type;
  return kind;
}
@@ -47,6 +51,10 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
47
51
node -> flags &= ~CMARK_NODE__LAST_LINE_BLANK ;
48
52
}
49
53
54
+ static void S_set_last_line_checked (cmark_node * node ) {
55
+ node -> flags |= CMARK_NODE__LAST_LINE_CHECKED ;
56
+ }
57
+
50
58
// True when c is a line feed ('\n') or a carriage return ('\r').
static CMARK_INLINE bool S_is_line_end_char(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
@@ -121,8 +129,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
121
129
parser -> root = document ;
122
130
parser -> current = document ;
123
131
124
- parser -> last_buffer_ended_with_cr = false;
125
-
126
132
parser -> syntax_extensions = saved_exts ;
127
133
parser -> inline_syntax_extensions = saved_inline_exts ;
128
134
parser -> options = saved_options ;
@@ -234,26 +240,43 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
234
240
235
241
// Check to see if a node ends with a blank line, descending
236
242
// if needed into lists and sublists.
237
- static bool ends_with_blank_line (cmark_node * node ) {
238
- cmark_node * cur = node ;
239
- while (cur != NULL ) {
240
- if (S_last_line_blank (cur )) {
241
- return true;
242
- }
243
- if (S_type (cur ) == CMARK_NODE_LIST || S_type (cur ) == CMARK_NODE_ITEM ) {
244
- cur = cur -> last_child ;
245
- } else {
246
- cur = NULL ;
247
- }
243
+ static bool S_ends_with_blank_line (cmark_node * node ) {
244
+ if (S_last_line_checked (node )) {
245
+ return (S_last_line_blank (node ));
246
+ } else if ((S_type (node ) == CMARK_NODE_LIST ||
247
+ S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
248
+ S_set_last_line_checked (node );
249
+ return (S_ends_with_blank_line (node -> last_child ));
250
+ } else {
251
+ S_set_last_line_checked (node );
252
+ return (S_last_line_blank (node ));
248
253
}
249
- return false;
254
+ }
255
+
256
+ // returns true if content remains after link defs are resolved.
257
+ static bool resolve_reference_link_definitions (
258
+ cmark_parser * parser ,
259
+ cmark_node * b ) {
260
+ bufsize_t pos ;
261
+ cmark_strbuf * node_content = & b -> content ;
262
+ cmark_chunk chunk = {node_content -> ptr , node_content -> size , 0 };
263
+ while (chunk .len && chunk .data [0 ] == '[' &&
264
+ (pos = cmark_parse_reference_inline (parser -> mem , & chunk ,
265
+ parser -> refmap ))) {
266
+
267
+ chunk .data += pos ;
268
+ chunk .len -= pos ;
269
+ }
270
+ cmark_strbuf_drop (node_content , (node_content -> size - chunk .len ));
271
+ return !is_blank (& b -> content , 0 );
250
272
}
251
273
252
274
static cmark_node * finalize (cmark_parser * parser , cmark_node * b ) {
253
275
bufsize_t pos ;
254
276
cmark_node * item ;
255
277
cmark_node * subitem ;
256
278
cmark_node * parent ;
279
+ bool has_content ;
257
280
258
281
parent = b -> parent ;
259
282
assert (b -> flags &
@@ -283,15 +306,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
283
306
switch (S_type (b )) {
284
307
case CMARK_NODE_PARAGRAPH :
285
308
{
286
- cmark_chunk chunk = {node_content -> ptr , node_content -> size , 0 };
287
- while (chunk .len && chunk .data [0 ] == '[' &&
288
- (pos = cmark_parse_reference_inline (parser -> mem , & chunk , parser -> refmap ))) {
289
-
290
- chunk .data += pos ;
291
- chunk .len -= pos ;
292
- }
293
- cmark_strbuf_drop (node_content , (node_content -> size - chunk .len ));
294
- if (is_blank (node_content , 0 )) {
309
+ has_content = resolve_reference_link_definitions (parser , b );
310
+ if (!has_content ) {
295
311
// remove blank node (former reference def)
296
312
cmark_node_free (b );
297
313
}
@@ -343,7 +359,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
343
359
// spaces between them:
344
360
subitem = item -> first_child ;
345
361
while (subitem ) {
346
- if (ends_with_blank_line (subitem ) && (item -> next || subitem -> next )) {
362
+ if ((item -> next || subitem -> next ) &&
363
+ S_ends_with_blank_line (subitem )) {
347
364
b -> as .list .tight = false;
348
365
break ;
349
366
}
@@ -748,6 +765,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
748
765
}
749
766
}
750
767
768
+ // Check for thematic break. On failure, return 0 and update
769
+ // thematic_break_kill_pos with the index at which the
770
+ // parse fails. On success, return length of match.
771
+ // "...three or more hyphens, asterisks,
772
+ // or underscores on a line by themselves. If you wish, you may use
773
+ // spaces between the hyphens or asterisks."
774
+ static int S_scan_thematic_break (cmark_parser * parser , cmark_chunk * input ,
775
+ bufsize_t offset ) {
776
+ bufsize_t i ;
777
+ char c ;
778
+ char nextc = '\0' ;
779
+ int count ;
780
+ i = offset ;
781
+ c = peek_at (input , i );
782
+ if (!(c == '*' || c == '_' || c == '-' )) {
783
+ parser -> thematic_break_kill_pos = i ;
784
+ return 0 ;
785
+ }
786
+ count = 1 ;
787
+ while ((nextc = peek_at (input , ++ i ))) {
788
+ if (nextc == c ) {
789
+ count ++ ;
790
+ } else if (nextc != ' ' && nextc != '\t' ) {
791
+ break ;
792
+ }
793
+ }
794
+ if (count >= 3 && (nextc == '\r' || nextc == '\n' )) {
795
+ return (i - offset ) + 1 ;
796
+ } else {
797
+ parser -> thematic_break_kill_pos = i ;
798
+ return 0 ;
799
+ }
800
+ }
801
+
751
802
// Find first nonspace character from current offset, setting
752
803
// parser->first_nonspace, parser->first_nonspace_column,
753
804
// parser->indent, and parser->blank. Does not advance parser->offset.
@@ -1040,6 +1091,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1040
1091
bufsize_t matched = 0 ;
1041
1092
int lev = 0 ;
1042
1093
bool save_partially_consumed_tab ;
1094
+ bool has_content ;
1043
1095
int save_offset ;
1044
1096
int save_column ;
1045
1097
@@ -1112,13 +1164,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1112
1164
} else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
1113
1165
(lev =
1114
1166
scan_setext_heading_line (input , parser -> first_nonspace ))) {
1115
- (* container )-> type = (uint16_t )CMARK_NODE_HEADING ;
1116
- (* container )-> as .heading .level = lev ;
1117
- (* container )-> as .heading .setext = true;
1118
- S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
1167
+ // finalize paragraph, resolving reference links
1168
+ has_content = resolve_reference_link_definitions (parser , * container );
1169
+
1170
+ if (has_content ) {
1171
+
1172
+ (* container )-> type = (uint16_t )CMARK_NODE_HEADING ;
1173
+ (* container )-> as .heading .level = lev ;
1174
+ (* container )-> as .heading .setext = true;
1175
+ S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
1176
+ }
1119
1177
} else if (!indented &&
1120
1178
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched ) &&
1121
- (matched = scan_thematic_break (input , parser -> first_nonspace ))) {
1179
+ (parser -> thematic_break_kill_pos <= parser -> first_nonspace ) &&
1180
+ (matched = S_scan_thematic_break (parser , input , parser -> first_nonspace ))) {
1122
1181
// it's only now that we know the line is not part of a setext heading:
1123
1182
* container = add_child (parser , * container , CMARK_NODE_THEMATIC_BREAK ,
1124
1183
parser -> first_nonspace + 1 );
@@ -1377,6 +1436,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1377
1436
parser -> column = 0 ;
1378
1437
parser -> first_nonspace = 0 ;
1379
1438
parser -> first_nonspace_column = 0 ;
1439
+ parser -> thematic_break_kill_pos = 0 ;
1380
1440
parser -> indent = 0 ;
1381
1441
parser -> blank = false;
1382
1442
parser -> partially_consumed_tab = false;
0 commit comments