Skip to content

Commit 061a4d8

Browse files
move global characters arrays into the parser
1 parent 2afe77a commit 061a4d8

File tree

5 files changed

+136
-62
lines changed

5 files changed

+136
-62
lines changed

api_test/main.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,66 @@ static void verify_custome_attributes_node(test_batch_runner *runner) {
12341234
check_markdown_attributes_node(runner, "^[](rainbow: 'extreme')", CMARK_NODE_ATTRIBUTE, "rainbow: 'extreme'");
12351235
}
12361236

1237+
typedef void (*reentrant_call_func) (void);
1238+
1239+
static cmark_node *reentrant_parse_inline_ext(cmark_syntax_extension *self, cmark_parser *parser,
1240+
cmark_node *parent, unsigned char character,
1241+
cmark_inline_parser *inline_parser) {
1242+
void *priv = cmark_syntax_extension_get_private(self);
1243+
if (priv) {
1244+
reentrant_call_func func = (reentrant_call_func)priv;
1245+
func();
1246+
cmark_syntax_extension_set_private(self, NULL, NULL);
1247+
}
1248+
1249+
return NULL;
1250+
}
1251+
1252+
static void run_inner_parser() {
1253+
cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
1254+
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough"));
1255+
1256+
static const char markdown[] = "this is the ~~outer~~ inner document";
1257+
cmark_parser_feed(parser, markdown, sizeof(markdown) - 1);
1258+
1259+
cmark_node *doc = cmark_parser_finish(parser);
1260+
cmark_node_free(doc);
1261+
cmark_parser_free(parser);
1262+
}
1263+
1264+
// Checks that a second parser can run to completion in the middle of another
// parser's inline phase without disturbing the outer parse.
//
// A throwaway "interrupt" extension is attached to the outer parser next to
// "strikethrough". Its inline-match hook (reentrant_parse_inline_ext) calls
// run_inner_parser once. The outer document must still render its
// strikethrough span correctly.
static void parser_interrupt(test_batch_runner *runner) {
  cmark_gfm_core_extensions_ensure_registered();

  cmark_syntax_extension *my_ext = cmark_syntax_extension_new("interrupt");
  cmark_syntax_extension_set_private(my_ext, run_inner_parser, NULL);
  cmark_syntax_extension_set_match_inline_func(my_ext, reentrant_parse_inline_ext);

  cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
  cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough"));
  cmark_parser_attach_syntax_extension(parser, my_ext);

  static const char markdown[] = "this is the ~~inner~~ outer document";
  // sizeof - 1: feed the text without its terminating NUL.
  cmark_parser_feed(parser, markdown, sizeof(markdown) - 1);

  cmark_node *doc = cmark_parser_finish(parser);
  char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT);
  // NOTE(review): the comparison is exact, so the leading whitespace inside
  // these literals must match the XML renderer's indentation -- confirm it
  // against the renderer's actual output.
  STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                      "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
                      "<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
                      " <paragraph>\n"
                      " <text xml:space=\"preserve\">this is the </text>\n"
                      " <strikethrough>\n"
                      " <text xml:space=\"preserve\">inner</text>\n"
                      " </strikethrough>\n"
                      " <text xml:space=\"preserve\"> outer document</text>\n"
                      " </paragraph>\n"
                      "</document>\n", "interrupting the parser should still allow extensions");

  free(xml);
  cmark_node_free(doc);
  cmark_parser_free(parser);
  // The extension was created here and never handed to a registry, so this
  // function owns it. Release it only after the parser that references it is
  // gone.
  cmark_syntax_extension_free(cmark_get_default_mem_allocator(), my_ext);
}
1296+
12371297
int main() {
12381298
int retval;
12391299
test_batch_runner *runner = test_batch_runner_new();
@@ -1267,6 +1327,7 @@ int main() {
12671327
inline_only_opt(runner);
12681328
preserve_whitespace_opt(runner);
12691329
verify_custome_attributes_node(runner);
1330+
parser_interrupt(runner);
12701331

12711332
test_print_summary(runner);
12721333
retval = test_ok(runner) ? 0 : 1;

src/blocks.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,16 @@ int cmark_parser_attach_syntax_extension(cmark_parser *parser,
9494
cmark_syntax_extension *extension) {
9595
parser->syntax_extensions = cmark_llist_append(parser->mem, parser->syntax_extensions, extension);
9696
if (extension->match_inline || extension->insert_inline_from_delim) {
97+
if (!parser->inline_syntax_extensions) {
98+
// if we're loading an inline extension into this parser for the first time,
99+
// allocate new buffers for the inline parser character arrays
100+
parser->skip_chars = (int8_t *)parser->mem->calloc(sizeof(int8_t), 256);
101+
cmark_set_default_skip_chars(&parser->skip_chars, true);
102+
103+
parser->special_chars = (int8_t *)parser->mem->calloc(sizeof(int8_t), 256);
104+
cmark_set_default_special_chars(&parser->special_chars, true);
105+
}
106+
97107
parser->inline_syntax_extensions = cmark_llist_append(
98108
parser->mem, parser->inline_syntax_extensions, extension);
99109
}
@@ -132,6 +142,9 @@ static void cmark_parser_reset(cmark_parser *parser) {
132142
parser->syntax_extensions = saved_exts;
133143
parser->inline_syntax_extensions = saved_inline_exts;
134144
parser->options = saved_options;
145+
146+
cmark_set_default_skip_chars(&parser->skip_chars, false);
147+
cmark_set_default_special_chars(&parser->special_chars, false);
135148
}
136149

137150
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
@@ -417,9 +430,9 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) {
417430
for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
418431
unsigned char c = (unsigned char)(size_t)tmp_char->data;
419432
if (add)
420-
cmark_inlines_add_special_character(c, ext->emphasis);
433+
cmark_inlines_add_special_character(parser, c, ext->emphasis);
421434
else
422-
cmark_inlines_remove_special_character(c, ext->emphasis);
435+
cmark_inlines_remove_special_character(parser, c, ext->emphasis);
423436
}
424437
}
425438
}

src/include/inlines.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
extern "C" {
66
#endif
77

8+
#include <stdlib.h>
9+
#include "cmark-gfm_config.h"
810
#include "references.h"
911

1012
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
@@ -19,8 +21,11 @@ void cmark_parse_inlines(cmark_parser *parser,
1921
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
2022
cmark_map *refmap);
2123

22-
void cmark_inlines_add_special_character(unsigned char c, bool emphasis);
23-
void cmark_inlines_remove_special_character(unsigned char c, bool emphasis);
24+
void cmark_inlines_add_special_character(cmark_parser *parser, unsigned char c, bool emphasis);
25+
void cmark_inlines_remove_special_character(cmark_parser *parser, unsigned char c, bool emphasis);
26+
27+
void cmark_set_default_skip_chars(int8_t **skip_chars, bool use_memcpy);
28+
void cmark_set_default_special_chars(int8_t **special_chars, bool use_memcpy);
2429

2530
#ifdef __cplusplus
2631
}

src/include/parser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define CMARK_PARSER_H
33

44
#include <stdio.h>
5+
#include <stdlib.h>
56
#include "references.h"
67
#include "node.h"
78
#include "buffer.h"
@@ -49,6 +50,9 @@ struct cmark_parser {
4950
cmark_llist *syntax_extensions;
5051
cmark_llist *inline_syntax_extensions;
5152
cmark_ispunct_func backslash_ispunct;
53+
/* used when parsing inlines, can be populated by extensions if any are loaded */
54+
int8_t *skip_chars;
55+
int8_t *special_chars;
5256
};
5357

5458
#ifdef __cplusplus

src/inlines.c

Lines changed: 49 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#include "scanners.h"
1414
#include "inlines.h"
1515
#include "syntax_extension.h"
16-
#include "mutex.h"
1716

1817
static const char *EMDASH = "\xE2\x80\x94";
1918
static const char *ENDASH = "\xE2\x80\x93";
@@ -64,10 +63,14 @@ typedef struct subject{
6463
bool scanned_for_backticks;
6564
} subject;
6665

67-
// Extensions may populate this.
68-
static int8_t SKIP_CHARS[256];
66+
// Resets a skip-character table to the built-in defaults.
//
// The defaults are a 256-entry, all-zero table held in a function-local
// static.
//
//   skip_chars  address of the caller's table pointer
//   use_memcpy  true:  copy the defaults into the buffer *skip_chars already
//                      points to (it must hold at least 256 bytes); the
//                      pointer itself is left unchanged
//               false: re-point *skip_chars at the static table itself; the
//                      caller then shares storage with the defaults, so any
//                      write through the pointer changes them
void cmark_set_default_skip_chars(int8_t **skip_chars, bool use_memcpy) {
  static int8_t builtin_skip_table[256];

  if (!use_memcpy) {
    *skip_chars = builtin_skip_table;
    return;
  }

  memcpy(*skip_chars, builtin_skip_table, sizeof(builtin_skip_table));
}
7174

7275
static CMARK_INLINE bool S_is_line_end_char(char c) {
7376
return (c == '\n' || c == '\r');
@@ -80,7 +83,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
8083

8184
static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
8285
cmark_chunk *buffer, cmark_map *refmap);
83-
static bufsize_t subject_find_special_char(subject *subj, int options);
86+
static bufsize_t subject_find_special_char(cmark_parser *parser, subject *subj, int options);
8487

8588
// Create an inline with a literal string value.
8689
static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
@@ -394,8 +397,8 @@ static cmark_node *handle_backticks(subject *subj, int options) {
394397

395398
// Scan ***, **, or * and return number scanned, or 0.
396399
// Advances position.
397-
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
398-
bool *can_close) {
400+
static int scan_delims(cmark_parser *parser, subject *subj, unsigned char c,
401+
bool *can_open, bool *can_close) {
399402
int numdelims = 0;
400403
bufsize_t before_char_pos, after_char_pos;
401404
int32_t after_char = 0;
@@ -408,19 +411,15 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
408411
} else {
409412
before_char_pos = subj->pos - 1;
410413

411-
CMARK_INITIALIZE_AND_LOCK(chars);
412-
413414
// walk back to the beginning of the UTF_8 sequence:
414-
while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
415+
while ((peek_at(subj, before_char_pos) >> 6 == 2 || parser->skip_chars[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
415416
before_char_pos -= 1;
416417
}
417418
len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
418419
subj->pos - before_char_pos, &before_char);
419-
if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) {
420+
if (len == -1 || (before_char < 256 && parser->skip_chars[(unsigned char) before_char])) {
420421
before_char = 10;
421422
}
422-
423-
CMARK_UNLOCK(chars);
424423
}
425424

426425
if (c == '\'' || c == '"') {
@@ -438,18 +437,14 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
438437
} else {
439438
after_char_pos = subj->pos;
440439

441-
CMARK_INITIALIZE_AND_LOCK(chars);
442-
443-
while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
440+
while (parser->skip_chars[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
444441
after_char_pos += 1;
445442
}
446443
len = cmark_utf8proc_iterate(subj->input.data + after_char_pos,
447444
subj->input.len - after_char_pos, &after_char);
448-
if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) {
445+
if (len == -1 || (after_char < 256 && parser->skip_chars[(unsigned char) after_char])) {
449446
after_char = 10;
450447
}
451-
452-
CMARK_UNLOCK(chars);
453448
}
454449

455450
left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
@@ -548,13 +543,13 @@ static void push_bracket(subject *subj, bracket_type type, cmark_node *inl_text)
548543
}
549544

550545
// Assumes the subject has a c at the current position.
551-
static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
546+
static cmark_node *handle_delim(cmark_parser *parser, subject *subj, unsigned char c, bool smart) {
552547
bufsize_t numdelims;
553548
cmark_node *inl_text;
554549
bool can_open, can_close;
555550
cmark_chunk contents;
556551

557-
numdelims = scan_delims(subj, c, &can_open, &can_close);
552+
numdelims = scan_delims(parser, subj, c, &can_open, &can_close);
558553

559554
if (c == '\'' && smart) {
560555
contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
@@ -1346,18 +1341,25 @@ static cmark_node *handle_newline(subject *subj) {
13461341
}
13471342

13481343
// "\r\n\\`&_*[]^<!"
1349-
static int8_t SPECIAL_CHARS[256] = {
1350-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1351-
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1352-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1353-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1354-
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1355-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1356-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1357-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1358-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1359-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1360-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1344+
// Resets a special-character table to the built-in defaults.
//
// The defaults are a 256-entry table, indexed by byte value, in which the
// entries for  \n \r ! & * < [ \ ] ^ _ `  are 1 and every other entry is 0.
// The table lives in a function-local static.
//
//   special_chars  address of the caller's table pointer
//   use_memcpy     true:  copy the defaults into the buffer *special_chars
//                         already points to (it must hold at least 256
//                         bytes); the pointer itself is left unchanged
//                  false: re-point *special_chars at the static table itself;
//                         the caller then shares storage with the defaults,
//                         so any write through the pointer changes them
void cmark_set_default_special_chars(int8_t **special_chars, bool use_memcpy) {
  static int8_t builtin_special_table[256] = {
      ['\n'] = 1, ['\r'] = 1, ['!'] = 1, ['&'] = 1, ['*'] = 1, ['<'] = 1,
      ['['] = 1,  ['\\'] = 1, [']'] = 1, ['^'] = 1, ['_'] = 1, ['`'] = 1,
  };

  if (!use_memcpy) {
    *special_chars = builtin_special_table;
    return;
  }

  memcpy(*special_chars, builtin_special_table, sizeof(builtin_special_table));
}
13611363

13621364
// " ' . -
13631365
static const char SMART_PUNCT_CHARS[] = {
@@ -1374,41 +1376,30 @@ static const char SMART_PUNCT_CHARS[] = {
13741376
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13751377
};
13761378

1377-
static bufsize_t subject_find_special_char(subject *subj, int options) {
1379+
static bufsize_t subject_find_special_char(cmark_parser *parser, subject *subj, int options) {
13781380
bufsize_t n = subj->pos + 1;
1379-
bufsize_t ret = subj->input.len;
13801381

1381-
CMARK_INITIALIZE_AND_LOCK(chars);
13821382
while (n < subj->input.len) {
1383-
if (SPECIAL_CHARS[subj->input.data[n]]) {
1384-
ret = n;
1385-
break;
1386-
}
1387-
if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) {
1388-
ret = n;
1389-
break;
1390-
}
1383+
if (parser->special_chars[subj->input.data[n]])
1384+
return n;
1385+
if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
1386+
return n;
13911387
n++;
13921388
}
1393-
CMARK_UNLOCK(chars);
13941389

1395-
return ret;
1390+
return subj->input.len;
13961391
}
13971392

1398-
void cmark_inlines_add_special_character(unsigned char c, bool emphasis) {
1399-
CMARK_INITIALIZE_AND_LOCK(chars);
1400-
SPECIAL_CHARS[c] = 1;
1393+
void cmark_inlines_add_special_character(cmark_parser *parser, unsigned char c, bool emphasis) {
1394+
parser->special_chars[c] = 1;
14011395
if (emphasis)
1402-
SKIP_CHARS[c] = 1;
1403-
CMARK_UNLOCK(chars);
1396+
parser->skip_chars[c] = 1;
14041397
}
14051398

1406-
void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) {
1407-
CMARK_INITIALIZE_AND_LOCK(chars);
1408-
SPECIAL_CHARS[c] = 0;
1399+
void cmark_inlines_remove_special_character(cmark_parser *parser, unsigned char c, bool emphasis) {
1400+
parser->special_chars[c] = 0;
14091401
if (emphasis)
1410-
SKIP_CHARS[c] = 0;
1411-
CMARK_UNLOCK(chars);
1402+
parser->skip_chars[c] = 0;
14121403
}
14131404

14141405
static cmark_node *try_extensions(cmark_parser *parser,
@@ -1466,7 +1457,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
14661457
case '_':
14671458
case '\'':
14681459
case '"':
1469-
new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
1460+
new_inl = handle_delim(parser, subj, c, (options & CMARK_OPT_SMART) != 0);
14701461
break;
14711462
case '-':
14721463
new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
@@ -1508,7 +1499,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
15081499
if (new_inl != NULL)
15091500
break;
15101501

1511-
endpos = subject_find_special_char(subj, options);
1502+
endpos = subject_find_special_char(parser, subj, options);
15121503
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
15131504
startpos = subj->pos;
15141505
subj->pos = endpos;

0 commit comments

Comments
 (0)