Skip to content

Commit a595ea9

Browse files
jmschonfeld authored and QuietMisdreavus committed
Add preserve-whitespace and inline-only options
rdar://76711302
1 parent 0489d3c commit a595ea9

File tree

4 files changed

+89
-16
lines changed

4 files changed

+89
-16
lines changed

api_test/main.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,49 @@ static void ref_source_pos(test_batch_runner *runner) {
11521152
cmark_node_free(doc);
11531153
}
11541154

1155+
/*
 * Exercises CMARK_OPT_INLINE_ONLY.
 *
 * The input contains heading, block-quote and list-item markers plus one
 * inline link. The expected HTML is a single paragraph in which the block
 * markers remain literal text while the link is rendered as an anchor.
 */
static void inline_only_opt(test_batch_runner *runner) {
  static const char input[] = "# My heading\n"
                              "> My block quote\n\n"
                              "- List item\n\n"
                              "[link](https://github.com)\n";
  static const char expected_html[] = "<p># My heading\n"
                                      "&gt; My block quote\n"
                                      "\n"
                                      "- List item\n"
                                      "\n"
                                      "<a href=\"https://github.com\">link</a>\n"
                                      "</p>\n";

  /* sizeof - 1 drops the terminating NUL from the parsed length. */
  cmark_node *tree =
      cmark_parse_document(input, sizeof(input) - 1, CMARK_OPT_INLINE_ONLY);
  char *rendered = cmark_render_html(tree, CMARK_OPT_DEFAULT, 0);

  STR_EQ(runner, rendered, expected_html, "html is as expected");

  free(rendered);
  cmark_node_free(tree);
}
1174+
1175+
/*
 * Parses `markdown` with CMARK_OPT_PRESERVE_WHITESPACE and checks that the
 * document's first child is a paragraph whose first child is a text node
 * with a literal equal to the input string.
 *
 * runner:   test batch that records the three checks made here.
 * markdown: NUL-terminated input; also used as the expected text literal.
 *           It is only read, so it is taken as a pointer to const.
 */
static void check_markdown_plaintext(test_batch_runner *runner, const char *markdown) {
  cmark_node *doc = cmark_parse_document(markdown, strlen(markdown), CMARK_OPT_PRESERVE_WHITESPACE);

  cmark_node *pg = cmark_node_first_child(doc);
  INT_EQ(runner, cmark_node_get_type(pg), CMARK_NODE_PARAGRAPH, "markdown '%s' did not produce a paragraph node", markdown);

  cmark_node *textNode = cmark_node_first_child(pg);
  INT_EQ(runner, cmark_node_get_type(textNode), CMARK_NODE_TEXT, "markdown '%s' did not produce a text node inside the paragraph node", markdown);

  const char *text = cmark_node_get_literal(textNode);
  STR_EQ(runner, text, markdown, "markdown '%s' resulted in '%s'", markdown, text);

  /* Release the parsed tree; without this every call leaks one document.
   * Done last because `text` was obtained from a node of this tree. */
  cmark_node_free(doc);
}
1184+
1185+
/*
 * Runs check_markdown_plaintext over a fixed list of inputs that vary in
 * leading/trailing spaces and embedded newlines.
 *
 * NOTE(review): entries 2 and 5, and entries 3 and 4, are identical as
 * written here; they may originally have differed in whitespace - confirm
 * against the upstream file.
 */
static void preserve_whitespace_opt(test_batch_runner *runner) {
  /* Element type is `char *` to match check_markdown_plaintext's parameter. */
  static char *const inputs[] = {
    "hello",
    "hello ",
    " hello",
    " hello",
    "hello ",
    "hel\nlo",
    "hel\n\nlo",
    "hel\nworld\nlo",
    " hel \n world \n lo ",
    " hello \n \n world ",
  };
  const size_t count = sizeof(inputs) / sizeof(inputs[0]);

  for (size_t i = 0; i < count; i++) {
    check_markdown_plaintext(runner, inputs[i]);
  }
}
1197+
11551198
int main() {
11561199
int retval;
11571200
test_batch_runner *runner = test_batch_runner_new();
@@ -1182,6 +1225,8 @@ int main() {
11821225
source_pos(runner);
11831226
source_pos_inlines(runner);
11841227
ref_source_pos(runner);
1228+
inline_only_opt(runner);
1229+
preserve_whitespace_opt(runner);
11851230

11861231
test_print_summary(runner);
11871232
retval = test_ok(runner) ? 0 : 1;

src/blocks.c

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
6767
size_t len, bool eof);
6868

6969
static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
70-
bufsize_t bytes);
70+
bufsize_t bytes, bool ensureEndsInNewline);
7171

7272
static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
7373
int start_line, int start_column) {
@@ -687,6 +687,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
687687
size_t len, bool eof) {
688688
const unsigned char *end = buffer + len;
689689
static const uint8_t repl[] = {239, 191, 189};
690+
bool preserveWhitespace = parser->options & CMARK_OPT_PRESERVE_WHITESPACE;
690691

691692
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
692693
// skip NL if last buffer ended with CR ; see #117
@@ -714,10 +715,10 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
714715
if (process) {
715716
if (parser->linebuf.size > 0) {
716717
cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
717-
S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
718+
S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size, !preserveWhitespace || !eof || eol < end);
718719
cmark_strbuf_clear(&parser->linebuf);
719720
} else {
720-
S_process_line(parser, buffer, chunk_len);
721+
S_process_line(parser, buffer, chunk_len, !preserveWhitespace || !eof || eol < end);
721722
}
722723
} else {
723724
if (eol < end && *eol == '\0') {
@@ -1023,6 +1024,8 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
10231024
*all_matched = false;
10241025
cmark_node *container = parser->root;
10251026
cmark_node_type cont_type;
1027+
1028+
10261029

10271030
while (S_last_child_is_open(container)) {
10281031
container = container->last_child;
@@ -1337,7 +1340,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
13371340
// then treat this as a "lazy continuation line" and add it to
13381341
// the open paragraph.
13391342
if (parser->current != last_matched_container &&
1340-
container == last_matched_container && !parser->blank &&
1343+
container == last_matched_container && (!parser->blank || (parser->options & CMARK_OPT_PRESERVE_WHITESPACE)) &&
13411344
S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
13421345
add_line(parser->current, input, parser);
13431346
} else { // not a lazy continuation
@@ -1395,15 +1398,21 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
13951398
container->as.heading.setext == false) {
13961399
chop_trailing_hashtags(input);
13971400
}
1398-
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1401+
if ((parser->options & CMARK_OPT_PRESERVE_WHITESPACE) == 0)
1402+
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
13991403
false);
14001404
add_line(container, input, parser);
14011405
} else {
14021406
// create paragraph container for line
1403-
container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
1404-
parser->first_nonspace + 1);
1405-
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1406-
false);
1407+
if (parser->options & CMARK_OPT_PRESERVE_WHITESPACE) {
1408+
container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
1409+
parser->offset + 1);
1410+
} else {
1411+
container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
1412+
parser->first_nonspace + 1);
1413+
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
1414+
false);
1415+
}
14071416
add_line(container, input, parser);
14081417
}
14091418

@@ -1413,7 +1422,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
14131422

14141423
/* See http://spec.commonmark.org/0.24/#phase-1-block-structure */
14151424
static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1416-
bufsize_t bytes) {
1425+
bufsize_t bytes, bool ensureEndsInNewline) {
14171426
cmark_node *last_matched_container;
14181427
bool all_matched = true;
14191428
cmark_node *container;
@@ -1430,7 +1439,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
14301439
bytes = parser->curline.size;
14311440

14321441
// ensure line ends with a newline:
1433-
if (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1]))
1442+
if (ensureEndsInNewline && (bytes == 0 || !S_is_line_end_char(parser->curline.ptr[bytes - 1])))
14341443
cmark_strbuf_putc(&parser->curline, '\n');
14351444

14361445
parser->offset = 0;
@@ -1463,7 +1472,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
14631472

14641473
current = parser->current;
14651474

1466-
open_new_blocks(parser, &container, &input, all_matched);
1475+
// Only open new blocks if we're not limited to inline
1476+
if ((parser->options & CMARK_OPT_INLINE_ONLY) == 0)
1477+
open_new_blocks(parser, &container, &input, all_matched);
14671478

14681479
/* parser->current might have changed if feed_reentrant was called */
14691480
if (current == parser->current)
@@ -1490,7 +1501,7 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
14901501
return NULL;
14911502

14921503
if (parser->linebuf.size) {
1493-
S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
1504+
S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size, (parser->options & CMARK_OPT_PRESERVE_WHITESPACE) == 0);
14941505
cmark_strbuf_clear(&parser->linebuf);
14951506
}
14961507

src/include/cmark-gfm.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,17 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
769769
*/
770770
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)
771771

772+
/** Parse only inline markdown directives. Block directives will not be
773+
* parsed (their literal representations will remain in the output).
774+
*/
775+
#define CMARK_OPT_INLINE_ONLY (1 << 18)
776+
777+
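/** Parse the markdown input without removing preceding/trailing whitespace and
 * without converting newline characters to breaks. Using this option also
 * enables the CMARK_OPT_INLINE_ONLY option.
 *
 * NOTE(review): because this value contains the CMARK_OPT_INLINE_ONLY bit, a
 * check written as `options & CMARK_OPT_PRESERVE_WHITESPACE` is also non-zero
 * when only CMARK_OPT_INLINE_ONLY is set. Compare against the full mask if the
 * two options are meant to be distinguishable.
 */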
781+
#define CMARK_OPT_PRESERVE_WHITESPACE ((1 << 19) | CMARK_OPT_INLINE_ONLY)
782+
772783
/**
773784
* ## Version information
774785
*/

src/inlines.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,7 +1424,12 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
14241424
switch (c) {
14251425
case '\r':
14261426
case '\n':
1427-
new_inl = handle_newline(subj);
1427+
if (options & CMARK_OPT_PRESERVE_WHITESPACE) {
1428+
advance(subj);
1429+
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
1430+
} else {
1431+
new_inl = handle_newline(subj);
1432+
}
14281433
break;
14291434
case '`':
14301435
new_inl = handle_backticks(subj, options);
@@ -1490,7 +1495,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
14901495
subj->pos = endpos;
14911496

14921497
// if we're at a newline, strip trailing spaces.
1493-
if (S_is_line_end_char(peek_char(subj))) {
1498+
if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0 && S_is_line_end_char(peek_char(subj))) {
14941499
cmark_chunk_rtrim(&contents);
14951500
}
14961501

@@ -1511,7 +1516,8 @@ void cmark_parse_inlines(cmark_parser *parser,
15111516
subject subj;
15121517
cmark_chunk content = {parent->content.ptr, parent->content.size, 0};
15131518
subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
1514-
cmark_chunk_rtrim(&subj.input);
1519+
if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0)
1520+
cmark_chunk_rtrim(&subj.input);
15151521

15161522
while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options))
15171523
;

0 commit comments

Comments
 (0)