Skip to content

Commit 1429b4b

Browse files
committed
support fenced code block
1 parent 854175c commit 1429b4b

File tree

5 files changed

+157
-8
lines changed

5 files changed

+157
-8
lines changed

src/data.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ void mdd_node_type_data_finalize(mdd_node_type_t type, mdd_node_type_data_t *obj
6969
case MDD_NODE_TYPE_LINK_SIZE:
7070
mdd_node_type_size_data_finalize(&obj->size_data);
7171
break;
72+
case MDD_NODE_TYPE_FENCED_CODE_BLOCK:
73+
mdd_node_type_code_block_data_finalize(&obj->code_block_data);
74+
break;
7275
default:
7376
break;
7477
}
@@ -99,6 +102,10 @@ void mdd_node_type_note_data_finalize(mdd_node_type_note_data_t *obj) {
99102
mdd_char_array_finalize(&obj->note);
100103
}
101104

105+
// Finalizes the type-specific data of a fenced code block node by finalizing
// its `lang` character array. `obj` itself is not freed here.
void mdd_node_type_code_block_data_finalize(mdd_node_type_code_block_data_t *obj) {
    char_array_t *language = &obj->lang;
    mdd_char_array_finalize(language);
}
108+
102109
// Finalizes the type-specific data of a label node by finalizing its `text`
// character array. `obj` itself is not freed here.
void mdd_node_type_label_data_finalize(mdd_node_type_label_data_t *obj) {
    char_array_t *label_text = &obj->text;
    mdd_char_array_finalize(label_text);
}
@@ -176,6 +183,13 @@ void mdd_node_type_note_data_new(mdd_node_data_t *obj,
176183
}
177184
}
178185

186+
// Creates the type-specific data for a fenced code block node and records its
// language identifier.
//
//   obj  - node data to attach to; it must not have type data yet (asserted).
//   lang - language identifier text taken from the opening fence line; the
//          grammar passes an empty string when no language was given.
//
// The new type data is stored in obj->type_data and `lang` is assigned to its
// code_block_data.lang character array.
// NOTE(review): presumably mdd_char_array_assign copies `lang` rather than
// keeping the pointer - confirm against its definition.
void mdd_node_type_code_block_data_new(mdd_node_data_t *obj, const char *lang) {
    assert(!obj->type_data);
    obj->type_data = mdd_node_type_data_new();
    mdd_node_type_code_block_data_t *code_block_data = &obj->type_data->code_block_data;
    // Use the local instead of re-deriving the same address; it was previously
    // declared and never read.
    mdd_char_array_assign(&code_block_data->lang, lang);
}
192+
179193
const char *mdd_node_type_to_string(mdd_node_type_t type) {
180194
switch (type) {
181195
case MDD_NODE_TYPE_UNKNOWN: return "Unknown";
@@ -209,6 +223,7 @@ const char *mdd_node_type_to_string(mdd_node_type_t type) {
209223
case MDD_NODE_TYPE_INLINE_EQUATION: return "InlineEquation";
210224
case MDD_NODE_TYPE_MARK: return "Mark";
211225
case MDD_NODE_TYPE_BLOCK_QUOTE: return "BlockQuote";
226+
case MDD_NODE_TYPE_FENCED_CODE_BLOCK: return "FencedCodeBlock";
212227
case MDD_NODE_TYPE_DUMMY: return "Dummy";
213228
case MDD_NODE_TYPE_H1: return "H1";
214229
case MDD_NODE_TYPE_H2: return "H2";
@@ -257,6 +272,12 @@ int mdd_node_type_data_dump(char *buf, size_t len, const mdd_node_data_t *obj) {
257272
mdd_char_array_to_string(&data->note));
258273
break;
259274
}
275+
case MDD_NODE_TYPE_FENCED_CODE_BLOCK: {
276+
const mdd_node_type_code_block_data_t *data = &obj->type_data->code_block_data;
277+
return snprintf(buf, len, "{%s}",
278+
mdd_char_array_to_string(&data->lang));
279+
break;
280+
}
260281
default:
261282
break;
262283
}

src/data.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ typedef enum mdd_node_type_tag {
3737
MDD_NODE_TYPE_INLINE_EQUATION,
3838
MDD_NODE_TYPE_MARK,
3939
MDD_NODE_TYPE_BLOCK_QUOTE,
40+
MDD_NODE_TYPE_FENCED_CODE_BLOCK,
4041
MDD_NODE_TYPE_DUMMY,
4142
MDD_NODE_TYPE_H1,
4243
MDD_NODE_TYPE_H2,
@@ -76,6 +77,13 @@ typedef struct mdd_node_type_note_data_tag {
7677

7778
void mdd_node_type_note_data_finalize(mdd_node_type_note_data_t *obj);
7879

80+
// Fenced code block specific data: the language identifier from the opening fence line.
81+
typedef struct mdd_node_type_code_block_data_tag {
82+
char_array_t lang;
83+
} mdd_node_type_code_block_data_t;
84+
85+
void mdd_node_type_code_block_data_finalize(mdd_node_type_code_block_data_t *obj);
86+
7987
// Label specific data.
8088
typedef struct mdd_node_type_label_data_tag {
8189
char_array_t text;
@@ -97,6 +105,7 @@ typedef union mdd_node_type_data_tag {
97105
mdd_node_type_label_data_t label_data;
98106
mdd_node_type_size_data_t size_data;
99107
mdd_node_type_note_data_t note_data;
108+
mdd_node_type_code_block_data_t code_block_data;
100109
} mdd_node_type_data_t;
101110

102111
void mdd_node_type_data_finalize(mdd_node_type_t type, mdd_node_type_data_t *obj);
@@ -148,6 +157,8 @@ void mdd_node_type_size_data_new(mdd_node_data_t *obj, int width, int height);
148157
void mdd_node_type_note_data_new(mdd_node_data_t *obj,
149158
char_array_t *reference, char_array_t *note);
150159

160+
void mdd_node_type_code_block_data_new(mdd_node_data_t *obj, const char *lang);
161+
151162
// Context data during parse.
152163
typedef struct mdd_data_tag {
153164
// Input string to parse.

src/markdowndown.peg

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ int look_back_nonspace(mddi_context_t *ctx);
4848
int inline_equation_pre(mddi_context_t *ctx);
4949
int inline_equation_post(mddi_context_t *ctx);
5050

51+
// Tests the character just before the current parse position against the
// `len` entries of `chars`: returns `positive` when it is in the set and
// `!positive` when it is not. Returns 1 at the start of the buffer.
int look_back_one_char(mddi_context_t *ctx, char *chars, size_t len, int positive);
52+
5153
}
5254

5355
######
@@ -84,11 +86,36 @@ block <-
8486
{
8587
$$ = mdd_node_create_0(MDD_NODE_TYPE_BLANK_LINE, $0s, $0e);
8688
}
87-
/ (e:block_quote / e:verbatim / e:note / e:reference / e:plain)
89+
/ (e:block_quote / e:verbatim / e:fenced_code_block / e:note / e:reference / e:plain)
90+
{
91+
$$ = e;
92+
}
93+
94+
# A fenced code block, delimited either by backticks (```) or by tildes (~~~).
fenced_code_block <- (e:fenced_code_block_tick / e:fenced_code_block_tilde)
    {
        $$ = e;
    }

# Closing backtick fence: the fence, `space`, then a line break or end of input.
fenced_code_block_tick_close <- '```' space (new_line / end_of_file)

# Backtick-fenced block. The capture ($1) is the rest of the opening line after
# the fence and `space`; it may not contain a backtick and is stored as the
# block's language. Every following line that is not a closing fence belongs to
# the block. The closing fence is mandatory, so an unterminated fence does not
# match this rule.
fenced_code_block_tick <- '```' space <(!new_line !'`' .)*> new_line
    (!fenced_code_block_tick_close (!new_line .)* new_line)*
    fenced_code_block_tick_close
    {
        $$ = mdd_node_create_0(MDD_NODE_TYPE_FENCED_CODE_BLOCK, $0s, $0e);
        mdd_node_type_code_block_data_new(&$$->custom, $1);
    }

# Closing tilde fence: the fence, `space`, then a line break or end of input.
fenced_code_block_tilde_close <- '~~~' space (new_line / end_of_file)

# Tilde-fenced block. Same shape as the backtick variant, except that the
# captured language ($1) may not contain a tilde.
fenced_code_block_tilde <- '~~~' space <(!new_line !'~' .)*> new_line
    (!fenced_code_block_tilde_close (!new_line .)* new_line)*
    fenced_code_block_tilde_close
    {
        $$ = mdd_node_create_0(MDD_NODE_TYPE_FENCED_CODE_BLOCK, $0s, $0e);
        mdd_node_type_code_block_data_new(&$$->custom, $1);
    }
118+
92119
# TODO: for block quote, we need to re-parse it separately after the main parse.
93120
# For now, we will only parse the inline elements within it.
94121
block_quote <-
@@ -235,12 +262,15 @@ inline_equation_single <- '$' &{ @@ = inline_equation_pre(pcc_ctx); } !'$' !'\\'
235262

236263
inline_equation_multiple <- '$' &{ @@ = inline_equation_pre(pcc_ctx); } !'$' nonspace_char (!'$' !new_line .)+ '$' &{ @@ = inline_equation_post(pcc_ctx); } ![0-9]
237264

265+
# Define all the tick rules together.
238266
ticks1 <- '`' !'`'
239267
ticks2 <- '``' !'`'
240268
ticks3 <- '```' !'`'
241269
ticks4 <- '````' !'`'
242270
ticks5 <- '`````' !'`'
243271

272+
fenced_code_block_start_tick_line <- '```' (!new_line !'`' .)* new_line
273+
244274
code <- (ticks1 space ((!'`' nonspace_char)+ / !ticks1 '`'+ / !(space ticks1) (space_char / new_line !blank_line))+ space ticks1
245275
/ ticks2 space ((!'`' nonspace_char)+ / !ticks2 '`'+ / !(space ticks2) (space_char / new_line !blank_line))+ space ticks2
246276
/ !fenced_code_block_start_tick_line ticks3 space ((!'`' nonspace_char)+ / !ticks3 '`'+ / !(space ticks3) (space_char / new_line !blank_line))+ space ticks3
@@ -250,10 +280,6 @@ code <- (ticks1 space ((!'`' nonspace_char)+ / !ticks1 '`'+ / !(space ticks1) (s
250280
$$ = mdd_node_create_0(MDD_NODE_TYPE_CODE, $0s, $0e);
251281
}
252282

253-
fenced_code_block_start_tick <- '```' (!new_line !'`' .)*
254-
255-
fenced_code_block_start_tick_line <- fenced_code_block_start_tick new_line
256-
257283
inline_note <- '^['
258284
{
259285
// Need to fix the range at the end.
@@ -698,15 +724,22 @@ void mddi_ast_node_custom_data__finalize(mddi_ast_node_custom_data_t *obj) {
698724
}
699725

700726
// Reports whether the character just before the current parse position is
// something other than a space, tab, newline, or carriage return.
// Returns 1 if so (and also at the very start of the buffer, where there is no
// previous character), 0 otherwise.
int look_back_nonspace(mddi_context_t *ctx) {
    char whitespace[] = {' ', '\t', '\n', '\r'};
    // positive == 0: succeed when the previous character is NOT in the set.
    const int positive = 0;
    return look_back_one_char(ctx, whitespace, sizeof whitespace, positive);
}
730+
731+
// Tests the character immediately before the current parse position against a
// set of characters.
//
//   ctx      - parser context; ctx->buffer.p + ctx->cur is the current position.
//   chars    - the characters to test against (an array, not a C string).
//   len      - number of entries in `chars`.
//   positive - value returned when the previous character is in the set.
//
// Returns `positive` if the previous character is one of `chars`, `!positive`
// if it is not. At the very start of the buffer there is no previous character
// and the function returns 1 whatever `positive` is.
// NOTE(review): returning 1 at the start of the buffer suits the negative
// lookback used by look_back_nonspace; confirm it is also the intended result
// for callers that pass a non-zero `positive`.
int look_back_one_char(mddi_context_t *ctx, char *chars, size_t len, int positive) {
    if (ctx->cur == 0) {
        return 1;
    }
    const char *buf = ctx->buffer.p + ctx->cur;
    const unsigned char ch = (unsigned char)*(buf - 1);
    for (size_t i = 0; i < len; ++i) {
        // Compare both sides as unsigned char. Plain char may be signed, in
        // which case a set entry >= 0x80 promotes to a negative int and could
        // never equal `ch`.
        if (ch == (unsigned char)chars[i]) {
            return positive;
        }
    }
    return !positive;
}
711744

712745
int inline_equation_pre(mddi_context_t *ctx) {

tests/parser/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ add_executable(test_block_quote
7575
test_utils.h test_utils.c)
7676
target_link_libraries(test_block_quote PRIVATE markdowndown)
7777

78+
add_executable(test_fenced_code_block
79+
test_fenced_code_block.c
80+
test_utils.h test_utils.c)
81+
target_link_libraries(test_fenced_code_block PRIVATE markdowndown)
82+
7883
add_test(NAME test_document
7984
COMMAND $<TARGET_FILE:test_document>)
8085
add_test(NAME test_front_matter
@@ -105,3 +110,5 @@ add_test(NAME test_mark
105110
COMMAND $<TARGET_FILE:test_mark>)
106111
add_test(NAME test_block_quote
107112
COMMAND $<TARGET_FILE:test_block_quote>)
113+
add_test(NAME test_fenced_code_block
114+
COMMAND $<TARGET_FILE:test_fenced_code_block>)
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#include "test_utils.h"
2+
3+
int main() {
4+
// Basic fenced code block with backticks
5+
TEST_AST("```\n"
6+
"code here\n"
7+
"```\n",
8+
"var: Document[0,18)\n"
9+
" var: BlockList[0,18)\n"
10+
" nul: FencedCodeBlock[0,18){}\n");
11+
12+
// Closing fence at end of file, without a trailing newline.
13+
TEST_AST("```\n"
14+
"code here\n"
15+
"```",
16+
"var: Document[0,17)\n"
17+
" var: BlockList[0,17)\n"
18+
" nul: FencedCodeBlock[0,17){}\n");
19+
20+
// Fenced code block with language identifier
21+
TEST_AST("```python\n"
22+
"def hello():\n"
23+
" print('Hello')\n"
24+
"```\n",
25+
"var: Document[0,46)\n"
26+
" var: BlockList[0,46)\n"
27+
" nul: FencedCodeBlock[0,46){python}\n");
28+
29+
// Fenced code block with tildes
30+
TEST_AST("~~~\n"
31+
"code here\n"
32+
"~~~\n",
33+
"var: Document[0,18)\n"
34+
" var: BlockList[0,18)\n"
35+
" nul: FencedCodeBlock[0,18){}\n");
36+
37+
// Fenced code block with tildes and language
38+
TEST_AST("~~~javascript\n"
39+
"function hello() {\n"
40+
" console.log('Hello');\n"
41+
"}\n"
42+
"~~~\n",
43+
"var: Document[0,65)\n"
44+
" var: BlockList[0,65)\n"
45+
" nul: FencedCodeBlock[0,65){javascript}\n");
46+
47+
// Multiple fenced code blocks
48+
TEST_AST("```python\n"
49+
"print('Hello')\n"
50+
"```\n\n"
51+
"```javascript\n"
52+
"console.log('World')\n"
53+
"```\n",
54+
"var: Document[0,69)\n"
55+
" var: BlockList[0,69)\n"
56+
" nul: FencedCodeBlock[0,29){python}\n"
57+
" nul: BlankLine[29,30)\n"
58+
" nul: FencedCodeBlock[30,69){javascript}\n");
59+
60+
// Fenced code block with empty lines
61+
TEST_AST("```\n"
62+
"line 1\n"
63+
"\n"
64+
"line 2\n"
65+
"```\n",
66+
"var: Document[0,23)\n"
67+
" var: BlockList[0,23)\n"
68+
" nul: FencedCodeBlock[0,23){}\n");
69+
70+
TEST_AST("```\n"
71+
"```\n",
72+
"var: Document[0,8)\n"
73+
" var: BlockList[0,8)\n"
74+
" nul: FencedCodeBlock[0,8){}\n");
75+
76+
return 0;
77+
}

0 commit comments

Comments
 (0)