Skip to content

Commit c568c47

Browse files
lowmelvinclaude
andcommitted
Add API to expose delimiter info for inline nodes
Add functions to retrieve delimiter character and length for EMPH, STRONG, and CODE nodes. This enables use cases like rendering markdown with delimiters included inside styled elements.

New API:
- cmark_parse_document_for_delimiters(): Parse with source position tracking
- cmark_node_get_delimiter_info(): Get delimiter char and length
- cmark_node_get_delim_char(): Convenience for just the character
- cmark_node_get_delim_length(): Get length without source text

Key design decisions:
- Computes delimiter info on-demand from source positions (no parsing changes)
- Requires CMARK_OPT_SOURCEPOS for accurate multi-line position tracking
- All new code isolated in cmark_delim.{h,c} to minimize merge conflicts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 5efc2e1 commit c568c47

File tree

7 files changed

+608
-0
lines changed

7 files changed

+608
-0
lines changed

api_test/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
add_executable(api_test
22
cplusplus.cpp
3+
delim_test.c
4+
delim_test.h
35
harness.c
46
harness.h
57
main.c

api_test/delim_test.c

Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
5+
#include "cmark.h"
6+
#include "cmark_delim.h"
7+
#include "harness.h"
8+
#include "delim_test.h"
9+
10+
// Emphasis written with single asterisks: the lookup should succeed and
// report '*' as the delimiter character with a length of 1.
static void test_emph_asterisk(test_batch_runner *runner) {
  const char *md = "*italic*";
  const size_t md_len = strlen(md);
  int ch = 0;
  int len = 0;

  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  // First child of the document, then that node's first child (the node
  // under test).
  cmark_node *target = cmark_node_first_child(cmark_node_first_child(root));

  int found = cmark_node_get_delimiter_info(target, md, md_len,
                                            CMARK_OPT_SOURCEPOS, &ch, &len);

  OK(runner, found == 1, "emph asterisk: get_delimiter_info returns 1");
  INT_EQ(runner, ch, '*', "emph asterisk: delim_char is *");
  INT_EQ(runner, len, 1, "emph asterisk: delim_len is 1");

  cmark_node_free(root);
}
25+
26+
// Emphasis written with single underscores: the lookup should succeed and
// report '_' as the delimiter character with a length of 1.
static void test_emph_underscore(test_batch_runner *runner) {
  const char *md = "_italic_";
  const size_t md_len = strlen(md);
  int ch = 0;
  int len = 0;

  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  // First child of the document, then that node's first child (the node
  // under test).
  cmark_node *target = cmark_node_first_child(cmark_node_first_child(root));

  int found = cmark_node_get_delimiter_info(target, md, md_len,
                                            CMARK_OPT_SOURCEPOS, &ch, &len);

  OK(runner, found == 1, "emph underscore: get_delimiter_info returns 1");
  INT_EQ(runner, ch, '_', "emph underscore: delim_char is _");
  INT_EQ(runner, len, 1, "emph underscore: delim_len is 1");

  cmark_node_free(root);
}
41+
42+
// Strong emphasis written with double asterisks: the lookup should succeed
// and report '*' as the delimiter character with a length of 2.
static void test_strong_asterisk(test_batch_runner *runner) {
  const char *md = "**bold**";
  const size_t md_len = strlen(md);
  int ch = 0;
  int len = 0;

  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  // First child of the document, then that node's first child (the node
  // under test).
  cmark_node *target = cmark_node_first_child(cmark_node_first_child(root));

  int found = cmark_node_get_delimiter_info(target, md, md_len,
                                            CMARK_OPT_SOURCEPOS, &ch, &len);

  OK(runner, found == 1, "strong asterisk: get_delimiter_info returns 1");
  INT_EQ(runner, ch, '*', "strong asterisk: delim_char is *");
  INT_EQ(runner, len, 2, "strong asterisk: delim_len is 2");

  cmark_node_free(root);
}
57+
58+
// Strong emphasis written with double underscores: the lookup should succeed
// and report '_' as the delimiter character with a length of 2.
static void test_strong_underscore(test_batch_runner *runner) {
  const char *md = "__bold__";
  const size_t md_len = strlen(md);
  int ch = 0;
  int len = 0;

  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  // First child of the document, then that node's first child (the node
  // under test).
  cmark_node *target = cmark_node_first_child(cmark_node_first_child(root));

  int found = cmark_node_get_delimiter_info(target, md, md_len,
                                            CMARK_OPT_SOURCEPOS, &ch, &len);

  OK(runner, found == 1, "strong underscore: get_delimiter_info returns 1");
  INT_EQ(runner, ch, '_', "strong underscore: delim_char is _");
  INT_EQ(runner, len, 2, "strong underscore: delim_len is 2");

  cmark_node_free(root);
}
73+
74+
// Code span fenced by one backtick on each side: the lookup should succeed
// and report '`' as the delimiter character with a length of 1.
static void test_code_single_backtick(test_batch_runner *runner) {
  const char *md = "`code`";
  const size_t md_len = strlen(md);
  int ch = 0;
  int len = 0;

  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  // First child of the document, then that node's first child (the node
  // under test).
  cmark_node *target = cmark_node_first_child(cmark_node_first_child(root));

  int found = cmark_node_get_delimiter_info(target, md, md_len,
                                            CMARK_OPT_SOURCEPOS, &ch, &len);

  OK(runner, found == 1, "code single: get_delimiter_info returns 1");
  INT_EQ(runner, ch, '`', "code single: delim_char is `");
  INT_EQ(runner, len, 1, "code single: delim_len is 1");

  cmark_node_free(root);
}
89+
90+
// Code span fenced by two backticks with a lone backtick in its content: the
// lookup should succeed and report '`' with a length of 2.
static void test_code_double_backtick(test_batch_runner *runner) {
  const char *md = "``code with ` inside``";
  const size_t md_len = strlen(md);
  int ch = 0;
  int len = 0;

  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  // First child of the document, then that node's first child (the node
  // under test).
  cmark_node *target = cmark_node_first_child(cmark_node_first_child(root));

  int found = cmark_node_get_delimiter_info(target, md, md_len,
                                            CMARK_OPT_SOURCEPOS, &ch, &len);

  OK(runner, found == 1, "code double: get_delimiter_info returns 1");
  INT_EQ(runner, ch, '`', "code double: delim_char is `");
  INT_EQ(runner, len, 2, "code double: delim_len is 2");

  cmark_node_free(root);
}
105+
106+
// Exercises cmark_node_get_delim_length() on freshly created, unattached
// nodes and on NULL. Expected lengths: EMPH 1, STRONG 2, and 0 for CODE,
// TEXT and NULL.
static void test_delim_length_convenience(test_batch_runner *runner) {
  cmark_node *emph_node = cmark_node_new(CMARK_NODE_EMPH);
  cmark_node *strong_node = cmark_node_new(CMARK_NODE_STRONG);
  cmark_node *code_node = cmark_node_new(CMARK_NODE_CODE);
  cmark_node *text_node = cmark_node_new(CMARK_NODE_TEXT);
  int len;

  len = cmark_node_get_delim_length(emph_node);
  INT_EQ(runner, len, 1, "delim_length emph is 1");

  len = cmark_node_get_delim_length(strong_node);
  INT_EQ(runner, len, 2, "delim_length strong is 2");

  len = cmark_node_get_delim_length(code_node);
  INT_EQ(runner, len, 0, "delim_length code is 0 (needs source)");

  len = cmark_node_get_delim_length(text_node);
  INT_EQ(runner, len, 0, "delim_length text is 0");

  len = cmark_node_get_delim_length(NULL);
  INT_EQ(runner, len, 0, "delim_length NULL is 0");

  cmark_node_free(emph_node);
  cmark_node_free(strong_node);
  cmark_node_free(code_node);
  cmark_node_free(text_node);
}
124+
125+
// Exercises cmark_node_get_delim_char() on a paragraph whose first child is
// expected to be emphasis and whose last child is expected to be a code span.
static void test_delim_char_convenience(test_batch_runner *runner) {
  const char *md = "*italic* and `code`";
  const size_t md_len = strlen(md);
  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  cmark_node *block = cmark_node_first_child(root);
  cmark_node *leading = cmark_node_first_child(block);
  cmark_node *trailing = cmark_node_last_child(block);

  int leading_char =
      cmark_node_get_delim_char(leading, md, md_len, CMARK_OPT_SOURCEPOS);
  INT_EQ(runner, leading_char, '*', "delim_char convenience for emph");

  int trailing_char =
      cmark_node_get_delim_char(trailing, md, md_len, CMARK_OPT_SOURCEPOS);
  INT_EQ(runner, trailing_char, '`', "delim_char convenience for code");

  cmark_node_free(root);
}
140+
141+
// Rejection paths of cmark_node_get_delimiter_info(): each call below is
// expected to return 0.
static void test_null_handling(test_batch_runner *runner) {
  int ch = 0;
  int len = 0;

  // 1. No node at all.
  OK(runner,
     cmark_node_get_delimiter_info(NULL, "test", 4, CMARK_OPT_SOURCEPOS, &ch,
                                   &len) == 0,
     "get_delimiter_info returns 0 for NULL node");

  // 2. A delimiter-bearing node type, but no source buffer.
  cmark_node *bare_emph = cmark_node_new(CMARK_NODE_EMPH);
  OK(runner,
     cmark_node_get_delimiter_info(bare_emph, NULL, 0, CMARK_OPT_SOURCEPOS,
                                   &ch, &len) == 0,
     "get_delimiter_info returns 0 for NULL source");
  cmark_node_free(bare_emph);

  // 3. A node type that has no delimiters.
  cmark_node *bare_text = cmark_node_new(CMARK_NODE_TEXT);
  OK(runner,
     cmark_node_get_delimiter_info(bare_text, "test", 4, CMARK_OPT_SOURCEPOS,
                                   &ch, &len) == 0,
     "get_delimiter_info returns 0 for TEXT node");
  cmark_node_free(bare_text);

  // 4. A parsed emphasis node queried without CMARK_OPT_SOURCEPOS.
  const char *md = "*italic*";
  const size_t md_len = strlen(md);
  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  cmark_node *parsed_emph =
      cmark_node_first_child(cmark_node_first_child(root));
  OK(runner,
     cmark_node_get_delimiter_info(parsed_emph, md, md_len, CMARK_OPT_DEFAULT,
                                   &ch, &len) == 0,
     "get_delimiter_info returns 0 without CMARK_OPT_SOURCEPOS");
  cmark_node_free(root);
}
168+
169+
// Nested emphasis: "***bold italic***".
//
// The test expects the tree para -> emph -> strong -> text and checks the
// delimiter info of both the outer EMPH and the inner STRONG node.
//
// The return value of each lookup is asserted, and the out-parameters are
// reset between the two lookups. Both nodes use '*', so without the reset a
// failed STRONG lookup would leave the EMPH result in delim_char and the
// "strong delim_char is *" check would pass on stale data.
static void test_nested_emphasis(test_batch_runner *runner) {
  const char *md = "***bold italic***";
  const size_t md_len = strlen(md);
  cmark_node *doc =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  cmark_node *para = cmark_node_first_child(doc);
  // Structure is: para -> emph -> strong -> text
  cmark_node *emph = cmark_node_first_child(para);
  cmark_node *strong = cmark_node_first_child(emph);

  int delim_char = 0, delim_len = 0;
  int ok;

  // Check outer emph
  OK(runner, cmark_node_get_type(emph) == CMARK_NODE_EMPH,
     "nested: outer is emph");
  ok = cmark_node_get_delimiter_info(emph, md, md_len, CMARK_OPT_SOURCEPOS,
                                     &delim_char, &delim_len);
  OK(runner, ok == 1, "nested: emph get_delimiter_info returns 1");
  INT_EQ(runner, delim_char, '*', "nested: emph delim_char is *");
  INT_EQ(runner, delim_len, 1, "nested: emph delim_len is 1");

  // Clear the outputs so the STRONG assertions cannot be satisfied by values
  // left over from the EMPH lookup.
  delim_char = 0;
  delim_len = 0;

  // Check inner strong
  OK(runner, cmark_node_get_type(strong) == CMARK_NODE_STRONG,
     "nested: inner is strong");
  ok = cmark_node_get_delimiter_info(strong, md, md_len, CMARK_OPT_SOURCEPOS,
                                     &delim_char, &delim_len);
  OK(runner, ok == 1, "nested: strong get_delimiter_info returns 1");
  INT_EQ(runner, delim_char, '*', "nested: strong delim_char is *");
  INT_EQ(runner, delim_len, 2, "nested: strong delim_len is 2");

  cmark_node_free(doc);
}
194+
195+
// Emphasis whose content contains a line break, with other text lines before
// and after it in the input.
static void test_multiline(test_batch_runner *runner) {
  const char *md = "line 1\n*italic\ntext*\nline 3";
  const size_t md_len = strlen(md);
  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  cmark_node *cursor;

  // Walk the children of the document's first child until an EMPH node is
  // reached (or the sibling list ends).
  for (cursor = cmark_node_first_child(cmark_node_first_child(root));
       cursor != NULL && cmark_node_get_type(cursor) != CMARK_NODE_EMPH;
       cursor = cmark_node_next(cursor)) {
  }

  OK(runner, cursor != NULL, "multiline: found emph node");
  if (cursor != NULL) {
    int ch = 0;
    int len = 0;
    int found = cmark_node_get_delimiter_info(cursor, md, md_len,
                                              CMARK_OPT_SOURCEPOS, &ch, &len);
    OK(runner, found == 1, "multiline: get_delimiter_info returns 1");
    INT_EQ(runner, ch, '*', "multiline: delim_char is *");
    INT_EQ(runner, len, 1, "multiline: delim_len is 1");
  }

  cmark_node_free(root);
}
218+
219+
// Edge case: a code span that contains a line break, followed on its last
// line by emphasis. The original author notes that this requires
// CMARK_OPT_SOURCEPOS to track positions correctly.
//
// The out-parameters are cleared between the CODE and EMPH lookups. Both
// expected lengths are 1, so without the reset the EMPH length assertion
// could be satisfied by the value left behind by the CODE lookup.
static void test_multiline_code_then_emph(test_batch_runner *runner) {
  const char *md = "`multi\nline` *emph*";
  const size_t md_len = strlen(md);
  cmark_node *doc =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  cmark_node *para = cmark_node_first_child(doc);

  // Find the code node (first child)
  cmark_node *code = cmark_node_first_child(para);
  OK(runner, cmark_node_get_type(code) == CMARK_NODE_CODE,
     "multiline_code_emph: first is code");

  int delim_char = 0, delim_len = 0;
  int ok = cmark_node_get_delimiter_info(code, md, md_len, CMARK_OPT_SOURCEPOS,
                                         &delim_char, &delim_len);
  OK(runner, ok == 1, "multiline_code_emph: code get_delimiter_info returns 1");
  INT_EQ(runner, delim_char, '`', "multiline_code_emph: code delim_char is `");
  INT_EQ(runner, delim_len, 1, "multiline_code_emph: code delim_len is 1");

  // Find the emph node (after text node with space)
  cmark_node *node = cmark_node_next(code);
  while (node && cmark_node_get_type(node) != CMARK_NODE_EMPH) {
    node = cmark_node_next(node);
  }

  OK(runner, node != NULL, "multiline_code_emph: found emph node");
  if (node) {
    // Start from a clean slate so the checks below only see what the EMPH
    // lookup itself wrote.
    delim_char = 0;
    delim_len = 0;

    ok = cmark_node_get_delimiter_info(node, md, md_len, CMARK_OPT_SOURCEPOS,
                                       &delim_char, &delim_len);
    OK(runner, ok == 1,
       "multiline_code_emph: emph get_delimiter_info returns 1");
    INT_EQ(runner, delim_char, '*',
           "multiline_code_emph: emph delim_char is *");
    INT_EQ(runner, delim_len, 1, "multiline_code_emph: emph delim_len is 1");
  }

  cmark_node_free(doc);
}
252+
253+
// Input whose only line break is a bare carriage return ("\r"), with the
// emphasis on the line after it.
static void test_cr_line_endings(test_batch_runner *runner) {
  const char *md = "line1\r*emph*";
  const size_t md_len = strlen(md);
  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  cmark_node *cursor;

  // Walk the children of the document's first child until an EMPH node is
  // reached (or the sibling list ends).
  for (cursor = cmark_node_first_child(cmark_node_first_child(root));
       cursor != NULL && cmark_node_get_type(cursor) != CMARK_NODE_EMPH;
       cursor = cmark_node_next(cursor)) {
  }

  OK(runner, cursor != NULL, "cr_endings: found emph node");
  if (cursor != NULL) {
    int ch = 0;
    int len = 0;
    int found = cmark_node_get_delimiter_info(cursor, md, md_len,
                                              CMARK_OPT_SOURCEPOS, &ch, &len);
    OK(runner, found == 1, "cr_endings: get_delimiter_info returns 1");
    INT_EQ(runner, ch, '*', "cr_endings: delim_char is *");
    INT_EQ(runner, len, 1, "cr_endings: delim_len is 1");
  }

  cmark_node_free(root);
}
276+
277+
// Input whose line break is a carriage return + line feed pair ("\r\n"),
// with the emphasis on the line after it.
static void test_crlf_line_endings(test_batch_runner *runner) {
  const char *md = "line1\r\n*emph*";
  const size_t md_len = strlen(md);
  cmark_node *root =
      cmark_parse_document_for_delimiters(md, md_len, CMARK_OPT_DEFAULT);
  cmark_node *cursor;

  // Walk the children of the document's first child until an EMPH node is
  // reached (or the sibling list ends).
  for (cursor = cmark_node_first_child(cmark_node_first_child(root));
       cursor != NULL && cmark_node_get_type(cursor) != CMARK_NODE_EMPH;
       cursor = cmark_node_next(cursor)) {
  }

  OK(runner, cursor != NULL, "crlf_endings: found emph node");
  if (cursor != NULL) {
    int ch = 0;
    int len = 0;
    int found = cmark_node_get_delimiter_info(cursor, md, md_len,
                                              CMARK_OPT_SOURCEPOS, &ch, &len);
    OK(runner, found == 1, "crlf_endings: get_delimiter_info returns 1");
    INT_EQ(runner, ch, '*', "crlf_endings: delim_char is *");
    INT_EQ(runner, len, 1, "crlf_endings: delim_len is 1");
  }

  cmark_node_free(root);
}
300+
301+
// Entry point for the delimiter test group: invokes every test case in this
// file, in a fixed order, against the supplied runner.
void test_delimiters(test_batch_runner *runner) {
  typedef void (*delim_case)(test_batch_runner *);

  // Order matters only for the sequence in which results are reported.
  static const delim_case cases[] = {
      test_emph_asterisk,
      test_emph_underscore,
      test_strong_asterisk,
      test_strong_underscore,
      test_code_single_backtick,
      test_code_double_backtick,
      test_delim_length_convenience,
      test_delim_char_convenience,
      test_null_handling,
      test_nested_emphasis,
      test_multiline,
      test_multiline_code_then_emph,
      test_cr_line_endings,
      test_crlf_line_endings,
  };

  for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
    cases[i](runner);
  }
}

api_test/delim_test.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#ifndef CMARK_DELIM_TEST_H
2+
#define CMARK_DELIM_TEST_H
3+
4+
#include "harness.h"
5+
6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
10+
/* Runs all delimiter-info test cases defined in delim_test.c, reporting each
 * result through `runner`. */
void test_delimiters(test_batch_runner *runner);
11+
12+
#ifdef __cplusplus
13+
}
14+
#endif
15+
16+
#endif /* CMARK_DELIM_TEST_H */

api_test/main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "harness.h"
1010
#include "cplusplus.h"
11+
#include "delim_test.h"
1112

1213
#define UTF8_REPL "\xEF\xBF\xBD"
1314

@@ -1185,6 +1186,7 @@ int main(void) {
11851186
source_pos(runner);
11861187
source_pos_inlines(runner);
11871188
ref_source_pos(runner);
1189+
test_delimiters(runner);
11881190

11891191
test_print_summary(runner);
11901192
retval = test_ok(runner) ? 0 : 1;

src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_library(cmark
1010
buffer.c
1111
cmark.c
1212
cmark_ctype.c
13+
cmark_delim.c
1314
commonmark.c
1415
houdini_href_e.c
1516
houdini_html_e.c
@@ -72,6 +73,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc
7273

7374
install(FILES
7475
cmark.h
76+
cmark_delim.h
7577
${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h
7678
${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h
7779
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}

0 commit comments

Comments
 (0)