@@ -269,14 +269,20 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
269269 // inline was finished in inlines.c.
270270}
271271
272- static bool validate_protocol (char protocol [], uint8_t * data , int rewind ) {
272+ static bool validate_protocol (char protocol [], uint8_t * data , int rewind , int max_rewind ) {
273273 size_t len = strlen (protocol );
274274
275+ if (len > (size_t )(max_rewind - rewind )) {
276+ return false;
277+ }
278+
275279 // Check that the protocol matches
276- for (int i = 1 ; i <= len ; i ++ ) {
277- if (data [- rewind - i ] != protocol [len - i ]) {
278- return false;
279- }
280+ if (memcmp (data - rewind - len , protocol , len ) != 0 ) {
281+ return false;
282+ }
283+
284+ if (len == (size_t )(max_rewind - rewind )) {
285+ return true;
280286 }
281287
282288 char prev_char = data [- rewind - len - 1 ];
@@ -285,126 +291,145 @@ static bool validate_protocol(char protocol[], uint8_t *data, int rewind) {
285291 return !cmark_isalnum (prev_char );
286292}
287293
288- static void postprocess_text (cmark_parser * parser , cmark_node * text , int offset , int depth ) {
289- // postprocess_text can recurse very deeply if there is a very long line of
290- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
291- if (depth > 1000 ) return ;
294+ static void postprocess_text (cmark_parser * parser , cmark_node * text ) {
295+ size_t start = 0 ;
296+ size_t offset = 0 ;
297+ // `text` is going to be split into a list of nodes containing shorter segments
298+ // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
299+ // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
300+ // the references into allocated buffers. The detached buffer is freed before we
301+ // return.
302+ cmark_chunk detached_chunk = text -> as .literal ;
303+ text -> as .literal = cmark_chunk_dup (& detached_chunk , 0 , detached_chunk .len );
304+
305+ uint8_t * data = text -> as .literal .data ;
306+ size_t remaining = text -> as .literal .len ;
307+
308+ while (true) {
309+ size_t link_end ;
310+ uint8_t * at ;
311+ bool auto_mailto = true;
312+ bool is_xmpp = false;
313+ size_t rewind ;
314+ size_t max_rewind ;
315+ size_t np = 0 ;
316+
317+ if (offset >= remaining )
318+ break ;
292319
293- size_t link_end ;
294- uint8_t * data = text -> as .literal .data ,
295- * at ;
296- size_t size = text -> as .literal .len ;
297- bool auto_mailto = true;
298- bool is_xmpp = false;
299- int rewind , max_rewind ,
300- nb = 0 , np = 0 , ns = 0 ;
320+ at = (uint8_t * )memchr (data + start + offset , '@' , remaining - offset );
321+ if (!at )
322+ break ;
301323
302- if (offset < 0 || (size_t )offset >= size )
303- return ;
324+ max_rewind = at - (data + start + offset );
304325
305- data += offset ;
306- size -= offset ;
326+ found_at :
327+ for (rewind = 0 ; rewind < max_rewind ; ++ rewind ) {
328+ uint8_t c = data [start + offset + max_rewind - rewind - 1 ];
307329
308- at = (uint8_t * )memchr (data , '@' , size );
309- if (!at )
310- return ;
330+ if (cmark_isalnum (c ))
331+ continue ;
311332
312- max_rewind = (int )(at - data );
313- data += max_rewind ;
314- size -= max_rewind ;
333+ if (strchr (".+-_" , c ) != NULL )
334+ continue ;
315335
316- for (rewind = 0 ; rewind < max_rewind ; ++ rewind ) {
317- uint8_t c = data [- rewind - 1 ];
336+ if (strchr (":" , c ) != NULL ) {
337+ if (validate_protocol ("mailto:" , data + start + offset + max_rewind , rewind , max_rewind )) {
338+ auto_mailto = false;
339+ continue ;
340+ }
341+
342+ if (validate_protocol ("xmpp:" , data + start + offset + max_rewind , rewind , max_rewind )) {
343+ auto_mailto = false;
344+ is_xmpp = true;
345+ continue ;
346+ }
347+ }
318348
319- if ( cmark_isalnum ( c ))
320- continue ;
349+ break ;
350+ }
321351
322- if (strchr (".+-_" , c ) != NULL )
352+ if (rewind == 0 ) {
353+ offset += max_rewind + 1 ;
323354 continue ;
355+ }
356+
357+ assert (data [start + offset + max_rewind ] == '@' );
358+ for (link_end = 1 ; link_end < remaining - offset - max_rewind ; ++ link_end ) {
359+ uint8_t c = data [start + offset + max_rewind + link_end ];
324360
325- if (strchr (":" , c ) != NULL ) {
326- if (validate_protocol ("mailto:" , data , rewind )) {
327- auto_mailto = false;
361+ if (cmark_isalnum (c ))
328362 continue ;
329- }
330363
331- if (validate_protocol ("xmpp:" , data , rewind )) {
332- auto_mailto = false;
333- is_xmpp = true;
364+ if (c == '@' ) {
365+ // Found another '@', so go back and try again with an updated offset and max_rewind.
366+ offset += max_rewind + 1 ;
367+ max_rewind = link_end - 1 ;
368+ goto found_at ;
369+ } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
370+ cmark_isalnum (data [start + offset + max_rewind + link_end + 1 ]))
371+ np ++ ;
372+ else if (c == '/' && is_xmpp )
334373 continue ;
335- }
374+ else if (c != '-' && c != '_' )
375+ break ;
336376 }
337377
338- break ;
339- }
340-
341- if (rewind == 0 || ns > 0 ) {
342- postprocess_text (parser , text , max_rewind + 1 + offset , depth + 1 );
343- return ;
344- }
345-
346- for (link_end = 0 ; link_end < size ; ++ link_end ) {
347- uint8_t c = data [link_end ];
348-
349- if (cmark_isalnum (c ))
350- continue ;
351-
352- if (c == '@' )
353- nb ++ ;
354- else if (c == '.' && link_end < size - 1 && cmark_isalnum (data [link_end + 1 ]))
355- np ++ ;
356- else if (c == '/' && is_xmpp )
378+ if (link_end < 2 || np == 0 ||
379+ (!cmark_isalpha (data [start + offset + max_rewind + link_end - 1 ]) &&
380+ data [start + offset + max_rewind + link_end - 1 ] != '.' )) {
381+ offset += max_rewind + link_end ;
357382 continue ;
358- else if (c != '-' && c != '_' )
359- break ;
360- }
383+ }
361384
362- if (link_end < 2 || nb != 1 || np == 0 ||
363- (!cmark_isalpha (data [link_end - 1 ]) && data [link_end - 1 ] != '.' )) {
364- postprocess_text (parser , text , max_rewind + 1 + offset , depth + 1 );
365- return ;
366- }
385+ link_end = autolink_delim (data + start + offset + max_rewind , link_end );
367386
368- link_end = autolink_delim (data , link_end );
387+ if (link_end == 0 ) {
388+ offset += max_rewind + 1 ;
389+ continue ;
390+ }
369391
370- if (link_end == 0 ) {
371- postprocess_text (parser , text , max_rewind + 1 + offset , depth + 1 );
372- return ;
373- }
392+ cmark_node * link_node = cmark_node_new_with_mem (CMARK_NODE_LINK , parser -> mem );
393+ cmark_strbuf buf ;
394+ cmark_strbuf_init (parser -> mem , & buf , 10 );
395+ if (auto_mailto )
396+ cmark_strbuf_puts (& buf , "mailto:" );
397+ cmark_strbuf_put (& buf , data + start + offset + max_rewind - rewind , (bufsize_t )(link_end + rewind ));
398+ link_node -> as .link .url = cmark_chunk_buf_detach (& buf );
399+
400+ cmark_node * link_text = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
401+ cmark_chunk email = cmark_chunk_dup (
402+ & detached_chunk ,
403+ start + offset + max_rewind - rewind ,
404+ (bufsize_t )(link_end + rewind ));
405+ cmark_chunk_to_cstr (parser -> mem , & email );
406+ link_text -> as .literal = email ;
407+ cmark_node_append_child (link_node , link_text );
374408
375- cmark_chunk_to_cstr ( parser -> mem , & text -> as . literal );
409+ cmark_node_insert_after ( text , link_node );
376410
377- cmark_node * link_node = cmark_node_new_with_mem (CMARK_NODE_LINK , parser -> mem );
378- cmark_strbuf buf ;
379- cmark_strbuf_init (parser -> mem , & buf , 10 );
380- if (auto_mailto )
381- cmark_strbuf_puts (& buf , "mailto:" );
382- cmark_strbuf_put (& buf , data - rewind , (bufsize_t )(link_end + rewind ));
383- link_node -> as .link .url = cmark_chunk_buf_detach (& buf );
384-
385- cmark_node * link_text = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
386- cmark_chunk email = cmark_chunk_dup (
387- & text -> as .literal ,
388- offset + max_rewind - rewind ,
389- (bufsize_t )(link_end + rewind ));
390- cmark_chunk_to_cstr (parser -> mem , & email );
391- link_text -> as .literal = email ;
392- cmark_node_append_child (link_node , link_text );
411+ cmark_node * post = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
412+ post -> as .literal = cmark_chunk_dup (& detached_chunk ,
413+ (bufsize_t )(start + offset + max_rewind + link_end ),
414+ (bufsize_t )(remaining - offset - max_rewind - link_end ));
393415
394- cmark_node_insert_after (text , link_node );
416+ cmark_node_insert_after (link_node , post );
395417
396- cmark_node * post = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
397- post -> as .literal = cmark_chunk_dup (& text -> as .literal ,
398- (bufsize_t )(offset + max_rewind + link_end ),
399- (bufsize_t )(size - link_end ));
400- cmark_chunk_to_cstr (parser -> mem , & post -> as .literal );
418+ text -> as .literal = cmark_chunk_dup (& detached_chunk , start , offset + max_rewind - rewind );
419+ cmark_chunk_to_cstr (parser -> mem , & text -> as .literal );
401420
402- cmark_node_insert_after (link_node , post );
421+ text = post ;
422+ start += offset + max_rewind + link_end ;
423+ remaining -= offset + max_rewind + link_end ;
424+ offset = 0 ;
425+ }
403426
404- text -> as .literal .len = offset + max_rewind - rewind ;
405- text -> as .literal .data [text -> as .literal .len ] = 0 ;
427+ // Convert the reference to allocated memory.
428+ assert (!text -> as .literal .alloc );
429+ cmark_chunk_to_cstr (parser -> mem , & text -> as .literal );
406430
407- postprocess_text (parser , post , 0 , depth + 1 );
431+ // Free the detached buffer.
432+ cmark_chunk_free (parser -> mem , & detached_chunk );
408433}
409434
410435static cmark_node * postprocess (cmark_syntax_extension * ext , cmark_parser * parser , cmark_node * root ) {
@@ -431,7 +456,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
431456 }
432457
433458 if (ev == CMARK_EVENT_ENTER && node -> type == CMARK_NODE_TEXT ) {
434- postprocess_text (parser , node , 0 , /*depth*/ 0 );
459+ postprocess_text (parser , node );
435460 }
436461 }
437462
0 commit comments