@@ -269,14 +269,20 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
269
269
// inline was finished in inlines.c.
270
270
}
271
271
272
- static bool validate_protocol (char protocol [], uint8_t * data , int rewind ) {
272
+ static bool validate_protocol (char protocol [], uint8_t * data , int rewind , int max_rewind ) {
273
273
size_t len = strlen (protocol );
274
274
275
+ if (len > (size_t )(max_rewind - rewind )) {
276
+ return false;
277
+ }
278
+
275
279
// Check that the protocol matches
276
- for (int i = 1 ; i <= len ; i ++ ) {
277
- if (data [- rewind - i ] != protocol [len - i ]) {
278
- return false;
279
- }
280
+ if (memcmp (data - rewind - len , protocol , len ) != 0 ) {
281
+ return false;
282
+ }
283
+
284
+ if (len == (size_t )(max_rewind - rewind )) {
285
+ return true;
280
286
}
281
287
282
288
char prev_char = data [- rewind - len - 1 ];
@@ -285,126 +291,145 @@ static bool validate_protocol(char protocol[], uint8_t *data, int rewind) {
285
291
return !cmark_isalnum (prev_char );
286
292
}
287
293
288
- static void postprocess_text (cmark_parser * parser , cmark_node * text , int offset , int depth ) {
289
- // postprocess_text can recurse very deeply if there is a very long line of
290
- // '@' only. Stop at a reasonable depth to ensure it cannot crash.
291
- if (depth > 1000 ) return ;
294
+ static void postprocess_text (cmark_parser * parser , cmark_node * text ) {
295
+ size_t start = 0 ;
296
+ size_t offset = 0 ;
297
+ // `text` is going to be split into a list of nodes containing shorter segments
298
+ // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
299
+ // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
300
+ // the references into allocated buffers. The detached buffer is freed before we
301
+ // return.
302
+ cmark_chunk detached_chunk = text -> as .literal ;
303
+ text -> as .literal = cmark_chunk_dup (& detached_chunk , 0 , detached_chunk .len );
304
+
305
+ uint8_t * data = text -> as .literal .data ;
306
+ size_t remaining = text -> as .literal .len ;
307
+
308
+ while (true) {
309
+ size_t link_end ;
310
+ uint8_t * at ;
311
+ bool auto_mailto = true;
312
+ bool is_xmpp = false;
313
+ size_t rewind ;
314
+ size_t max_rewind ;
315
+ size_t np = 0 ;
316
+
317
+ if (offset >= remaining )
318
+ break ;
292
319
293
- size_t link_end ;
294
- uint8_t * data = text -> as .literal .data ,
295
- * at ;
296
- size_t size = text -> as .literal .len ;
297
- bool auto_mailto = true;
298
- bool is_xmpp = false;
299
- int rewind , max_rewind ,
300
- nb = 0 , np = 0 , ns = 0 ;
320
+ at = (uint8_t * )memchr (data + start + offset , '@' , remaining - offset );
321
+ if (!at )
322
+ break ;
301
323
302
- if (offset < 0 || (size_t )offset >= size )
303
- return ;
324
+ max_rewind = at - (data + start + offset );
304
325
305
- data += offset ;
306
- size -= offset ;
326
+ found_at :
327
+ for (rewind = 0 ; rewind < max_rewind ; ++ rewind ) {
328
+ uint8_t c = data [start + offset + max_rewind - rewind - 1 ];
307
329
308
- at = (uint8_t * )memchr (data , '@' , size );
309
- if (!at )
310
- return ;
330
+ if (cmark_isalnum (c ))
331
+ continue ;
311
332
312
- max_rewind = (int )(at - data );
313
- data += max_rewind ;
314
- size -= max_rewind ;
333
+ if (strchr (".+-_" , c ) != NULL )
334
+ continue ;
315
335
316
- for (rewind = 0 ; rewind < max_rewind ; ++ rewind ) {
317
- uint8_t c = data [- rewind - 1 ];
336
+ if (strchr (":" , c ) != NULL ) {
337
+ if (validate_protocol ("mailto:" , data + start + offset + max_rewind , rewind , max_rewind )) {
338
+ auto_mailto = false;
339
+ continue ;
340
+ }
341
+
342
+ if (validate_protocol ("xmpp:" , data + start + offset + max_rewind , rewind , max_rewind )) {
343
+ auto_mailto = false;
344
+ is_xmpp = true;
345
+ continue ;
346
+ }
347
+ }
318
348
319
- if ( cmark_isalnum ( c ))
320
- continue ;
349
+ break ;
350
+ }
321
351
322
- if (strchr (".+-_" , c ) != NULL )
352
+ if (rewind == 0 ) {
353
+ offset += max_rewind + 1 ;
323
354
continue ;
355
+ }
356
+
357
+ assert (data [start + offset + max_rewind ] == '@' );
358
+ for (link_end = 1 ; link_end < remaining - offset - max_rewind ; ++ link_end ) {
359
+ uint8_t c = data [start + offset + max_rewind + link_end ];
324
360
325
- if (strchr (":" , c ) != NULL ) {
326
- if (validate_protocol ("mailto:" , data , rewind )) {
327
- auto_mailto = false;
361
+ if (cmark_isalnum (c ))
328
362
continue ;
329
- }
330
363
331
- if (validate_protocol ("xmpp:" , data , rewind )) {
332
- auto_mailto = false;
333
- is_xmpp = true;
364
+ if (c == '@' ) {
365
+ // Found another '@', so go back and try again with an updated offset and max_rewind.
366
+ offset += max_rewind + 1 ;
367
+ max_rewind = link_end - 1 ;
368
+ goto found_at ;
369
+ } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
370
+ cmark_isalnum (data [start + offset + max_rewind + link_end + 1 ]))
371
+ np ++ ;
372
+ else if (c == '/' && is_xmpp )
334
373
continue ;
335
- }
374
+ else if (c != '-' && c != '_' )
375
+ break ;
336
376
}
337
377
338
- break ;
339
- }
340
-
341
- if (rewind == 0 || ns > 0 ) {
342
- postprocess_text (parser , text , max_rewind + 1 + offset , depth + 1 );
343
- return ;
344
- }
345
-
346
- for (link_end = 0 ; link_end < size ; ++ link_end ) {
347
- uint8_t c = data [link_end ];
348
-
349
- if (cmark_isalnum (c ))
350
- continue ;
351
-
352
- if (c == '@' )
353
- nb ++ ;
354
- else if (c == '.' && link_end < size - 1 && cmark_isalnum (data [link_end + 1 ]))
355
- np ++ ;
356
- else if (c == '/' && is_xmpp )
378
+ if (link_end < 2 || np == 0 ||
379
+ (!cmark_isalpha (data [start + offset + max_rewind + link_end - 1 ]) &&
380
+ data [start + offset + max_rewind + link_end - 1 ] != '.' )) {
381
+ offset += max_rewind + link_end ;
357
382
continue ;
358
- else if (c != '-' && c != '_' )
359
- break ;
360
- }
383
+ }
361
384
362
- if (link_end < 2 || nb != 1 || np == 0 ||
363
- (!cmark_isalpha (data [link_end - 1 ]) && data [link_end - 1 ] != '.' )) {
364
- postprocess_text (parser , text , max_rewind + 1 + offset , depth + 1 );
365
- return ;
366
- }
385
+ link_end = autolink_delim (data + start + offset + max_rewind , link_end );
367
386
368
- link_end = autolink_delim (data , link_end );
387
+ if (link_end == 0 ) {
388
+ offset += max_rewind + 1 ;
389
+ continue ;
390
+ }
369
391
370
- if (link_end == 0 ) {
371
- postprocess_text (parser , text , max_rewind + 1 + offset , depth + 1 );
372
- return ;
373
- }
392
+ cmark_node * link_node = cmark_node_new_with_mem (CMARK_NODE_LINK , parser -> mem );
393
+ cmark_strbuf buf ;
394
+ cmark_strbuf_init (parser -> mem , & buf , 10 );
395
+ if (auto_mailto )
396
+ cmark_strbuf_puts (& buf , "mailto:" );
397
+ cmark_strbuf_put (& buf , data + start + offset + max_rewind - rewind , (bufsize_t )(link_end + rewind ));
398
+ link_node -> as .link .url = cmark_chunk_buf_detach (& buf );
399
+
400
+ cmark_node * link_text = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
401
+ cmark_chunk email = cmark_chunk_dup (
402
+ & detached_chunk ,
403
+ start + offset + max_rewind - rewind ,
404
+ (bufsize_t )(link_end + rewind ));
405
+ cmark_chunk_to_cstr (parser -> mem , & email );
406
+ link_text -> as .literal = email ;
407
+ cmark_node_append_child (link_node , link_text );
374
408
375
- cmark_chunk_to_cstr ( parser -> mem , & text -> as . literal );
409
+ cmark_node_insert_after ( text , link_node );
376
410
377
- cmark_node * link_node = cmark_node_new_with_mem (CMARK_NODE_LINK , parser -> mem );
378
- cmark_strbuf buf ;
379
- cmark_strbuf_init (parser -> mem , & buf , 10 );
380
- if (auto_mailto )
381
- cmark_strbuf_puts (& buf , "mailto:" );
382
- cmark_strbuf_put (& buf , data - rewind , (bufsize_t )(link_end + rewind ));
383
- link_node -> as .link .url = cmark_chunk_buf_detach (& buf );
384
-
385
- cmark_node * link_text = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
386
- cmark_chunk email = cmark_chunk_dup (
387
- & text -> as .literal ,
388
- offset + max_rewind - rewind ,
389
- (bufsize_t )(link_end + rewind ));
390
- cmark_chunk_to_cstr (parser -> mem , & email );
391
- link_text -> as .literal = email ;
392
- cmark_node_append_child (link_node , link_text );
411
+ cmark_node * post = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
412
+ post -> as .literal = cmark_chunk_dup (& detached_chunk ,
413
+ (bufsize_t )(start + offset + max_rewind + link_end ),
414
+ (bufsize_t )(remaining - offset - max_rewind - link_end ));
393
415
394
- cmark_node_insert_after (text , link_node );
416
+ cmark_node_insert_after (link_node , post );
395
417
396
- cmark_node * post = cmark_node_new_with_mem (CMARK_NODE_TEXT , parser -> mem );
397
- post -> as .literal = cmark_chunk_dup (& text -> as .literal ,
398
- (bufsize_t )(offset + max_rewind + link_end ),
399
- (bufsize_t )(size - link_end ));
400
- cmark_chunk_to_cstr (parser -> mem , & post -> as .literal );
418
+ text -> as .literal = cmark_chunk_dup (& detached_chunk , start , offset + max_rewind - rewind );
419
+ cmark_chunk_to_cstr (parser -> mem , & text -> as .literal );
401
420
402
- cmark_node_insert_after (link_node , post );
421
+ text = post ;
422
+ start += offset + max_rewind + link_end ;
423
+ remaining -= offset + max_rewind + link_end ;
424
+ offset = 0 ;
425
+ }
403
426
404
- text -> as .literal .len = offset + max_rewind - rewind ;
405
- text -> as .literal .data [text -> as .literal .len ] = 0 ;
427
+ // Convert the reference to allocated memory.
428
+ assert (!text -> as .literal .alloc );
429
+ cmark_chunk_to_cstr (parser -> mem , & text -> as .literal );
406
430
407
- postprocess_text (parser , post , 0 , depth + 1 );
431
+ // Free the detached buffer.
432
+ cmark_chunk_free (parser -> mem , & detached_chunk );
408
433
}
409
434
410
435
static cmark_node * postprocess (cmark_syntax_extension * ext , cmark_parser * parser , cmark_node * root ) {
@@ -431,7 +456,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
431
456
}
432
457
433
458
if (ev == CMARK_EVENT_ENTER && node -> type == CMARK_NODE_TEXT ) {
434
- postprocess_text (parser , node , 0 , /*depth*/ 0 );
459
+ postprocess_text (parser , node );
435
460
}
436
461
}
437
462
0 commit comments