@@ -39,6 +39,14 @@ static roleDefinition PerlModuleRoles [] = {
3939 { true, "unused" , "specified in `no' built-in function" },
4040};
4141
42+ typedef enum {
43+ R_HEREDOC_ENDLABEL ,
44+ } perlHeredocRole ;
45+
46+ static roleDefinition PerlHeredocRoles [] = {
47+ { true, "endmarker" , "end marker" },
48+ };
49+
4250static kindDefinition PerlKinds [] = {
4351 { true, 'c' , "constant" , "constants" },
4452 { true, 'f' , "format" , "formats" },
@@ -48,6 +56,19 @@ static kindDefinition PerlKinds [] = {
4856 { false, 'd' , "subroutineDeclaration" , "subroutine declarations" },
4957 { false, 'M' , "module" , "modules" ,
5058 .referenceOnly = true, ATTACH_ROLES (PerlModuleRoles )},
59+ { false, 'h' , "heredoc" , "marker for here document" ,
60+ .referenceOnly = false, ATTACH_ROLES (PerlHeredocRoles ) },
61+ };
62+
63+ struct hereDocMarker {
64+ vString * marker ;
65+ bool indented ;
66+ int corkIndex ;
67+ };
68+
69+ struct hereDocMarkerManager {
70+ ptrArray * markers ;
71+ size_t current ;
5172};
5273
5374/*
@@ -363,6 +384,177 @@ static void parseQuotedWords(const unsigned char *cp,
363384 } while ((cp = readLineFromInputFile ()) != NULL );
364385}
365386
387+ /*
388+ * Extract heredoc markers and skip the heredoc areas.
389+ *
390+ * - https://perldoc.perl.org/perlop#%3C%3CEOF
391+ */
392+ static struct hereDocMarker * hereDocMarkerNew (bool indented )
393+ {
394+ struct hereDocMarker * marker = xMalloc (1 , struct hereDocMarker );
395+
396+ marker -> indented = indented ;
397+ marker -> marker = vStringNew ();
398+ marker -> corkIndex = CORK_NIL ;
399+
400+ return marker ;
401+ }
402+
403+ static void hereDocMarkerDelete (struct hereDocMarker * marker )
404+ {
405+ vStringDelete (marker -> marker );
406+ eFree (marker );
407+ }
408+
409+ static unsigned char * readHereDocMarker (unsigned char * line ,
410+ vString * marker ,
411+ unsigned char quote_char )
412+ {
413+ unsigned char * cp = line ;
414+ bool backslash = false;
415+
416+ for (cp = line ; * cp != '\0' ; cp ++ )
417+ {
418+ if (backslash )
419+ {
420+ vStringPut (marker , * cp );
421+ backslash = false;
422+ continue ;
423+ }
424+
425+ if (quote_char == '"' && (* cp == '\\' ))
426+ {
427+ backslash = true;
428+ continue ;
429+ }
430+
431+ if (quote_char && * cp == quote_char )
432+ {
433+ cp ++ ;
434+ break ;
435+ }
436+
437+ if (!quote_char && !isIdentifier (* cp ))
438+ break ;
439+
440+ vStringPut (marker , * cp );
441+ }
442+
443+ return cp ;
444+ }
445+
446+ static void collectHereDocMarkers (struct hereDocMarkerManager * mgr ,
447+ const unsigned char * line )
448+ {
449+ unsigned char * starter = (unsigned char * )strstr ((char * )line , "<<" );
450+ unsigned char * cp = NULL ;
451+ bool indented = false;
452+ unsigned char quote_char = 0 ;
453+
454+ if (starter == NULL )
455+ return ;
456+
457+ cp = starter + 2 ;
458+ while (isspace (* cp ))
459+ cp ++ ;
460+
461+ if (* cp == '\0' )
462+ return ;
463+
464+ /* Is shift operator? */
465+ if (isdigit (* cp ))
466+ {
467+ /* Scan the rest of the string. */
468+ collectHereDocMarkers (mgr , ++ cp );
469+ return ;
470+ }
471+
472+ if (* cp == '~' ) {
473+ indented = true;
474+ cp ++ ;
475+ if (* cp == '\0' )
476+ return ;
477+ while (isspace (* cp ))
478+ cp ++ ;
479+ if (* cp == '\0' )
480+ return ;
481+ }
482+
483+ switch (* cp )
484+ {
485+ case '\'' :
486+ case '"' :
487+ case '`' :
488+ quote_char = * cp ;
489+ /* Fall through */
490+ case '\\' :
491+ cp ++ ;
492+ if (* cp == '\0' )
493+ return ;
494+ break ;
495+ default :
496+ break ;
497+ }
498+
499+ struct hereDocMarker * marker = hereDocMarkerNew (indented );
500+ const unsigned char * last_cp = cp ;
501+ cp = readHereDocMarker (cp , marker -> marker , quote_char );
502+ if (vStringLength (marker -> marker ) > 0 )
503+ {
504+ marker -> corkIndex = makeSimpleTag (marker -> marker ,
505+ KIND_PERL_HEREDOCMARKER );
506+ ptrArrayAdd (mgr -> markers , marker );
507+ }
508+ else
509+ hereDocMarkerDelete (marker );
510+
511+ if (* cp != '\0' && cp != last_cp )
512+ collectHereDocMarkers (mgr , cp );
513+ }
514+
515+ static bool isInHereDoc (struct hereDocMarkerManager * mgr ,
516+ const unsigned char * line )
517+ {
518+ if (ptrArrayCount (mgr -> markers ) == 0 )
519+ return false;
520+
521+ const unsigned char * cp = line ;
522+ struct hereDocMarker * current = ptrArrayItem (mgr -> markers , mgr -> current );
523+ if (current -> indented )
524+ {
525+ while (isspace (* cp ))
526+ cp ++ ;
527+ }
528+ if (strncmp ((const char * )cp , vStringValue (current -> marker ), vStringLength (current -> marker )) == 0
529+ && (cp [vStringLength (current -> marker )] == '\0'
530+ || (!isIdentifier (cp [vStringLength (current -> marker )]))))
531+ {
532+ tagEntryInfo * tag = getEntryInCorkQueue (current -> corkIndex );
533+ if (tag )
534+ tag -> extensionFields .endLine = getInputLineNumber ();
535+ mgr -> current ++ ;
536+ if (mgr -> current == ptrArrayCount (mgr -> markers ))
537+ {
538+ ptrArrayClear (mgr -> markers );
539+ mgr -> current = 0 ;
540+ }
541+ }
542+ return true;
543+ }
544+
545+ static void initHereDocMarkerManager (struct hereDocMarkerManager * mgr )
546+ {
547+ mgr -> markers = ptrArrayNew ((ptrArrayDeleteFunc )hereDocMarkerDelete );
548+ mgr -> current = 0 ;
549+ }
550+
551+ static void finiHereDocMarkerManager (struct hereDocMarkerManager * mgr )
552+ {
553+ ptrArrayDelete (mgr -> markers );
554+ mgr -> markers = NULL ;
555+ mgr -> current = 0 ;
556+ }
557+
366558/* Algorithm adapted from from GNU etags.
367559 * Perl support by Bart Robinson <[email protected] > 368560 * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/
@@ -395,6 +587,9 @@ static void findPerlTags (void)
395587 RESPECT_DATA = (1 << 1 ),
396588 } respect_token = RESPECT_END | RESPECT_DATA ;
397589
590+ struct hereDocMarkerManager hdoc_mgr ;
591+ initHereDocMarkerManager (& hdoc_mgr );
592+
398593 while ((line = readLineFromInputFile ()) != NULL )
399594 {
400595 bool spaceRequired = false;
@@ -403,6 +598,9 @@ static void findPerlTags (void)
403598 perlKind kind = KIND_PERL_NONE ;
404599 tagEntryInfo e ;
405600
601+ if (isInHereDoc (& hdoc_mgr , line ))
602+ continue ;
603+
406604 if (skipPodDoc )
407605 {
408606 if (strncmp ((const char * ) line , "=cut" , (size_t ) 4 ) == 0 )
@@ -463,6 +661,8 @@ static void findPerlTags (void)
463661 while (isspace (* cp ))
464662 cp ++ ;
465663
664+ collectHereDocMarkers (& hdoc_mgr , cp );
665+
466666 if (strncmp ((const char * ) cp , "sub" , (size_t ) 3 ) == 0 )
467667 {
468668 TRACE ("this looks like a sub\n" );
@@ -710,6 +910,7 @@ static void findPerlTags (void)
710910
711911END_MAIN_WHILE :
712912 vStringDelete (name );
913+ finiHereDocMarkerManager (& hdoc_mgr );
713914 if (package != NULL )
714915 vStringDelete (package );
715916}
0 commit comments