Skip to content

Commit 08bf1b9

Browse files
committed
Perl: skip here documents
Close #3200. Signed-off-by: Masatake YAMATO <[email protected]>
1 parent fd0fbb0 commit 08bf1b9

File tree

9 files changed

+300
-0
lines changed

9 files changed

+300
-0
lines changed

Tmain/list-roles.d/stdout-expected.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ Make I/makefile optional on optionally included
6565
NSIS i/script included on included with !include
6666
Perl M/module unused on specified in `no' built-in function
6767
Perl M/module used on specified in `use' built-in function
68+
Perl h/heredoc endmarker on end marker
6869
Protobuf D/protodef imported on imported
6970
Protobuf m/message extension on extending the message
7071
Python i/module imported on imported modules
@@ -164,6 +165,7 @@ Make I/makefile optional on optionally included
164165
NSIS i/script included on included with !include
165166
Perl M/module unused on specified in `no' built-in function
166167
Perl M/module used on specified in `use' built-in function
168+
Perl h/heredoc endmarker on end marker
167169
Protobuf D/protodef imported on imported
168170
Protobuf m/message extension on extending the message
169171
Python i/module imported on imported modules
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--kinds-Perl=+h
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
EOM input.pl /^$this->print_log(<<EOM);$/;" h
12
test input.pl /^sub test {$/;" s
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
--sort=no
2+
--fields=+en
3+
--kinds-Perl=*
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
myfunc0 input.pl /^sub myfunc0$/;" s line:1
2+
foo input.pl /^ print <<"foo", <<~\\bar; # you can stack them$/;" h line:5 end:10
3+
bar input.pl /^ print <<"foo", <<~\\bar; # you can stack them$/;" h line:5 end:15
4+
myfunc3 input.pl /^sub myfunc3$/;" s line:17
5+
THIS input.pl /^ myfunc0(<< "THIS", 23, <<'THAT');$/;" h line:21 end:33
6+
THAT input.pl /^ myfunc0(<< "THIS", 23, <<'THAT');$/;" h line:21 end:41
7+
myfunc9 input.pl /^sub myfunc9$/;" s line:42
8+
AB"CD input.pl /^myfunc0(<< "AB\\"CD", << 'EF\\GH');$/;" h line:46 end:53
9+
EF\\GH input.pl /^myfunc0(<< "AB\\"CD", << 'EF\\GH');$/;" h line:46 end:59
10+
myfunc12 input.pl /^sub myfunc12$/;" s line:61
11+
END input-0.pl /^(($argc == (1 << 0x1)) or ($argc >= (1<<1) and $opt_write)) or die <<END;$/;" h line:2 end:10
12+
foo input-0.pl /^sub foo {$/;" s line:12
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Derived from https://github.com/geany/geany/blob/master/scripts/fix-alignment.pl
2+
(($argc == (1 << 0x1)) or ($argc >= (1<<1) and $opt_write)) or die <<END;
3+
Usage:
4+
$scriptname sourcefile [>outfile]
5+
Print formatted output to STDOUT or outfile.
6+
Warning: do not use the same file for outfile.
7+
$scriptname -w sourcefile(s)
8+
Writes to the file(s) in-place.
9+
Warning: backup your file(s) first or use clean version control files.
10+
END
11+
12+
sub foo {
13+
print "hello\n";
14+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
sub myfunc0
2+
{
3+
}
4+
5+
print <<"foo", <<~\bar; # you can stack them
6+
I said foo.
7+
sub myfunc1
8+
{
9+
}
10+
foo
11+
I said bar.
12+
sub myfunc2
13+
{
14+
}
15+
bar
16+
17+
sub myfunc3
18+
{
19+
}
20+
21+
myfunc0(<< "THIS", 23, <<'THAT');
22+
sub myfunc4
23+
{
24+
}
25+
Here's a line
26+
sub myfunc5
27+
{
28+
}
29+
or two.
30+
sub myfunc6
31+
{
32+
}
33+
THIS
34+
sub myfunc7
35+
{
36+
}
37+
and here's another.
38+
sub myfunc8
39+
{
40+
}
41+
THAT
42+
sub myfunc9
43+
{
44+
}
45+
46+
myfunc0(<< "AB\"CD", << 'EF\GH');
47+
label0:
48+
sub myfunc10
49+
{
50+
label1:
51+
}
52+
label2:
53+
AB"CD
54+
sub myfunc11
55+
{
56+
label3:
57+
}
58+
label4:
59+
EF\GH
60+
61+
sub myfunc12
62+
{
63+
print "12\n";
64+
}
65+
myfunc12();

parsers/perl.c

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ static roleDefinition PerlModuleRoles [] = {
3939
{ true, "unused", "specified in `no' built-in function" },
4040
};
4141

42+
typedef enum {
43+
R_HEREDOC_ENDLABEL,
44+
} perlHeredocRole;
45+
46+
static roleDefinition PerlHeredocRoles [] = {
47+
{ true, "endmarker", "end marker" },
48+
};
49+
4250
static kindDefinition PerlKinds [] = {
4351
{ true, 'c', "constant", "constants" },
4452
{ true, 'f', "format", "formats" },
@@ -48,6 +56,19 @@ static kindDefinition PerlKinds [] = {
4856
{ false, 'd', "subroutineDeclaration", "subroutine declarations" },
4957
{ false, 'M', "module", "modules",
5058
.referenceOnly = true, ATTACH_ROLES(PerlModuleRoles)},
59+
{ false, 'h', "heredoc", "marker for here document",
60+
.referenceOnly = false, ATTACH_ROLES (PerlHeredocRoles) },
61+
};
62+
63+
struct hereDocMarker {
64+
vString *marker;
65+
bool indented;
66+
int corkIndex;
67+
};
68+
69+
struct hereDocMarkerManager {
70+
ptrArray *markers;
71+
size_t current;
5172
};
5273

5374
/*
@@ -363,6 +384,177 @@ static void parseQuotedWords(const unsigned char *cp,
363384
} while ((cp = readLineFromInputFile()) != NULL);
364385
}
365386

387+
/*
388+
* Extract heredoc markers and skip the heredoc areas.
389+
*
390+
* - https://perldoc.perl.org/perlop#%3C%3CEOF
391+
*/
392+
static struct hereDocMarker *hereDocMarkerNew (bool indented)
393+
{
394+
struct hereDocMarker *marker = xMalloc(1, struct hereDocMarker);
395+
396+
marker->indented = indented;
397+
marker->marker = vStringNew();
398+
marker->corkIndex = CORK_NIL;
399+
400+
return marker;
401+
}
402+
403+
static void hereDocMarkerDelete (struct hereDocMarker *marker)
404+
{
405+
vStringDelete (marker->marker);
406+
eFree (marker);
407+
}
408+
409+
static unsigned char *readHereDocMarker (unsigned char *line,
410+
vString *marker,
411+
unsigned char quote_char)
412+
{
413+
unsigned char *cp = line;
414+
bool backslash = false;
415+
416+
for (cp = line; *cp != '\0'; cp++)
417+
{
418+
if (backslash)
419+
{
420+
vStringPut (marker, *cp);
421+
backslash = false;
422+
continue;
423+
}
424+
425+
if (quote_char == '"' && (*cp == '\\'))
426+
{
427+
backslash = true;
428+
continue;
429+
}
430+
431+
if (quote_char && *cp == quote_char)
432+
{
433+
cp++;
434+
break;
435+
}
436+
437+
if (!quote_char && !isIdentifier(*cp))
438+
break;
439+
440+
vStringPut (marker, *cp);
441+
}
442+
443+
return cp;
444+
}
445+
446+
static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
447+
const unsigned char *line)
448+
{
449+
unsigned char *starter = (unsigned char*)strstr((char *)line, "<<");
450+
unsigned char *cp = NULL;
451+
bool indented = false;
452+
unsigned char quote_char = 0;
453+
454+
if (starter == NULL)
455+
return;
456+
457+
cp = starter + 2;
458+
while (isspace (*cp))
459+
cp++;
460+
461+
if (*cp == '\0')
462+
return;
463+
464+
/* Is shift operator? */
465+
if (isdigit (*cp))
466+
{
467+
/* Scan the rest of the string. */
468+
collectHereDocMarkers (mgr, ++cp);
469+
return;
470+
}
471+
472+
if (*cp == '~') {
473+
indented = true;
474+
cp++;
475+
if (*cp == '\0')
476+
return;
477+
while (isspace (*cp))
478+
cp++;
479+
if (*cp == '\0')
480+
return;
481+
}
482+
483+
switch (*cp)
484+
{
485+
case '\'':
486+
case '"':
487+
case '`':
488+
quote_char = *cp;
489+
/* Fall through */
490+
case '\\':
491+
cp++;
492+
if (*cp == '\0')
493+
return;
494+
break;
495+
default:
496+
break;
497+
}
498+
499+
struct hereDocMarker *marker = hereDocMarkerNew (indented);
500+
const unsigned char *last_cp = cp;
501+
cp = readHereDocMarker(cp, marker->marker, quote_char);
502+
if (vStringLength (marker->marker) > 0)
503+
{
504+
marker->corkIndex = makeSimpleTag (marker->marker,
505+
KIND_PERL_HEREDOCMARKER);
506+
ptrArrayAdd (mgr->markers, marker);
507+
}
508+
else
509+
hereDocMarkerDelete (marker);
510+
511+
if (*cp != '\0' && cp != last_cp)
512+
collectHereDocMarkers (mgr, cp);
513+
}
514+
515+
static bool isInHereDoc (struct hereDocMarkerManager *mgr,
516+
const unsigned char *line)
517+
{
518+
if (ptrArrayCount (mgr->markers) == 0)
519+
return false;
520+
521+
const unsigned char *cp = line;
522+
struct hereDocMarker *current = ptrArrayItem (mgr->markers, mgr->current);
523+
if (current->indented)
524+
{
525+
while (isspace(*cp))
526+
cp++;
527+
}
528+
if (strncmp((const char *)cp, vStringValue (current->marker), vStringLength (current->marker)) == 0
529+
&& (cp [vStringLength (current->marker)] == '\0'
530+
|| (!isIdentifier (cp [vStringLength (current->marker)]))))
531+
{
532+
tagEntryInfo *tag = getEntryInCorkQueue (current->corkIndex);
533+
if (tag)
534+
tag->extensionFields.endLine = getInputLineNumber();
535+
mgr->current++;
536+
if (mgr->current == ptrArrayCount (mgr->markers))
537+
{
538+
ptrArrayClear (mgr->markers);
539+
mgr->current = 0;
540+
}
541+
}
542+
return true;
543+
}
544+
545+
static void initHereDocMarkerManager(struct hereDocMarkerManager *mgr)
546+
{
547+
mgr->markers = ptrArrayNew((ptrArrayDeleteFunc)hereDocMarkerDelete);
548+
mgr->current = 0;
549+
}
550+
551+
static void finiHereDocMarkerManager(struct hereDocMarkerManager *mgr)
552+
{
553+
ptrArrayDelete (mgr->markers);
554+
mgr->markers = NULL;
555+
mgr->current = 0;
556+
}
557+
366558
/* Algorithm adapted from from GNU etags.
367559
* Perl support by Bart Robinson <[email protected]>
368560
* Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/
@@ -395,6 +587,9 @@ static void findPerlTags (void)
395587
RESPECT_DATA = (1 << 1),
396588
} respect_token = RESPECT_END | RESPECT_DATA;
397589

590+
struct hereDocMarkerManager hdoc_mgr;
591+
initHereDocMarkerManager (&hdoc_mgr);
592+
398593
while ((line = readLineFromInputFile ()) != NULL)
399594
{
400595
bool spaceRequired = false;
@@ -403,6 +598,9 @@ static void findPerlTags (void)
403598
perlKind kind = KIND_PERL_NONE;
404599
tagEntryInfo e;
405600

601+
if (isInHereDoc (&hdoc_mgr, line))
602+
continue;
603+
406604
if (skipPodDoc)
407605
{
408606
if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0)
@@ -463,6 +661,8 @@ static void findPerlTags (void)
463661
while (isspace (*cp))
464662
cp++;
465663

664+
collectHereDocMarkers (&hdoc_mgr, cp);
665+
466666
if (strncmp((const char*) cp, "sub", (size_t) 3) == 0)
467667
{
468668
TRACE("this looks like a sub\n");
@@ -710,6 +910,7 @@ static void findPerlTags (void)
710910

711911
END_MAIN_WHILE:
712912
vStringDelete (name);
913+
finiHereDocMarkerManager (&hdoc_mgr);
713914
if (package != NULL)
714915
vStringDelete (package);
715916
}

parsers/perl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ enum PerlKindType {
3030
KIND_PERL_SUBROUTINE,
3131
KIND_PERL_SUBROUTINE_DECLARATION,
3232
KIND_PERL_MODULE,
33+
KIND_PERL_HEREDOCMARKER,
3334
};
3435

3536
struct sPerlSubparser {

0 commit comments

Comments
 (0)