Skip to content

Commit 35a27ba

Browse files
authored
Merge pull request #3490 from jafl/php-parse-javascript-broken-by-php
html: parse javascript broken by php
2 parents 5dc6ad1 + b0ac451 commit 35a27ba

File tree

6 files changed

+195
-36
lines changed

6 files changed

+195
-36
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
--sort=yes
2+
--extras=+rg
3+
--fields=+rln
4+
--language-force=html
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
.blarg input.php /^.blarg {$/;" c line:31 language:CSS roles:def
2+
My Home Page input.php /^ <title>My Home Page<\/title>$/;" j line:8 language:HTML roles:def
3+
X input.php /^<html><head><title>X<\/title>$/;" j line:1 language:HTML roles:def
4+
css/general.css input.php /^ <link rel="stylesheet" href="css\/general.css" type="text\/css">$/;" C line:9 language:HTML roles:extFile
5+
draw input.php /^ function draw($x) {$/;" f line:3 language:PHP roles:def
6+
f1 input.php /^ var f1 = function (n) {$/;" f line:21 language:JavaScript roles:def
7+
f2 input.php /^ var f2 = function (m) {$/;" f line:26 language:JavaScript roles:def
8+
nothing input.php /^ function nothing($x) {$/;" f line:36 language:PHP roles:def
9+
stuff input.php /^ <h1>stuff<\/h1>$/;" h line:13 language:HTML roles:def
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<html><head><title>X</title>
2+
<?php // This test input is derived from #2256 submitted by @StephenWall.
3+
function draw($x) {
4+
echo "$x";
5+
}
6+
?>
7+
<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
8+
<title>My Home Page</title>
9+
<link rel="stylesheet" href="css/general.css" type="text/css">
10+
<style type="text/css"></stylE>
11+
</head>
12+
<body>
13+
<h1>stuff</h1>
14+
<a href="/blah">nowhere</a>
15+
<?=draw('nowhere')?>
16+
<a href="/blech">somewhere</a>
17+
<?=draw('somewhere')?>
18+
</body>
19+
</html>
20+
<script>
21+
var f1 = function (n) {
22+
return n + <?php /* some value */ ?>;
23+
}
24+
</script>
25+
<script>
26+
var f2 = function (m) {
27+
return m + <% /* some value from JSP */ %>;
28+
}
29+
</script>
30+
<style>
31+
.blarg {
32+
position: relative;
33+
}
34+
</style>
35+
<?php
36+
function nothing($x) {
37+
;
38+
}
39+
?>

main/promise.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,21 @@ int makePromise (const char *parser,
7171
int r;
7272
langType lang = LANG_IGNORE;
7373

74+
const bool is_thin_stream_spec =
75+
isThinStreamSpec(startLine, startCharOffset,
76+
endLine, endCharOffset,
77+
sourceLineOffset);
78+
79+
if (!is_thin_stream_spec
80+
&& (startLine > endLine
81+
|| (startLine == endLine && startCharOffset >= endCharOffset)))
82+
return -1;
83+
7484
verbose("makePromise: %s start(line: %lu, offset: %ld, srcline: %lu), end(line: %lu, offset: %ld)\n",
7585
parser? parser: "*", startLine, startCharOffset, sourceLineOffset,
7686
endLine, endCharOffset);
7787

78-
if ((!isThinStreamSpec(startLine,
79-
startCharOffset,
80-
endLine,
81-
endCharOffset,
82-
sourceLineOffset))
88+
if ((!is_thin_stream_spec)
8389
&& ( !isXtagEnabled (XTAG_GUEST)))
8490
return -1;
8591

parsers/html.c

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "routines.h"
2020
#include "keyword.h"
2121
#include "promise.h"
22+
#include "trace.h"
2223

2324
/* The max. number of nested elements - prevents further recursion if the limit
2425
* is exceeded and avoids stack overflow for invalid input containing too many
@@ -157,8 +158,8 @@ typedef enum {
157158
TOKEN_NAME, /* tag and attribute names */
158159
TOKEN_STRING, /* single- or double-quoted attribute value */
159160
TOKEN_TEXT,
160-
TOKEN_TAG_START, /* < */
161-
TOKEN_TAG_START2, /* </ */
161+
TOKEN_OPEN_TAG_START, /* < */
162+
TOKEN_CLOSE_TAG_START, /* </ */
162163
TOKEN_TAG_END, /* > */
163164
TOKEN_TAG_END2, /* /> */
164165
TOKEN_EQUAL,
@@ -167,14 +168,14 @@ typedef enum {
167168
} tokenType;
168169

169170
#ifdef DEBUG
170-
const char *tokenTypes[] = {
171+
static const char *tokenTypes[] = {
171172
#define E(X) [TOKEN_##X] = #X
172173
E(EOF),
173174
E(NAME),
174175
E(STRING),
175176
E(TEXT),
176-
E(TAG_START),
177-
E(TAG_START2),
177+
E(OPEN_TAG_START),
178+
E(CLOSE_TAG_START),
178179
E(TAG_END),
179180
E(TAG_END2),
180181
E(EQUAL),
@@ -195,16 +196,7 @@ static int Lang_html;
195196

196197
static void readTag (tokenInfo *token, vString *text, int depth);
197198

198-
#ifdef DEBUG
199-
#if 0
200-
static void dumpToken (tokenInfo *token, const char *context, const char* extra_context)
201-
{
202-
fprintf (stderr, "[%7s] %-20s@%s.%s\n",
203-
tokenTypes[token->type], vStringValue(token->string),
204-
context, extra_context? extra_context: "_");
205-
}
206-
#endif
207-
#endif
199+
static void skipOtherScriptContent (const int delimiter);
208200

209201
static void readTokenText (tokenInfo *const token, bool collectText)
210202
{
@@ -265,7 +257,6 @@ static void readToken (tokenInfo *const token, bool skipComments)
265257
case '<':
266258
{
267259
int d = getcFromInputFile ();
268-
269260
if (d == '!')
270261
{
271262
d = getcFromInputFile ();
@@ -296,14 +287,17 @@ static void readToken (tokenInfo *const token, bool skipComments)
296287
ungetcToInputFile (d);
297288
token->type = TOKEN_OTHER;
298289
}
299-
else if (d == '?')
300-
token->type = TOKEN_OTHER;
290+
else if (d == '?' || d == '%')
291+
{
292+
skipOtherScriptContent(d);
293+
goto getNextChar;
294+
}
301295
else if (d == '/')
302-
token->type = TOKEN_TAG_START2;
296+
token->type = TOKEN_CLOSE_TAG_START;
303297
else
304298
{
305299
ungetcToInputFile (d);
306-
token->type = TOKEN_TAG_START;
300+
token->type = TOKEN_OPEN_TAG_START;
307301
}
308302
break;
309303
}
@@ -356,6 +350,8 @@ static void readToken (tokenInfo *const token, bool skipComments)
356350
break;
357351
}
358352
}
353+
354+
TRACE_PRINT("token: %s (%s)", tokenTypes[token->type], vStringValue (token->string));
359355
}
360356

361357
static void appendText (vString *text, vString *appendedText)
@@ -373,6 +369,8 @@ static void appendText (vString *text, vString *appendedText)
373369

374370
static bool readTagContent (tokenInfo *token, vString *text, long *line, long *lineOffset, int depth)
375371
{
372+
TRACE_ENTER();
373+
376374
tokenType type;
377375

378376
readTokenText (token, text != NULL);
@@ -384,21 +382,25 @@ static bool readTagContent (tokenInfo *token, vString *text, long *line, long *l
384382
*lineOffset = getInputLineOffset ();
385383
readToken (token, false);
386384
type = token->type;
387-
if (type == TOKEN_TAG_START)
385+
if (type == TOKEN_OPEN_TAG_START)
388386
readTag (token, text, depth + 1);
389-
if (type == TOKEN_COMMENT || type == TOKEN_TAG_START)
387+
if (type == TOKEN_COMMENT || type == TOKEN_OPEN_TAG_START)
390388
{
391389
readTokenText (token, text != NULL);
392390
appendText (text, token->string);
393391
}
394392
}
395-
while (type == TOKEN_COMMENT || type == TOKEN_TAG_START);
393+
while (type == TOKEN_COMMENT || type == TOKEN_OPEN_TAG_START);
396394

397-
return type == TOKEN_TAG_START2;
395+
TRACE_LEAVE_TEXT("is_close_tag? %d", type == TOKEN_CLOSE_TAG_START);
396+
397+
return type == TOKEN_CLOSE_TAG_START;
398398
}
399399

400400
static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset)
401401
{
402+
TRACE_ENTER();
403+
402404
bool found_start = false;
403405
bool found_script = false;
404406

@@ -415,7 +417,7 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset)
415417
readToken (token, false);
416418
type = token->type;
417419

418-
if (type == TOKEN_TAG_START2)
420+
if (type == TOKEN_CLOSE_TAG_START)
419421
{
420422
found_start = true;
421423
line_tmp[1] = line_tmp[0];
@@ -434,9 +436,58 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset)
434436
}
435437
while ((type != TOKEN_EOF) && (!found_script));
436438

439+
TRACE_LEAVE_TEXT("found_script? %d", found_script);
440+
437441
return found_script;
438442
}
439443

444+
static void skipOtherScriptContent (const int delimiter)
445+
{
446+
TRACE_ENTER();
447+
448+
const long startSourceLineNumber = getSourceLineNumber ();
449+
const long startLineNumber = getInputLineNumber ();
450+
const long startLineOffset = getInputLineOffset () - 2;
451+
452+
vString *script_name = vStringNew ();
453+
bool reading_script_name = true;
454+
while (1)
455+
{
456+
int c = getcFromInputFile ();
457+
if (c == EOF)
458+
{
459+
break;
460+
}
461+
else if (reading_script_name && !isspace(c))
462+
{
463+
vStringPut (script_name, c);
464+
}
465+
else if (reading_script_name)
466+
{
467+
reading_script_name = false;
468+
}
469+
else if (c == delimiter)
470+
{
471+
c = getcFromInputFile ();
472+
if (c == '>')
473+
{
474+
break;
475+
}
476+
ungetcToInputFile (c);
477+
}
478+
}
479+
480+
if (strcasecmp ("php", vStringValue (script_name)) == 0
481+
|| strcmp ("=", vStringValue (script_name)) == 0)
482+
makePromise ("PHP", startLineNumber, startLineOffset,
483+
getInputLineNumber (), getInputLineOffset (),
484+
startSourceLineNumber);
485+
486+
vStringDelete (script_name);
487+
488+
TRACE_LEAVE();
489+
}
490+
440491
static void makeClassRefTags (const char *classes)
441492
{
442493
vString *klass = vStringNew ();
@@ -463,6 +514,8 @@ static void makeClassRefTags (const char *classes)
463514

464515
static void readTag (tokenInfo *token, vString *text, int depth)
465516
{
517+
TRACE_ENTER();
518+
466519
bool textCreated = false;
467520

468521
readToken (token, true);
@@ -640,23 +693,29 @@ static void readTag (tokenInfo *token, vString *text, int depth)
640693
out:
641694
if (textCreated)
642695
vStringDelete (text);
696+
697+
TRACE_LEAVE();
643698
}
644699

645700
static void findHtmlTags (void)
646701
{
702+
TRACE_ENTER();
703+
647704
tokenInfo token;
648705

649706
token.string = vStringNew ();
650707

651708
do
652709
{
653710
readToken (&token, true);
654-
if (token.type == TOKEN_TAG_START)
711+
if (token.type == TOKEN_OPEN_TAG_START)
655712
readTag (&token, NULL, 0);
656713
}
657714
while (token.type != TOKEN_EOF);
658715

659716
vStringDelete (token.string);
717+
718+
TRACE_LEAVE();
660719
}
661720

662721
static void initialize (const langType language)

0 commit comments

Comments
 (0)