Skip to content

Commit b2588fe

Browse files
authored
Merge pull request #3598 from masatake/html--html-string-in-js-in-html
HTML: introduce a specialized tokenizer for script areas Close #3581. Close #3597.
2 parents 3af4135 + baef2cd commit b2588fe

File tree

7 files changed

+98
-1
lines changed

7 files changed

+98
-1
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
--sort=no
2+
--extras=+g
3+
--fields=+Kl
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Foo input.html /^<h1>Foo<\/h1>$/;" heading1 language:HTML
2+
BAR input.html /^<h1>BAR<\/h1>$/;" heading1 language:HTML
3+
x input.html /^ var x/;" variable language:JavaScript
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<h1>Foo</h1>
2+
<script>
3+
// <!--
4+
var x
5+
</script>
6+
<h1>BAR</h1>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
--sort=no
2+
--extras=+g
3+
--fields=+Kl
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Foo input.html /^<h1>Foo<\/h1>$/;" heading1 language:HTML
2+
BAR input.html /^<h1>BAR<\/h1>$/;" heading1 language:HTML
3+
bar input.html /^ const bar = 123$/;" constant language:JavaScript
4+
baz input.html /^ function baz () {$/;" function language:JavaScript
5+
bar2 input.html /^ const bar2 = 123$/;" constant language:JavaScript
6+
baz2 input.html /^ function baz2 () {$/;" function language:JavaScript
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<!-- Taken from #3581 submitted by @polyscone -->
2+
<h1>Foo</h1>
3+
4+
<script>
5+
const bar = 123
6+
7+
// I don't know why, but an apostrophe breaks
8+
// the JavaScript guest language
9+
function baz () {
10+
return 'abc'
11+
}
12+
</script>
13+
14+
<script>
15+
const bar2 = 123
16+
17+
// I don"t know why, but an apostrophe breaks
18+
// the JavaScript guest language
19+
function baz2 () {
20+
return 'abc'
21+
}
22+
</script>
23+
24+
<h1>BAR</h1>

parsers/html.c

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,58 @@ static void readTokenText (tokenInfo *const token, bool collectText)
236236
}
237237
}
238238

239+
/*
 * Read the next token while scanning the content of a script area.
 *
 * The token text is reset first, then leading white space is skipped.
 * The resulting token type is one of:
 *
 *   TOKEN_EOF             - the input is exhausted.
 *   TOKEN_CLOSE_TAG_START - the two characters "</" were read.
 *   TOKEN_NAME            - a run of characters free of white space and of
 *                           '<', '>', '/', '=', '\'' and '"'; the text is
 *                           stored lower-cased in token->string.
 *   TOKEN_OTHER           - anything else: a '<' not followed by '/', or a
 *                           single '>', '/', '=', '\'' or '"'.
 *
 * Quote characters only terminate a name or come back as TOKEN_OTHER;
 * this function never tries to read a quoted string.
 */
static void readTokenInScript (tokenInfo *const token)
{
	int ch;

	vStringClear (token->string);

	/* Skip leading white space; ch holds the first significant character. */
	do
		ch = getcFromInputFile ();
	while (isspace (ch));

	if (ch == EOF)
		token->type = TOKEN_EOF;
	else if (ch == '<')
	{
		/* Only "</" is recognized; any other follower is pushed back. */
		int next = getcFromInputFile ();

		if (next != '/')
		{
			ungetcToInputFile (next);
			token->type = TOKEN_OTHER;
		}
		else
			token->type = TOKEN_CLOSE_TAG_START;
	}
	else
	{
		/* Accumulate a lower-cased name until a delimiter or EOF shows up. */
		for (;;)
		{
			if (ch == EOF || isspace (ch))
				break;
			if (ch == '<' || ch == '>' || ch == '/' ||
				ch == '=' || ch == '\'' || ch == '"')
				break;

			vStringPut (token->string, tolower (ch));
			ch = getcFromInputFile ();
		}

		if (vStringLength (token->string) > 0)
		{
			token->type = TOKEN_NAME;
			/* The delimiter belongs to the next token. */
			if (ch != EOF)
				ungetcToInputFile (ch);
		}
		else
		{
			/* A lone delimiter: it stays consumed and nothing is collected. */
			token->type = TOKEN_OTHER;
		}
	}

	TRACE_PRINT("token (in script): %s (%s)", tokenTypes[token->type], vStringValue (token->string));
}
290+
239291
static void readToken (tokenInfo *const token, bool skipComments)
240292
{
241293
int c;
@@ -414,7 +466,7 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset)
414466
line_tmp[0] = getInputLineNumber ();
415467
lineOffset_tmp[0] = getInputLineOffset ();
416468

417-
readToken (token, false);
469+
readTokenInScript (token);
418470
type = token->type;
419471

420472
if (type == TOKEN_CLOSE_TAG_START)

0 commit comments

Comments
 (0)