Skip to content

Commit 9b43090

Browse files
authored
Merge pull request #1900 from riganti/html-script-raw-parsing
Special-case parsing of <script> and <style> elements, similarly to HTML
2 parents 3bd2a01 + f1526d7 commit 9b43090

File tree

6 files changed

+319
-14
lines changed

6 files changed

+319
-14
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using DotVVM.Framework.Configuration;
5+
6+
namespace DotVVM.Framework.Compilation.Parser.Dothtml;
7+
8+
public sealed class DotvvmSyntaxConfiguration
9+
{
10+
private readonly HashSet<string> rawTextElements;
11+
public IEnumerable<string> RawTextElements => rawTextElements;
12+
13+
public bool IsRawTextElement(string elementName) =>
14+
rawTextElements.Contains(elementName);
15+
16+
public DotvvmSyntaxConfiguration(IEnumerable<string> rawTextElements)
17+
{
18+
this.rawTextElements = rawTextElements.ToHashSet(StringComparer.OrdinalIgnoreCase);
19+
}
20+
21+
public static DotvvmSyntaxConfiguration FromMarkupConfig(DotvvmMarkupConfiguration markupConfiguration)
22+
{
23+
var rawTextElements = markupConfiguration.RawTextElements;
24+
25+
return new DotvvmSyntaxConfiguration(rawTextElements);
26+
}
27+
28+
public static DotvvmSyntaxConfiguration Default { get; } = new DotvvmSyntaxConfiguration(["script", "style", "dot:InlineScript", "dot:HtmlLiteral"]);
29+
}

src/Framework/Framework/Compilation/Parser/Dothtml/Tokenizer/DothtmlTokenizer.cs

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ namespace DotVVM.Framework.Compilation.Parser.Dothtml.Tokenizer
1313
/// </summary>
1414
public class DothtmlTokenizer : TokenizerBase<DothtmlToken, DothtmlTokenType>
1515
{
16-
public DothtmlTokenizer() : base(DothtmlTokenType.Text, DothtmlTokenType.WhiteSpace)
16+
private readonly DotvvmSyntaxConfiguration config;
17+
18+
public DothtmlTokenizer(DotvvmSyntaxConfiguration? config = null) : base(DothtmlTokenType.Text, DothtmlTokenType.WhiteSpace)
1719
{
20+
this.config = config ?? DotvvmSyntaxConfiguration.Default;
1821
}
1922

2023
private static bool IsAllowedAttributeFirstChar(char ch)
@@ -249,13 +252,15 @@ private ReadElementType ReadElement(bool wasOpenBraceRead = false)
249252
}
250253

251254
// read tag name
252-
if (!ReadTagOrAttributeName(isAttributeName: false))
255+
if (!ReadTagOrAttributeName(isAttributeName: false, out var tagPrefix, out var tagName))
253256
{
254257
CreateToken(DothtmlTokenType.Text, errorProvider: t => CreateTokenError(t, DothtmlTokenType.OpenTag, DothtmlTokenizerErrors.TagNameExpected));
255258
CreateToken(DothtmlTokenType.CloseTag, errorProvider: t => CreateTokenError());
256259
return ReadElementType.Error;
257260
}
258261

262+
var tagFullName = tagPrefix is null ? tagName ?? "" : tagPrefix + ":" + tagName;
263+
259264
// read tag attributes
260265
SkipWhitespace();
261266
if (!isClosingTag)
@@ -291,11 +296,14 @@ private ReadElementType ReadElement(bool wasOpenBraceRead = false)
291296
}
292297
}
293298

299+
bool isSelfClosing = false;
300+
294301
if (Peek() == '/' && !isClosingTag)
295302
{
296303
// self closing tag
297304
Read();
298305
CreateToken(DothtmlTokenType.Slash, "/");
306+
isSelfClosing = true;
299307
}
300308
if (Peek() != '>')
301309
{
@@ -306,20 +314,74 @@ private ReadElementType ReadElement(bool wasOpenBraceRead = false)
306314

307315
Read();
308316
CreateToken(DothtmlTokenType.CloseTag, ">");
317+
318+
if (!isClosingTag && !isSelfClosing && config.IsRawTextElement(tagFullName))
319+
{
320+
// HTML <script>, <style> tags: read content until we find the closing tag, i.e. the `</script` sequence
321+
ReadRawTextTag(tagFullName);
322+
return ReadElementType.RawTextTag;
323+
}
324+
309325
return ReadElementType.ValidTag;
310326
}
311327

312328
public enum ReadElementType
313329
{
314330
Error,
315331
ValidTag,
332+
RawTextTag,
316333
CData,
317334
Comment,
318335
Doctype,
319336
XmlProcessingInstruction,
320337
ServerComment
321338
}
322339

340+
public void ReadRawTextTag(string name)
341+
{
342+
// Read everything as raw text until the matching end tag
343+
// used for parsing <script>, <style>, <dot:InlineScript>, <dot:HtmlLiteral>
344+
while (Peek() != NullChar)
345+
{
346+
if (PeekIsString("</") &&
347+
PeekSpan(name.Length + 2).Slice(2).Equals(name.AsSpan(), StringComparison.OrdinalIgnoreCase) &&
348+
!char.IsLetterOrDigit(Peek(name.Length + 2)))
349+
{
350+
CreateToken(DothtmlTokenType.Text);
351+
Debug.Assert(Peek() == '<');
352+
Read();
353+
CreateToken(DothtmlTokenType.OpenTag);
354+
355+
Debug.Assert(Peek() == '/');
356+
Read();
357+
CreateToken(DothtmlTokenType.Slash);
358+
359+
if (!ReadTagOrAttributeName(isAttributeName: false, out _, out _))
360+
{
361+
CreateToken(DothtmlTokenType.Text, errorProvider: t => CreateTokenError(t, DothtmlTokenType.OpenTag, DothtmlTokenizerErrors.TagNameExpected));
362+
}
363+
364+
SkipWhitespace();
365+
366+
if (Read() != '>')
367+
{
368+
CreateToken(DothtmlTokenType.CloseTag, errorProvider: t => CreateTokenError(t, DothtmlTokenType.OpenTag, DothtmlTokenizerErrors.TagNotClosed));
369+
}
370+
else
371+
{
372+
CreateToken(DothtmlTokenType.CloseTag);
373+
}
374+
375+
return;
376+
}
377+
Read();
378+
}
379+
380+
// not terminated
381+
382+
CreateToken(DothtmlTokenType.Text, errorProvider: t => CreateTokenError(t, DothtmlTokenType.OpenTag, DothtmlTokenizerErrors.TagNotClosed));
383+
}
384+
323385
public ReadElementType ReadHtmlSpecial(bool openBraceConsumed = false)
324386
{
325387
var s = ReadOneOf("![CDATA[", "!--", "!DOCTYPE", "?", "%--");
@@ -437,7 +499,7 @@ private void Assert(bool expression)
437499
/// <summary>
438500
/// Reads the name of the tag or attribute.
439501
/// </summary>
440-
private bool ReadTagOrAttributeName(bool isAttributeName)
502+
private bool ReadTagOrAttributeName(bool isAttributeName, out string? prefix, out string? name)
441503
{
442504
var readIdentifierFunc = isAttributeName ? (Func<DothtmlTokenType, char, bool>)ReadAttributeName : (Func<DothtmlTokenType, char, bool>)ReadIdentifier;
443505

@@ -446,6 +508,7 @@ private bool ReadTagOrAttributeName(bool isAttributeName)
446508
// read the identifier
447509
if (!readIdentifierFunc(DothtmlTokenType.Text, ':'))
448510
{
511+
prefix = name = null;
449512
return false;
450513
}
451514
}
@@ -457,14 +520,23 @@ private bool ReadTagOrAttributeName(bool isAttributeName)
457520

458521
if (Peek() == ':')
459522
{
523+
prefix = Tokens[^1].Text;
524+
460525
Read();
461526
CreateToken(DothtmlTokenType.Colon, ":");
462527

463528
if (!readIdentifierFunc(DothtmlTokenType.Text, '\0'))
464529
{
465530
CreateToken(DothtmlTokenType.Text, errorProvider: t => CreateTokenError(t, DothtmlTokenType.OpenTag, DothtmlTokenizerErrors.MissingTagName));
531+
name = null;
466532
return true;
467533
}
534+
name = Tokens[^1].Text;
535+
}
536+
else
537+
{
538+
prefix = null;
539+
name = Tokens[^1].Text;
468540
}
469541

470542
SkipWhitespace();
@@ -477,7 +549,7 @@ private bool ReadTagOrAttributeName(bool isAttributeName)
477549
private bool ReadAttribute()
478550
{
479551
// attribute name
480-
if (!ReadTagOrAttributeName(isAttributeName: true))
552+
if (!ReadTagOrAttributeName(isAttributeName: true, out _, out _))
481553
{
482554
return false;
483555
}

src/Framework/Framework/Compilation/Parser/TokenizerBase.cs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -188,12 +188,7 @@ protected bool ReadTextUntil(TTokenType tokenType, string stopString, bool stopO
188188
protected bool PeekIsString(string? str)
189189
{
190190
if (str is null) return false;
191-
if (Peek() != str[0]) return false;
192-
for (int i = 1; i < str.Length; i++)
193-
{
194-
if (Peek(i) != str[i]) return false;
195-
}
196-
return true;
191+
return PeekSpan(str.Length).SequenceEqual(str.AsSpan());
197192
}
198193

199194
protected string? ReadOneOf(params string[] strings)
@@ -212,8 +207,6 @@ protected bool PeekIsString(string? str)
212207

213208
protected abstract TToken NewToken(string text, TTokenType type, int lineNumber, int columnNumber, int length, int startPosition);
214209

215-
char[] tokenCharBuffer = new char[20];
216-
217210
protected string GetCurrentTokenText(int charsFromEndToSkip = 0)
218211
{
219212
var start = LastTokenPosition;
@@ -302,6 +295,9 @@ protected void OnTokenFound(TToken token)
302295
TokenFound?.Invoke(token);
303296
}
304297

298+
protected ReadOnlySpan<char> PeekSpan(int length) =>
299+
sourceText.AsSpan(position, Math.Min(length, sourceText.Length - position));
300+
305301
/// <summary>
306302
/// Peeks the current char.
307303
/// </summary>

src/Framework/Framework/Configuration/DotvvmMarkupConfiguration.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
using DotVVM.Framework.Compilation.ControlTree.Resolved;
1010
using DotVVM.Framework.Compilation.Javascript;
1111
using System.Text.Json.Serialization;
12+
using DotVVM.Framework.Compilation.Parser.Dothtml;
1213

1314
namespace DotVVM.Framework.Configuration
1415
{
@@ -71,6 +72,15 @@ public IList<BindingExtensionParameter> DefaultExtensionParameters
7172

7273
public ViewCompilationConfiguration ViewCompilation { get; private set; } = new ViewCompilationConfiguration();
7374

75+
/// <summary> List of HTML elements whose content is not parsed as [dot]html, but treated as raw text until the end tag. By default it is the <c>script</c> and <c>style</c> tags in addition to the DotVVM <c>dot:InlineScript</c> and <c>dot:HtmlLiteral</c> controls. The property is meant primarily as a compatibility option, as it may be ignored by tooling. </summary>
76+
[JsonPropertyName("rawTextElements")]
77+
public IList<string> RawTextElements
78+
{
79+
get => _rawTextElements;
80+
set { ThrowIfFrozen(); _rawTextElements = [..value]; }
81+
}
82+
private IList<string> _rawTextElements = new FreezableList<string>(DotvvmSyntaxConfiguration.Default.RawTextElements);
83+
7484

7585
public void AddServiceImport(string identifier, Type type)
7686
{
@@ -197,6 +207,7 @@ public void Freeze()
197207
FreezableList.Freeze(ref _importedNamespaces);
198208
JavascriptTranslator.Freeze();
199209
FreezableList.Freeze(ref _defaultExtensionParameters);
210+
FreezableList.Freeze(ref _rawTextElements);
200211
}
201212
}
202213
}

0 commit comments

Comments
 (0)