Skip to content

Commit 0d5fb8c

Browse files
Improves non-HTML token writing
Refactors the non-HTML token writing process to avoid string allocations by writing whitespace directly from the original token content.
1 parent 8a4beb0 commit 0d5fb8c

File tree

1 file changed

+74
-34
lines changed

1 file changed

+74
-34
lines changed

src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorHtmlWriter.cs

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ internal sealed class RazorHtmlWriter : SyntaxWalker
3434
// can be written as a block, allowing any block of 4 characters or more to be written as a comment (i.e. '/**/')
3535
// which takes pressure off the TypeScript/JavaScript compiler. Doing this per token means we can end up with
3636
// "@className" being written as '~/*~~~~~*/', which means Html formatting will insert a space which breaks things.
37-
private int _tildesToWrite;
37+
private int _placeholderSize;
3838

3939
private RazorHtmlWriter(RazorSourceDocument source, CodeWriter codeWriter, ImmutableArray<SourceMapping>.Builder sourceMappings)
4040
{
@@ -205,8 +205,14 @@ public override void VisitUnclassifiedTextLiteral(UnclassifiedTextLiteralSyntax
205205

206206
public override void VisitToken(SyntaxToken token)
207207
{
208-
base.VisitToken(token);
209-
WriteToken(token);
208+
if (_isWritingHtml)
209+
{
210+
WriteHtmlToken(token);
211+
}
212+
else
213+
{
214+
WriteNonHtmlToken(token);
215+
}
210216
}
211217

212218
private readonly ref struct WriterStateSaver
@@ -232,35 +238,6 @@ private WriterStateSaver IsHtml()
232238

233239
private WriterStateSaver IsNotHtml()
234240
=> new(this, isWritingHtml: false);
235-
236-
private void WriteToken(SyntaxToken token)
237-
{
238-
if (_isWritingHtml)
239-
{
240-
WriteHtmlToken(token);
241-
return;
242-
}
243-
244-
// If we were tracking a source mapping span before now, add it to the list. Importantly there are cases
245-
// where there are 0-length C# nodes, so this step is very important if the source mappings are to match
246-
// the syntax tree.
247-
AddLastSourceMappingAndClear();
248-
249-
// We're in non-HTML context. Let's replace all non-whitespace chars with a tilde(~).
250-
foreach (var c in token.Content)
251-
{
252-
if (char.IsWhiteSpace(c))
253-
{
254-
WriteCSharpContentPlaceholder();
255-
_codeWriter.Write(c.ToString());
256-
}
257-
else
258-
{
259-
_tildesToWrite++;
260-
}
261-
}
262-
}
263-
264241
private void WriteHtmlToken(SyntaxToken token)
265242
{
266243
var content = token.Content;
@@ -301,6 +278,68 @@ private void WriteHtmlToken(SyntaxToken token)
301278
_lastSpans = (newOriginal, newGenerated);
302279
}
303280

281+
/// <summary>
/// Writes a token that is outside of HTML context. Each non-whitespace character only
/// increases the pending C# content placeholder size; each run of whitespace is written
/// through verbatim, preceded by whatever placeholder is pending at that point.
/// </summary>
/// <param name="token">The token whose content should be written.</param>
private void WriteNonHtmlToken(SyntaxToken token)
{
    // If we're tracking a source mapping span, add it to the list. There are cases where there
    // are 0-length C# nodes, so it's important to perform this step before checking the token
    // content to ensure the source mappings match the syntax tree.
    AddLastSourceMappingAndClear();

    var content = token.Content.AsMemory();
    if (content.IsEmpty)
    {
        // If the token is empty, we don't need to do anything further.
        return;
    }

    // ReadOnlyMemory<T>.Span is a computed property, so fetch the span once instead of
    // re-evaluating it for every character. The memory is still needed for slicing below.
    var span = content.Span;

    // To avoid allocating new strings, whitespace is written as slices of the original token
    // content. While inside a run of whitespace, whitespaceIndex holds the index at which that
    // run started; otherwise it is -1.
    var whitespaceIndex = -1;

    for (var i = 0; i < span.Length; i++)
    {
        if (char.IsWhiteSpace(span[i]))
        {
            // Transitioning from non-whitespace to whitespace: remember where the run starts.
            if (whitespaceIndex < 0)
            {
                whitespaceIndex = i;
            }

            continue;
        }

        // At this point, we have a non-whitespace character. If a whitespace run just ended,
        // flush the pending C# content placeholder (if any) followed by that whitespace.
        if (whitespaceIndex >= 0)
        {
            WriteCSharpContentPlaceholder();
            _codeWriter.Write(content[whitespaceIndex..i]);

            // We're transitioning from whitespace to non-whitespace, so reset the index.
            whitespaceIndex = -1;
        }

        // Every non-whitespace character, whether or not a whitespace run was just flushed,
        // grows the placeholder that will be written later.
        _placeholderSize++;
    }

    // If the content ended while still inside a whitespace run, flush the pending
    // C# content placeholder and the trailing whitespace.
    if (whitespaceIndex >= 0)
    {
        WriteCSharpContentPlaceholder();
        _codeWriter.Write(content[whitespaceIndex..]);
    }
}
342+
304343
/// <summary>
305344
/// Returns <see langword="true"/> if the new span starts after the last span.
306345
/// </summary>
@@ -312,15 +351,16 @@ private static SourceSpan ExtendSpan(SourceSpan span, int length, int endCharact
312351

313352
private void WriteCSharpContentPlaceholder()
314353
{
315-
var tildesToWrite = _tildesToWrite;
354+
var tildesToWrite = _placeholderSize;
316355

317356
if (tildesToWrite == 0)
318357
{
319358
// Nothing to write, so just return
320359
return;
321360
}
322361

323-
_tildesToWrite = 0;
362+
// Reset the placeholder size.
363+
_placeholderSize = 0;
324364

325365
var writeComment = false;
326366

0 commit comments

Comments
 (0)