Skip to content

Commit 1c424f9

Browse files
authored
Remove regex from the compiler (#12063)
Fixes #12062 Nerd-sniped myself reading this issue this morning. Seemed straightforward enough, ~but I decided to tweak the behaviour slightly because there seemed no point for the extra complexity of trying to keep track of how long it had been since a lowercase letter.~ I was overthinking the algorithm. ~| Method | Mean | Error | StdDev | Gen0 | Allocated | |------- |------------:|----------:|----------:|-------:|----------:| | Regex | 1,133.09 ns | 21.888 ns | 24.328 ns | 0.0191 | 112 B | | Loop | 98.81 ns | 2.070 ns | 2.301 ns | 0.0097 | 56 B |~
2 parents 6c585b6 + 2ae0d79 commit 1c424f9

File tree

2 files changed

+98
-21
lines changed

2 files changed

+98
-21
lines changed

src/Compiler/Microsoft.AspNetCore.Razor.Language/test/HtmlConventionsTest.cs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33

44
#nullable disable
55

6+
using System;
7+
using System.Text.RegularExpressions;
68
using Xunit;
79

810
namespace Microsoft.AspNetCore.Razor.Language;
911

1012
public class HtmlConventionsTest
1113
{
12-
public static TheoryData HtmlConversionData
14+
public static TheoryData<string, string> HtmlConversionData
1315
{
1416
get
1517
{
@@ -22,11 +24,21 @@ public static TheoryData HtmlConversionData
2224
{ "ALLCAPS", "allcaps" },
2325
{ "One1Two2Three3", "one1-two2-three3" },
2426
{ "ONE1TWO2THREE3", "one1two2three3" },
25-
{ "First_Second_ThirdHi", "first_second_third-hi" }
27+
{ "First_Second_ThirdHi", "first_second_third-hi" },
28+
{ "ONE1Two", "one1-two" },
29+
{ "One123Two234Three345", "one123-two234-three345" },
30+
{ "ONE123TWO234THREE345", "one123two234three345" },
31+
{ "1TWO2THREE3", "1two2three3" },
32+
{ "alllowercase", "alllowercase" },
2633
};
2734
}
2835
}
2936

37+
private static readonly Regex OldHtmlCaseRegex = new Regex(
38+
"(?<!^)((?<=[a-zA-Z0-9])[A-Z][a-z])|((?<=[a-z])[A-Z])",
39+
RegexOptions.None,
40+
TimeSpan.FromMilliseconds(500));
41+
3042
[Theory]
3143
[MemberData(nameof(HtmlConversionData))]
3244
public void ToHtmlCase_ReturnsExpectedConversions(string input, string expectedOutput)
@@ -35,6 +47,10 @@ public void ToHtmlCase_ReturnsExpectedConversions(string input, string expectedO
3547
var output = HtmlConventions.ToHtmlCase(input);
3648

3749
// Assert
38-
Assert.Equal(output, expectedOutput);
50+
Assert.Equal(expectedOutput, output);
51+
52+
// Ensure backwards compatibility with the old regex-based implementation
53+
var regexResult = OldHtmlCaseRegex.Replace(input, "-$1$2").ToLowerInvariant();
54+
Assert.Equal(regexResult, output);
3955
}
4056
}

src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/HtmlConventions.cs

Lines changed: 79 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,17 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4-
#nullable disable
5-
64
using System;
7-
using System.Text.RegularExpressions;
5+
using System.Diagnostics;
6+
using System.Diagnostics.CodeAnalysis;
7+
using Microsoft.AspNetCore.Razor.PooledObjects;
88

99
namespace Microsoft.AspNetCore.Razor.Language;
1010

1111
public static class HtmlConventions
1212
{
13-
private const string HtmlCaseRegexReplacement = "-$1$2";
1413
private static readonly char[] InvalidNonWhitespaceHtmlCharacters =
15-
new[] { '@', '!', '<', '/', '?', '[', '>', ']', '=', '"', '\'', '*' };
16-
17-
// This matches the following AFTER the start of the input string (MATCH).
18-
// Any letter/number followed by an uppercase letter then lowercase letter: 1(Aa), a(Aa), A(Aa)
19-
// Any lowercase letter followed by an uppercase letter: a(A)
20-
// Each match is then prefixed by a "-" via the ToHtmlCase method.
21-
private static readonly Regex HtmlCaseRegex =
22-
new Regex(
23-
"(?<!^)((?<=[a-zA-Z0-9])[A-Z][a-z])|((?<=[a-z])[A-Z])",
24-
RegexOptions.None,
25-
TimeSpan.FromMilliseconds(500));
26-
14+
['@', '!', '<', '/', '?', '[', '>', ']', '=', '"', '\'', '*'];
2715

2816
internal static bool IsInvalidNonWhitespaceHtmlCharacters(char testChar)
2917
{
@@ -50,8 +38,81 @@ internal static bool IsInvalidNonWhitespaceHtmlCharacters(char testChar)
5038
/// ONE1TWO2THREE3 => one1two2three3
5139
/// First_Second_ThirdHi => first_second_third-hi
5240
/// </example>
53-
public static string ToHtmlCase(string name)
41+
public static string ToHtmlCase(string input)
42+
{
43+
if (string.IsNullOrEmpty(input))
44+
{
45+
return input;
46+
}
47+
48+
return TryGetKebabCaseString(input, out var result)
49+
? result
50+
: input;
51+
}
52+
53+
private static bool TryGetKebabCaseString(ReadOnlySpan<char> input, [NotNullWhen(true)] out string? result)
5454
{
55-
return HtmlCaseRegex.Replace(name, HtmlCaseRegexReplacement).ToLowerInvariant();
55+
using var _ = StringBuilderPool.GetPooledObject(out var builder);
56+
57+
var allLower = true;
58+
var i = 0;
59+
foreach (var c in input)
60+
{
61+
if (char.IsUpper(c))
62+
{
63+
allLower = false;
64+
65+
if (ShouldInsertHyphenBeforeUppercase(input, i))
66+
{
67+
builder.Append('-');
68+
}
69+
70+
builder.Append(char.ToLowerInvariant(c));
71+
}
72+
else
73+
{
74+
builder.Append(c);
75+
}
76+
77+
i++;
78+
}
79+
80+
if (allLower)
81+
{
82+
// If the input contains no uppercase characters, the output would be identical to the input,
83+
// so we don't need to realize the builder; it will just be cleared when the pooled object is disposed.
84+
result = null;
85+
return false;
86+
}
87+
88+
result = builder.ToString();
89+
return true;
90+
}
91+
92+
private static bool ShouldInsertHyphenBeforeUppercase(ReadOnlySpan<char> input, int i)
93+
{
94+
Debug.Assert(char.IsUpper(input[i]));
95+
96+
if (i == 0)
97+
{
98+
// First character is uppercase, no hyphen needed (e.g. This → this)
99+
return false;
100+
}
101+
102+
var prev = input[i - 1];
103+
if (char.IsLower(prev))
104+
{
105+
// Lowercase followed by uppercase (e.g. someThing → some-thing)
106+
return true;
107+
}
108+
109+
if ((char.IsUpper(prev) || char.IsDigit(prev)) &&
110+
(i + 1 < input.Length) && char.IsLower(input[i + 1]))
111+
{
112+
// Uppercase or digit followed by uppercase, followed by lowercase (e.g. CAPSOn → caps-on or ONE1Two → one1-two)
113+
return true;
114+
}
115+
116+
return false;
56117
}
57118
}

0 commit comments

Comments
 (0)