Skip to content
This repository was archived by the owner on Jul 12, 2022. It is now read-only.

Commit 34abb33

Browse files
committed
Add a rule to enforce ASCII only literals
We have decided that literals may contain only ASCII characters. This rule enforces that guideline by replacing non-ASCII characters with \uXXXX or \UXXXXXXXX escape sequences.
1 parent b2e566e commit 34abb33

File tree

4 files changed

+184
-0
lines changed

4 files changed

+184
-0
lines changed

src/Microsoft.DotNet.CodeFormatting.Tests/Microsoft.DotNet.CodeFormatting.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
<Compile Include="Rules\HasNoNewLineBeforeEndBraceFormattingRuleTests.cs" />
113113
<Compile Include="Rules\HasPrivateAccessorOnFieldNamesFormattingRuleTests.cs" />
114114
<Compile Include="Rules\HasUnderScoreInPrivateFieldNamesFormattingRuleTests.cs" />
115+
<Compile Include="Rules\NonAsciiCharactersAreEscapedInLiteralsRuleTests.cs" />
115116
<Compile Include="Rules\UsesXunitForTestsFormattingRuleTests.cs" />
116117
</ItemGroup>
117118
<ItemGroup>
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
3+
4+
using Xunit;
5+
6+
namespace Microsoft.DotNet.CodeFormatting.Tests
7+
{
8+
public class NonAsciiChractersAreEscapedInLiteralsTests : CodeFormattingTestBase
9+
{
10+
/// <summary>
/// Checks that source whose only non-ASCII characters (U+2713 and U+1F308) appear inside
/// single-line and block comments is expected to come back unchanged: the expected text
/// is the input text itself.
/// </summary>
[Fact]
11+
public void CanUseNonAsciiCharactersInComments()
12+
{
13+
var text = string.Format(@"
14+
// It's oaky to use non ASCII characters like {0} (CHECK MARK U+2713) or {1} (RAINBOW U+1F308) in comments.
15+
/*
16+
It's oaky to use non ASCII characters like {0} (CHECK MARK U+2713) or {1} (RAINBOW U+1F308) in comments.
17+
*/
18+
", '\u2713', "\U0001F308");
19+
// The rule must leave comments alone, so input and expected output are the same string.
var expected = text;
20+
21+
// Verify comes from the base class (not shown here); presumably it runs the rule from
// GetFormattingRule over the input and compares the result with the expected text.
Verify(text, expected);
22+
}
23+
24+
/// <summary>
/// Checks that non-ASCII characters in a regular string literal, a verbatim string literal
/// and a character literal are expected to be replaced by \uXXXX / \UXXXXXXXX sequences.
/// </summary>
[Fact]
25+
public void DoNotAllowUnicodeInLiterals()
26+
{
27+
var text = string.Format(@"
28+
using System;
29+
30+
class Test
31+
{{
32+
public static readonly string BadString = ""This has {0} and {1}, which are both bad."";
33+
public static readonly string AnotherBadString = @""This has {0} and {1}, which are both bad."";
34+
public const char BadChar = '{0}';
35+
}}
36+
", '\u2713', "\U0001F308");
37+
38+
// The expected text is a plain verbatim string (no string.Format), so braces are single
// here and the backslash sequences below are literal backslashes in the expected source.
// NOTE(review): AnotherBadString is a verbatim literal in the formatted source; C# does not
// interpret \u / \U inside verbatim strings, so the rewritten program's string value would
// differ from the original's. Confirm this is the intended outcome of the rule.
var expected = @"
39+
using System;
40+
41+
class Test
42+
{
43+
public static readonly string BadString = ""This has \u2713 and \U0001F308, which are both bad."";
44+
public static readonly string AnotherBadString = @""This has \u2713 and \U0001F308, which are both bad."";
45+
public const char BadChar = '\u2713';
46+
}
47+
";
48+
Verify(text, expected);
49+
}
50+
51+
/// <summary>
/// Supplies the formatting rule exercised by the tests in this class.
/// </summary>
/// <returns>A new instance of the non-ASCII literal escaping rule.</returns>
internal override IFormattingRule GetFormattingRule()
{
    IFormattingRule ruleUnderTest = new Rules.NonAsciiChractersAreEscapedInLiterals();
    return ruleUnderTest;
}
55+
}
56+
}

src/Microsoft.DotNet.CodeFormatting/Microsoft.DotNet.CodeFormatting.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
<Compile Include="Rules\HasUsingsOutsideOfNamespaceFormattingRule.cs" />
100100
<Compile Include="Rules\IsFormattedFormattingRule.cs" />
101101
<Compile Include="Rules\IsSimplifiedFormattingRule.cs" />
102+
<Compile Include="Rules\NonAsciiCharactersAreEscapedInLiteralsRule.cs" />
102103
<Compile Include="Rules\RuleExtensions.cs" />
103104
<Compile Include="Rules\UsesXunitForTestsFormattingRule.cs" />
104105
<Compile Include="RuleTypeConstants.cs" />
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under MIT. See LICENSE in the project root for license information.
3+
using System;
4+
using System.ComponentModel.Composition;
5+
using System.Diagnostics;
6+
using System.Text;
7+
using System.Threading;
8+
using System.Threading.Tasks;
9+
10+
using Microsoft.CodeAnalysis;
11+
using Microsoft.CodeAnalysis.CSharp;
12+
using Microsoft.CodeAnalysis.CSharp.Syntax;
13+
14+
namespace Microsoft.DotNet.CodeFormatting.Rules
15+
{
16+
internal sealed class NonAsciiChractersAreEscapedInLiterals : IFormattingRule
17+
{
18+
/// <summary>
/// Runs the literal-escaping rewriter over the document's syntax tree.
/// </summary>
/// <param name="document">The document to process.</param>
/// <param name="cancellationToken">Token passed through to the syntax-root lookup.</param>
/// <returns>
/// The document with its syntax root replaced by the rewritten tree, or the original
/// document when its root is not a <see cref="CompilationUnitSyntax"/>.
/// </returns>
public async Task<Document> ProcessAsync(Document document, CancellationToken cancellationToken)
{
    SyntaxNode syntaxRoot = await document.GetSyntaxRootAsync(cancellationToken);
    var compilationUnit = syntaxRoot as CompilationUnitSyntax;

    if (compilationUnit != null)
    {
        SyntaxNode escapedRoot = UnicodeCharacterEscapingSyntaxRewriter.Rewriter.Visit(compilationUnit);
        return document.WithSyntaxRoot(escapedRoot);
    }

    // No C# compilation unit to rewrite: hand the document back untouched.
    return document;
}
29+
30+
/// <summary>
31+
/// Rewrites string and character literals that contain non-ASCII characters to use the \uXXXX or \UXXXXXXXX escape syntax instead.
32+
/// </summary>
33+
class UnicodeCharacterEscapingSyntaxRewriter : CSharpSyntaxRewriter
34+
{
35+
/// <summary>
/// The single shared rewriter instance. This class declares no instance fields of its own.
/// </summary>
public static readonly UnicodeCharacterEscapingSyntaxRewriter Rewriter = new UnicodeCharacterEscapingSyntaxRewriter();

/// <summary>
/// Private so that <see cref="Rewriter"/> is the only instance ever created.
/// </summary>
private UnicodeCharacterEscapingSyntaxRewriter() { }
40+
41+
/// <summary>
/// Escapes non-ASCII characters in string and character literals; every other kind of
/// literal (numeric, boolean, null, ...) is handed to the base rewriter.
/// </summary>
/// <param name="node">The literal expression being visited.</param>
/// <returns>
/// The rewritten literal for string and character literals, otherwise the result of the
/// base class's handling of the literal.
/// </returns>
public override SyntaxNode VisitLiteralExpression(LiteralExpressionSyntax node)
{
    switch (node.CSharpKind())
    {
        case SyntaxKind.StringLiteralExpression:
            return RewriteStringLiteralExpression(node);

        case SyntaxKind.CharacterLiteralExpression:
            return RewriteCharacterLiteralExpression(node);

        default:
            // This must be base.VisitLiteralExpression, not base.Visit: Visit dispatches on
            // the node type and would land straight back in this override, recursing until
            // the stack overflows for any literal that is not a string or a character.
            return base.VisitLiteralExpression(node);
    }
}
53+
54+
/// <summary>
/// Replaces the token of a string literal with one whose source text has every non-ASCII
/// character written as a \uXXXX or \UXXXXXXXX sequence. Leading and trailing trivia and
/// the token's value text are carried over from the original token.
/// </summary>
/// <param name="node">A string literal expression.</param>
/// <returns>The updated node, or <paramref name="node"/> itself when its text is already ASCII.</returns>
/// <remarks>
/// NOTE(review): the same substitution is applied to verbatim (@"...") literals, where C# does
/// not interpret \u / \U sequences, so the compiled string value changes. Confirm this is intended.
/// </remarks>
private static SyntaxNode RewriteStringLiteralExpression(LiteralExpressionSyntax node)
{
    Debug.Assert(node.CSharpKind() == SyntaxKind.StringLiteralExpression);

    SyntaxToken originalToken = node.Token;
    string sourceText = originalToken.Text;

    if (!HasNonAsciiCharacters(sourceText))
    {
        return node;
    }

    string escapedText = EscapeNonAsciiCharacters(sourceText);
    SyntaxToken escapedToken = SyntaxFactory.Literal(
        originalToken.LeadingTrivia,
        escapedText,
        originalToken.ValueText,
        originalToken.TrailingTrivia);

    return node.WithToken(escapedToken);
}
69+
70+
/// <summary>
/// Replaces the token of a character literal with one whose source text has every non-ASCII
/// character written as a \uXXXX or \UXXXXXXXX sequence. Leading and trailing trivia are
/// carried over from the original token.
/// </summary>
/// <param name="node">A character literal expression.</param>
/// <returns>The updated node, or <paramref name="node"/> itself when its text is already ASCII.</returns>
private static SyntaxNode RewriteCharacterLiteralExpression(LiteralExpressionSyntax node)
{
    Debug.Assert(node.CSharpKind() == SyntaxKind.CharacterLiteralExpression);

    SyntaxToken originalToken = node.Token;
    string sourceText = originalToken.Text;

    if (!HasNonAsciiCharacters(sourceText))
    {
        return node;
    }

    string escapedText = EscapeNonAsciiCharacters(sourceText);
    object literalValue = originalToken.Value;

    // Use the char overload of SyntaxFactory.Literal so the replacement is a character-literal
    // token. The string overload builds a string-literal token, which would leave a
    // character-literal expression holding the wrong kind of token. Fall back to the string
    // overload only if the token carries no char value (e.g. a malformed literal).
    SyntaxToken escapedToken = literalValue is char
        ? SyntaxFactory.Literal(originalToken.LeadingTrivia, escapedText, (char)literalValue, originalToken.TrailingTrivia)
        : SyntaxFactory.Literal(originalToken.LeadingTrivia, escapedText, originalToken.ValueText, originalToken.TrailingTrivia);

    return node.WithToken(escapedToken);
}
85+
86+
87+
/// <summary>
/// Reports whether any UTF-16 code unit in <paramref name="value"/> lies outside the
/// ASCII range (that is, is 0x80 or above).
/// </summary>
/// <param name="value">The text to scan.</param>
/// <returns><c>true</c> if at least one code unit is 0x80 or above; otherwise <c>false</c>.</returns>
private static bool HasNonAsciiCharacters(string value)
{
    foreach (char codeUnit in value)
    {
        if (codeUnit > 0x7F)
        {
            return true;
        }
    }

    return false;
}
99+
100+
/// <summary>
/// Produces a copy of <paramref name="oldValue"/> in which ASCII characters are kept as they
/// are, each valid surrogate pair becomes a single \UXXXXXXXX sequence, and every other
/// non-ASCII code unit (including an unpaired surrogate) becomes a \uXXXX sequence.
/// </summary>
/// <param name="oldValue">The text to escape.</param>
/// <returns>The escaped text, with hexadecimal digits in upper case.</returns>
private static string EscapeNonAsciiCharacters(string oldValue)
{
    var escaped = new StringBuilder(oldValue.Length);
    int index = 0;

    while (index < oldValue.Length)
    {
        char current = oldValue[index];

        if (current < 0x80)
        {
            escaped.Append(current);
            index++;
            continue;
        }

        bool startsSurrogatePair =
            char.IsHighSurrogate(current)
            && index + 1 < oldValue.Length
            && char.IsLowSurrogate(oldValue[index + 1]);

        if (startsSurrogatePair)
        {
            int codePoint = char.ConvertToUtf32(current, oldValue[index + 1]);
            escaped.Append(string.Format(@"\U{0:X8}", codePoint));
            index += 2; // both halves of the surrogate pair have been consumed
        }
        else
        {
            escaped.Append(string.Format(@"\u{0:X4}", (ushort)current));
            index++;
        }
    }

    return escaped.ToString();
}
123+
124+
}
125+
}
126+
}

0 commit comments

Comments
 (0)