Skip to content
This repository was archived by the owner on Jul 12, 2022. It is now read-only.

Commit da37746

Browse files
committed
Merge pull request #8 from ellismg/normalize-unicode-literals
Add a rule to enforce ASCII only literals
2 parents 70793e5 + bc872e3 commit da37746

File tree

4 files changed

+184
-0
lines changed

4 files changed

+184
-0
lines changed

src/Microsoft.DotNet.CodeFormatting.Tests/Microsoft.DotNet.CodeFormatting.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
<Compile Include="Rules\HasNoNewLineAfterOpenBraceFormattingRuleTests.cs" />
113113
<Compile Include="Rules\HasNoNewLineBeforeEndBraceFormattingRuleTests.cs" />
114114
<Compile Include="Rules\HasUnderScoreInPrivateFieldNamesFormattingRuleTests.cs" />
115+
<Compile Include="Rules\NonAsciiCharactersAreEscapedInLiteralsRuleTests.cs" />
115116
<Compile Include="Rules\UsesXunitForTestsFormattingRuleTests.cs" />
116117
</ItemGroup>
117118
<ItemGroup>
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
3+
4+
using Xunit;
5+
6+
namespace Microsoft.DotNet.CodeFormatting.Tests
7+
{
8+
/// <summary>
/// Tests for the rule returned by <see cref="GetFormattingRule"/>
/// (<c>Rules.NonAsciiChractersAreEscapedInLiterals</c>): non-ASCII characters are expected to
/// survive in comments and to be replaced by \u / \U escape text inside literals.
/// </summary>
public class NonAsciiChractersAreEscapedInLiteralsTests : CodeFormattingTestBase
9+
{
10+
/// <summary>
/// Non-ASCII characters that appear only in a single-line comment and a block comment
/// must come out unchanged (the expected text is the input text itself).
/// </summary>
[Fact]
11+
public void CanUseNonAsciiCharactersInComments()
12+
{
13+
// {0} is U+2713 (one UTF-16 code unit); {1} is U+1F308 (a surrogate pair), so both the
// BMP and the supplementary-plane case are present in the input.
var text = string.Format(@"
14+
// It's oaky to use non ASCII characters like {0} (CHECK MARK U+2713) or {1} (RAINBOW U+1F308) in comments.
15+
/*
16+
It's oaky to use non ASCII characters like {0} (CHECK MARK U+2713) or {1} (RAINBOW U+1F308) in comments.
17+
*/
18+
", '\u2713', "\U0001F308");
19+
// No rewrite expected: the output must be identical to the input.
var expected = text;
20+
21+
Verify(text, expected);
22+
}
23+
24+
/// <summary>
/// Raw non-ASCII characters inside a regular string literal, a verbatim string literal and a
/// character literal are expected to be replaced by their \uXXXX / \UXXXXXXXX escape text.
/// </summary>
[Fact]
25+
public void DoNotAllowUnicodeInLiterals()
26+
{
27+
// The input goes through string.Format, so the class braces are doubled ({{ and }}) and
// {0}/{1} are substituted with the actual U+2713 and U+1F308 characters.
var text = string.Format(@"
28+
using System;
29+
30+
class Test
31+
{{
32+
public static readonly string BadString = ""This has {0} and {1}, which are both bad."";
33+
public static readonly string AnotherBadString = @""This has {0} and {1}, which are both bad."";
34+
public const char BadChar = '{0}';
35+
}}
36+
", '\u2713', "\U0001F308");
37+
38+
// The expected text is a plain verbatim literal (no string.Format), so the \u2713 and
// \U0001F308 sequences below are literal backslash text, and the braces are not doubled.
// NOTE(review): the expected output also places \u escapes inside the @"" literal. C# verbatim
// strings do not process escape sequences, so that rewritten literal denotes different text
// than the input literal did - confirm this is the intended behaviour.
var expected = @"
39+
using System;
40+
41+
class Test
42+
{
43+
public static readonly string BadString = ""This has \u2713 and \U0001F308, which are both bad."";
44+
public static readonly string AnotherBadString = @""This has \u2713 and \U0001F308, which are both bad."";
45+
public const char BadChar = '\u2713';
46+
}
47+
";
48+
Verify(text, expected);
49+
}
50+
51+
/// <summary>
/// Supplies a new instance of the rule under test. Declared as an override, so it is
/// presumably called by the CodeFormattingTestBase harness (not visible here) - confirm.
/// </summary>
internal override IFormattingRule GetFormattingRule()
52+
{
53+
return new Rules.NonAsciiChractersAreEscapedInLiterals();
54+
}
55+
}
56+
}

src/Microsoft.DotNet.CodeFormatting/Microsoft.DotNet.CodeFormatting.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
<Compile Include="Rules\HasUsingsOutsideOfNamespaceFormattingRule.cs" />
100100
<Compile Include="Rules\IsFormattedFormattingRule.cs" />
101101
<Compile Include="Rules\IsSimplifiedFormattingRule.cs" />
102+
<Compile Include="Rules\NonAsciiCharactersAreEscapedInLiteralsRule.cs" />
102103
<Compile Include="Rules\RuleExtensions.cs" />
103104
<Compile Include="Rules\UsesXunitForTestsFormattingRule.cs" />
104105
<Compile Include="RuleTypeConstants.cs" />
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under MIT. See LICENSE in the project root for license information.
3+
using System;
4+
using System.ComponentModel.Composition;
5+
using System.Diagnostics;
6+
using System.Text;
7+
using System.Threading;
8+
using System.Threading.Tasks;
9+
10+
using Microsoft.CodeAnalysis;
11+
using Microsoft.CodeAnalysis.CSharp;
12+
using Microsoft.CodeAnalysis.CSharp.Syntax;
13+
14+
namespace Microsoft.DotNet.CodeFormatting.Rules
15+
{
16+
/// <summary>
/// Formatting rule that rewrites string and character literals so that every non-ASCII
/// character in the literal's source text is written as a \uXXXX or \UXXXXXXXX escape.
/// Only literal expressions are rewritten; comments are not touched.
/// </summary>
internal sealed class NonAsciiChractersAreEscapedInLiterals : IFormattingRule
{
    /// <summary>
    /// Applies the rule to <paramref name="document"/>.
    /// </summary>
    /// <param name="document">The document whose literals should be escaped.</param>
    /// <param name="cancellationToken">Token passed through to the syntax-root request.</param>
    /// <returns>
    /// The document with its syntax root rewritten, or the original document when its root is
    /// not a <see cref="CompilationUnitSyntax"/>.
    /// </returns>
    public async Task<Document> ProcessAsync(Document document, CancellationToken cancellationToken)
    {
        var root = await document.GetSyntaxRootAsync(cancellationToken) as CompilationUnitSyntax;

        if (root == null)
        {
            return document;
        }

        var newRoot = UnicodeCharacterEscapingSyntaxRewriter.Rewriter.Visit(root);

        return document.WithSyntaxRoot(newRoot);
    }

    /// <summary>
    /// Rewrites string and character literals which contain non-ASCII characters to instead use the \uXXXX or \UXXXXXXXX syntax.
    /// </summary>
    /// <remarks>
    /// NOTE(review): escapes are also written into verbatim (@"...") string literals. Verbatim
    /// strings do not process escape sequences, so the rewritten literal denotes a different
    /// value than the original. The existing tests expect this output, so it is kept as is here.
    /// </remarks>
    internal class UnicodeCharacterEscapingSyntaxRewriter : CSharpSyntaxRewriter
    {
        /// <summary>The single shared instance; the constructor is private.</summary>
        public static readonly UnicodeCharacterEscapingSyntaxRewriter Rewriter = new UnicodeCharacterEscapingSyntaxRewriter();

        private UnicodeCharacterEscapingSyntaxRewriter()
        {
        }

        /// <summary>
        /// Escapes string and character literals; every other kind of literal is handed to the
        /// base rewriter.
        /// </summary>
        public override SyntaxNode VisitLiteralExpression(LiteralExpressionSyntax node)
        {
            switch (node.CSharpKind())
            {
                case SyntaxKind.StringLiteralExpression:
                    return RewriteStringLiteralExpression(node);
                case SyntaxKind.CharacterLiteralExpression:
                    return RewriteCharacterLiteralExpression(node);
            }

            // Must be base.VisitLiteralExpression, not base.Visit: Visit dispatches through
            // Accept straight back into this override, which would recurse without bound for
            // numeric, boolean and null literals.
            return base.VisitLiteralExpression(node);
        }

        /// <summary>
        /// Returns <paramref name="node"/> with its token text escaped, or the node itself when
        /// the token text is already pure ASCII. Trivia and the token value are preserved.
        /// </summary>
        private static SyntaxNode RewriteStringLiteralExpression(LiteralExpressionSyntax node)
        {
            Debug.Assert(node.CSharpKind() == SyntaxKind.StringLiteralExpression);

            SyntaxToken token = node.Token;

            if (!HasNonAsciiCharacters(token.Text))
            {
                return node;
            }

            string convertedText = EscapeNonAsciiCharacters(token.Text);
            SyntaxToken newToken = SyntaxFactory.Literal(token.LeadingTrivia, convertedText, token.ValueText, token.TrailingTrivia);

            return node.WithToken(newToken);
        }

        /// <summary>
        /// Character-literal counterpart of <see cref="RewriteStringLiteralExpression"/>.
        /// </summary>
        private static SyntaxNode RewriteCharacterLiteralExpression(LiteralExpressionSyntax node)
        {
            Debug.Assert(node.CSharpKind() == SyntaxKind.CharacterLiteralExpression);

            SyntaxToken token = node.Token;

            if (!HasNonAsciiCharacters(token.Text))
            {
                return node;
            }

            string convertedText = EscapeNonAsciiCharacters(token.Text);
            object value = token.Value;

            // Use the char overload so the replacement stays a character-literal token; the
            // string overload would put a string-literal token inside a character-literal
            // expression. Fall back to the string overload if the value is not a char
            // (for example a malformed literal).
            SyntaxToken newToken = value is char
                ? SyntaxFactory.Literal(token.LeadingTrivia, convertedText, (char)value, token.TrailingTrivia)
                : SyntaxFactory.Literal(token.LeadingTrivia, convertedText, token.ValueText, token.TrailingTrivia);

            return node.WithToken(newToken);
        }

        /// <summary>
        /// Returns true when any UTF-16 code unit in <paramref name="value"/> is 0x80 or above.
        /// </summary>
        private static bool HasNonAsciiCharacters(string value)
        {
            foreach (char c in value)
            {
                if (c >= 0x80)
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Copies <paramref name="oldValue"/>, replacing each non-ASCII character with escape text:
        /// a valid surrogate pair becomes one \UXXXXXXXX escape, any other code unit at or above
        /// 0x80 (including an unpaired surrogate) becomes \uXXXX. ASCII characters are copied as is.
        /// </summary>
        private static string EscapeNonAsciiCharacters(string oldValue)
        {
            StringBuilder sb = new StringBuilder(oldValue.Length);

            for (int i = 0; i < oldValue.Length; i++)
            {
                char current = oldValue[i];

                if (current < 0x80)
                {
                    sb.Append(current);
                }
                else if (char.IsHighSurrogate(current) && i + 1 < oldValue.Length && char.IsLowSurrogate(oldValue[i + 1]))
                {
                    sb.AppendFormat(@"\U{0:X8}", char.ConvertToUtf32(current, oldValue[i + 1]));
                    i++; // move past the low surrogate we consumed above.
                }
                else
                {
                    sb.AppendFormat(@"\u{0:X4}", (ushort)current);
                }
            }

            return sb.ToString();
        }
    }
}
126+
}

0 commit comments

Comments
 (0)