1
+ // Copyright (c) Microsoft Corporation. All rights reserved.
2
+ // Licensed under MIT. See LICENSE in the project root for license information.
3
+ using System ;
4
+ using System . ComponentModel . Composition ;
5
+ using System . Diagnostics ;
6
+ using System . Text ;
7
+ using System . Threading ;
8
+ using System . Threading . Tasks ;
9
+
10
+ using Microsoft . CodeAnalysis ;
11
+ using Microsoft . CodeAnalysis . CSharp ;
12
+ using Microsoft . CodeAnalysis . CSharp . Syntax ;
13
+
14
namespace Microsoft.DotNet.CodeFormatting.Rules
{
    /// <summary>
    /// Formatting rule that replaces non-ASCII characters inside string and character
    /// literals with their \uXXXX or \UXXXXXXXX escape sequences.
    /// </summary>
    internal sealed class NonAsciiChractersAreEscapedInLiterals : IFormattingRule
    {
        /// <summary>
        /// Runs the escaping rewriter over the syntax tree of <paramref name="document"/>.
        /// </summary>
        /// <param name="document">The document to process.</param>
        /// <param name="cancellationToken">Token passed to the syntax root retrieval.</param>
        /// <returns>
        /// The document with its syntax root replaced by the rewritten tree, or the original
        /// document when its root is not a <see cref="CompilationUnitSyntax"/>.
        /// </returns>
        public async Task<Document> ProcessAsync(Document document, CancellationToken cancellationToken)
        {
            var root = await document.GetSyntaxRootAsync(cancellationToken) as CompilationUnitSyntax;

            if (root == null)
            {
                return document;
            }

            var newRoot = UnicodeCharacterEscapingSyntaxRewriter.Rewriter.Visit(root);

            return document.WithSyntaxRoot(newRoot);
        }

        /// <summary>
        /// Rewrites string and character literals which contain non-ASCII characters to instead
        /// use the \uXXXX or \UXXXXXXXX syntax.
        /// </summary>
        internal class UnicodeCharacterEscapingSyntaxRewriter : CSharpSyntaxRewriter
        {
            /// <summary>
            /// Shared rewriter instance; the type has no instance fields of its own.
            /// </summary>
            public static readonly UnicodeCharacterEscapingSyntaxRewriter Rewriter = new UnicodeCharacterEscapingSyntaxRewriter();

            // Code units below this value are ASCII and are copied through unchanged.
            private const char FirstNonAsciiCodeUnit = (char)0x80;

            private UnicodeCharacterEscapingSyntaxRewriter()
            {
            }

            /// <summary>
            /// Escapes non-ASCII characters in string and character literals; every other kind of
            /// literal is handed to the default rewriter implementation.
            /// </summary>
            /// <param name="node">The literal expression being visited.</param>
            /// <returns>The rewritten literal, or the result of the base implementation.</returns>
            public override SyntaxNode VisitLiteralExpression(LiteralExpressionSyntax node)
            {
                switch (node.CSharpKind())
                {
                    case SyntaxKind.StringLiteralExpression:
                        return RewriteStringLiteralExpression(node);
                    case SyntaxKind.CharacterLiteralExpression:
                        return RewriteCharacterLiteralExpression(node);
                }

                // Must be base.VisitLiteralExpression, not base.Visit: Visit dispatches through
                // node.Accept(this) straight back into this override, which would recurse without
                // end for numeric, boolean and null literals.
                return base.VisitLiteralExpression(node);
            }

            /// <summary>
            /// Replaces the token of a regular string literal with one whose source text has all
            /// non-ASCII characters escaped. Verbatim literals are returned unchanged.
            /// </summary>
            /// <param name="node">A node of kind <see cref="SyntaxKind.StringLiteralExpression"/>.</param>
            /// <returns>The rewritten node, or <paramref name="node"/> when nothing needs to change.</returns>
            private static SyntaxNode RewriteStringLiteralExpression(LiteralExpressionSyntax node)
            {
                Debug.Assert(node.CSharpKind() == SyntaxKind.StringLiteralExpression);

                SyntaxToken token = node.Token;
                string text = token.Text;

                // In a verbatim literal (@"...") a backslash is an ordinary character, so writing
                // \uXXXX into it would change the value of the string instead of preserving it.
                if (IsVerbatimLiteralText(text) || !HasNonAsciiCharacters(text))
                {
                    return node;
                }

                string convertedText = EscapeNonAsciiCharacters(text);

                // Only the source text changes; the token keeps its value and its trivia.
                SyntaxToken escapedToken = SyntaxFactory.Literal(token.LeadingTrivia, convertedText, token.ValueText, token.TrailingTrivia);

                return node.WithToken(escapedToken);
            }

            /// <summary>
            /// Replaces the token of a character literal with one whose source text has all
            /// non-ASCII characters escaped.
            /// </summary>
            /// <param name="node">A node of kind <see cref="SyntaxKind.CharacterLiteralExpression"/>.</param>
            /// <returns>The rewritten node, or <paramref name="node"/> when nothing needs to change.</returns>
            private static SyntaxNode RewriteCharacterLiteralExpression(LiteralExpressionSyntax node)
            {
                Debug.Assert(node.CSharpKind() == SyntaxKind.CharacterLiteralExpression);

                SyntaxToken token = node.Token;
                string text = token.Text;

                if (!HasNonAsciiCharacters(text))
                {
                    return node;
                }

                string convertedText = EscapeNonAsciiCharacters(text);
                object value = token.Value;

                SyntaxToken escapedToken;
                if (value is char)
                {
                    // The char overload yields a character literal token. Passing ValueText
                    // (a string) would select the string overload and put a string literal token
                    // inside a character literal expression.
                    escapedToken = SyntaxFactory.Literal(token.LeadingTrivia, convertedText, (char)value, token.TrailingTrivia);
                }
                else
                {
                    // No char value available on the token; keep the string-valued form.
                    escapedToken = SyntaxFactory.Literal(token.LeadingTrivia, convertedText, token.ValueText, token.TrailingTrivia);
                }

                return node.WithToken(escapedToken);
            }

            /// <summary>
            /// Tells whether literal source text starts with the verbatim prefix <c>@</c>.
            /// </summary>
            /// <param name="text">Source text of a string literal token.</param>
            /// <returns><c>true</c> when the first character is <c>@</c>.</returns>
            private static bool IsVerbatimLiteralText(string text)
            {
                return text.Length > 0 && text[0] == '@';
            }

            /// <summary>
            /// Tells whether <paramref name="value"/> contains a UTF-16 code unit of 0x80 or above.
            /// </summary>
            /// <param name="value">The text to scan.</param>
            /// <returns><c>true</c> when at least one code unit is outside the ASCII range.</returns>
            private static bool HasNonAsciiCharacters(string value)
            {
                for (int i = 0; i < value.Length; i++)
                {
                    if (value[i] >= FirstNonAsciiCodeUnit)
                    {
                        return true;
                    }
                }

                return false;
            }

            /// <summary>
            /// Copies <paramref name="oldValue"/>, replacing each non-ASCII character with an escape:
            /// a valid surrogate pair becomes \UXXXXXXXX (the combined code point), any other
            /// non-ASCII code unit becomes \uXXXX.
            /// </summary>
            /// <param name="oldValue">The text to escape.</param>
            /// <returns>The text with every code unit of 0x80 or above escaped.</returns>
            private static string EscapeNonAsciiCharacters(string oldValue)
            {
                StringBuilder sb = new StringBuilder(oldValue.Length);

                for (int i = 0; i < oldValue.Length; i++)
                {
                    char current = oldValue[i];

                    if (current < FirstNonAsciiCodeUnit)
                    {
                        sb.Append(current);
                    }
                    else if (char.IsHighSurrogate(current) && i + 1 < oldValue.Length && char.IsLowSurrogate(oldValue[i + 1]))
                    {
                        int codePoint = char.ConvertToUtf32(current, oldValue[i + 1]);
                        sb.Append(@"\U").Append(codePoint.ToString("X8"));
                        i++; // move past the low surrogate we consumed above.
                    }
                    else
                    {
                        // Covers BMP characters as well as unpaired surrogates.
                        sb.Append(@"\u").Append(((int)current).ToString("X4"));
                    }
                }

                return sb.ToString();
            }
        }
    }
}
0 commit comments