11using System . Runtime . CompilerServices ;
2+ #if NET8_0_OR_GREATER
3+ using System . Numerics ;
4+ using System . Runtime . InteropServices ;
5+ using System . Runtime . Intrinsics ;
6+ #endif
27using static HotChocolate . Language . Properties . LangUtf8Resources ;
38
49namespace HotChocolate . Language ;
@@ -10,100 +15,244 @@ public static void Unescape(
ref Span<byte> unescapedString,
bool isBlockString)
{
    // Decodes the escape sequences of escapedString into unescapedString and
    // then shrinks unescapedString to the number of bytes actually written.
    // Escape decoding itself is delegated to ProcessEscapeSequence; this method
    // only locates backslashes and bulk-copies the bytes between them.

    if (escapedString.Length == 0)
    {
        if (unescapedString.Length > 0)
        {
            unescapedString = unescapedString.Slice(0, 0);
        }
        return;
    }

    // Fast path: no escapes, just copy.
    var firstBackslash = escapedString.IndexOf(GraphQLConstants.Backslash);
    if (firstBackslash == -1)
    {
        escapedString.CopyTo(unescapedString);
        unescapedString = unescapedString.Slice(0, escapedString.Length);
        return;
    }

    // Copy everything before the first backslash.
    if (firstBackslash > 0)
    {
        escapedString.Slice(0, firstBackslash).CopyTo(unescapedString);
    }

    var readPos = firstBackslash;
    var writePos = firstBackslash;

    // -1 means no high surrogate is pending.
    var highSurrogate = -1;

    // Process the first escape we already found.
    ProcessEscapeSequence(
        escapedString, unescapedString,
        ref readPos, ref writePos,
        ref highSurrogate, isBlockString);

#if NET8_0_OR_GREATER
    var remaining = escapedString.Length - readPos;

    // Vector256 path (32 bytes at a time) if enough bytes remain.
    if (Vector256.IsHardwareAccelerated && remaining >= Vector256<byte>.Count)
    {
        ref var srcStart = ref MemoryMarshal.GetReference(escapedString);
        ref var dstStart = ref MemoryMarshal.GetReference(unescapedString);
        var backslashVec = Vector256.Create(GraphQLConstants.Backslash);

        // StoreUnsafe performs no bounds check, so the loop must also guarantee
        // that a full vector fits into the destination. When it does not, we
        // drop to the scalar tail whose indexer is bounds-checked.
        while (readPos <= escapedString.Length - Vector256<byte>.Count
            && writePos <= unescapedString.Length - Vector256<byte>.Count)
        {
            var chunk = Vector256.LoadUnsafe(ref srcStart, (nuint)readPos);
            var matches = Vector256.Equals(chunk, backslashVec);
            var mask = matches.ExtractMostSignificantBits();

            if (mask == 0)
            {
                // No escapes in these 32 bytes, so we simply copy.
                chunk.StoreUnsafe(ref dstStart, (nuint)writePos);
                readPos += Vector256<byte>.Count;
                writePos += Vector256<byte>.Count;
            }
            else
            {
                // Found a backslash: copy up to it, then handle the escape.
                var firstEscape = BitOperations.TrailingZeroCount(mask);
                if (firstEscape > 0)
                {
                    escapedString.Slice(readPos, firstEscape)
                        .CopyTo(unescapedString.Slice(writePos));
                    writePos += firstEscape;
                }
                readPos += firstEscape;

                ProcessEscapeSequence(
                    escapedString, unescapedString,
                    ref readPos, ref writePos,
                    ref highSurrogate, isBlockString);
            }
        }
    }
    // Vector128 fallback (16 bytes at a time) if enough bytes remain.
    else if (Vector128.IsHardwareAccelerated && remaining >= Vector128<byte>.Count)
    {
        ref var srcStart = ref MemoryMarshal.GetReference(escapedString);
        ref var dstStart = ref MemoryMarshal.GetReference(unescapedString);
        var backslashVec = Vector128.Create(GraphQLConstants.Backslash);

        // Same destination guard as the Vector256 loop: never issue an
        // unchecked store that would run past the end of unescapedString.
        while (readPos <= escapedString.Length - Vector128<byte>.Count
            && writePos <= unescapedString.Length - Vector128<byte>.Count)
        {
            var chunk = Vector128.LoadUnsafe(ref srcStart, (nuint)readPos);
            var matches = Vector128.Equals(chunk, backslashVec);
            var mask = matches.ExtractMostSignificantBits();

            if (mask == 0)
            {
                // No escapes in these 16 bytes, so we simply copy.
                chunk.StoreUnsafe(ref dstStart, (nuint)writePos);
                readPos += Vector128<byte>.Count;
                writePos += Vector128<byte>.Count;
            }
            else
            {
                // Found a backslash: copy up to it, then handle the escape.
                var firstEscape = BitOperations.TrailingZeroCount(mask);
                if (firstEscape > 0)
                {
                    escapedString.Slice(readPos, firstEscape)
                        .CopyTo(unescapedString.Slice(writePos));
                    writePos += firstEscape;
                }
                readPos += firstEscape;

                ProcessEscapeSequence(
                    escapedString, unescapedString,
                    ref readPos, ref writePos,
                    ref highSurrogate, isBlockString);
            }
        }
    }
#endif

    // Scalar tail for the remaining bytes (and the only path on targets
    // without the vector APIs). Every write here is bounds-checked.
    while (readPos < escapedString.Length)
    {
        var code = escapedString[readPos];

        if (code == GraphQLConstants.Backslash)
        {
            ProcessEscapeSequence(
                escapedString, unescapedString,
                ref readPos, ref writePos,
                ref highSurrogate, isBlockString);
        }
        else
        {
            unescapedString[writePos++] = code;
            readPos++;
        }
    }

    // Trim the destination to what was actually produced.
    if (unescapedString.Length > writePos)
    {
        unescapedString = unescapedString.Slice(0, writePos);
    }
}
161+
/// <summary>
/// Decodes the single escape sequence whose backslash is located at
/// <paramref name="readPos"/> and writes the decoded bytes to
/// <paramref name="unescaped"/> at <paramref name="writePos"/>.
/// </summary>
/// <param name="escaped">The escaped input bytes.</param>
/// <param name="unescaped">The destination buffer for decoded bytes.</param>
/// <param name="readPos">
/// On entry the index of the backslash; on return the index of the first
/// byte after the escape sequence.
/// </param>
/// <param name="writePos">
/// The next write index in <paramref name="unescaped"/>; advanced by the
/// number of bytes written.
/// </param>
/// <param name="highSurrogate">
/// The pending high surrogate of a <c>\u</c> surrogate pair, or a negative
/// value when none is pending.
/// </param>
/// <param name="isBlockString">
/// When <c>true</c>, a backslash followed by three quotes is decoded to
/// three quotes.
/// </param>
/// <exception cref="Utf8EncodingException">
/// The escape sequence is truncated, uses an invalid escape character, has an
/// invalid block-string quote escape, or contains an unpaired surrogate.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ProcessEscapeSequence(
    in ReadOnlySpan<byte> escaped,
    Span<byte> unescaped,
    ref int readPos,
    ref int writePos,
    ref int highSurrogate,
    bool isBlockString)
{
    // A backslash as the very last byte has nothing to escape.
    if (readPos + 1 >= escaped.Length)
    {
        throw new Utf8EncodingException(
            string.Format(Utf8Helper_InvalidEscapeChar, '\\'));
    }

    // skip backslash
    readPos++;
    var code = escaped[readPos++];

    if (isBlockString && code == GraphQLConstants.Quote)
    {
        // The first quote is already consumed; two more must follow.
        if (readPos + 1 < escaped.Length
            && escaped[readPos] == GraphQLConstants.Quote
            && escaped[readPos + 1] == GraphQLConstants.Quote)
        {
            readPos += 2;
            unescaped[writePos++] = GraphQLConstants.Quote;
            unescaped[writePos++] = GraphQLConstants.Quote;
            unescaped[writePos++] = GraphQLConstants.Quote;
        }
        else
        {
            throw new Utf8EncodingException(Utf8Helper_InvalidQuoteEscapeCount);
        }
    }
    else if (code.IsValidEscapeCharacter())
    {
        if (code == GraphQLConstants.U)
        {
            // Four hex digits must follow the 'u'.
            if (readPos + 3 >= escaped.Length)
            {
                throw new Utf8EncodingException(
                    string.Format(Utf8Helper_InvalidEscapeChar, 'u'));
            }

            var unicodeDecimal = UnescapeUtf8Hex(
                escaped[readPos],
                escaped[readPos + 1],
                escaped[readPos + 2],
                escaped[readPos + 3]);
            readPos += 4;

            if (unicodeDecimal >= 0xD800 && unicodeDecimal <= 0xDBFF)
            {
                // High surrogate
                if (highSurrogate >= 0)
                {
                    throw new Utf8EncodingException("Unexpected high surrogate.");
                }

                // A high surrogate is only meaningful when the very next
                // bytes start another \u escape. Without this look-ahead a
                // high surrogate followed by plain text, a simple escape or
                // the end of input would be dropped without any error.
                // The following call still verifies that the next escape is
                // actually in the low surrogate range.
                if (readPos + 1 >= escaped.Length
                    || escaped[readPos] != GraphQLConstants.Backslash
                    || escaped[readPos + 1] != GraphQLConstants.U)
                {
                    throw new Utf8EncodingException("High surrogate not followed by low surrogate.");
                }

                highSurrogate = unicodeDecimal;
            }
            else if (unicodeDecimal >= 0xDC00 && unicodeDecimal <= 0xDFFF)
            {
                // Low surrogate
                if (highSurrogate < 0)
                {
                    throw new Utf8EncodingException("Unexpected low surrogate.");
                }

                // Combine the pair into a single supplementary code point.
                var fullUnicode = ((highSurrogate - 0xD800) << 10)
                    + (unicodeDecimal - 0xDC00)
                    + 0x10000;
                UnescapeUtf8Hex(fullUnicode, ref writePos, unescaped);
                highSurrogate = -1;
            }
            else
            {
                if (highSurrogate >= 0)
                {
                    throw new Utf8EncodingException("High surrogate not followed by low surrogate.");
                }
                UnescapeUtf8Hex(unicodeDecimal, ref writePos, unescaped);
            }
        }
        else
        {
            unescaped[writePos++] = code.EscapeCharacter();
        }
    }
    else
    {
        throw new Utf8EncodingException(
            string.Format(
                Utf8Helper_InvalidEscapeChar,
                (char)code));
    }
}
109258
0 commit comments