@@ -47,7 +47,16 @@ public Token GetToken()
4747 return ReadNumber ( ) ;
4848
4949 if ( code == '"' )
50- return ReadString ( ) ;
50+ {
51+ if ( _currentIndex + 2 < _source . Length && _source . Span [ _currentIndex + 1 ] == '"' && _source . Span [ _currentIndex + 2 ] == '"' )
52+ {
53+ return ReadBlockString ( ) ;
54+ }
55+ else
56+ {
57+ return ReadString ( ) ;
58+ }
59+ }
5160
5261 return Throw_From_GetToken2 ( code ) ;
5362 }
@@ -172,6 +181,203 @@ private Token ReadComment()
172181 ) ;
173182 }
174183
/// <summary>
/// Reads a block string token (text delimited by <c>"""</c>) beginning at the current position.
/// The caller has already verified that the current character and the two following
/// characters are double quotes.
/// </summary>
/// <returns>
/// A <see cref="TokenKind.STRING"/> token whose value is the block string content with line
/// terminators normalized to LF, common indentation removed and leading/trailing blank
/// lines dropped. The token starts at the first opening quote and ends just past the last
/// closing quote.
/// </returns>
/// <remarks>
/// Throws (through the Throw_From_* helpers) when a disallowed control character is found
/// or when the source ends before the closing <c>"""</c>.
/// </remarks>
private Token ReadBlockString()
{
    // The token starts on the first opening quote, the same way ReadString records its start.
    int start = _currentIndex;

    // Step onto the third opening quote; NextCode() then yields the first content character.
    _currentIndex += 2;
    char code = NextCode();

    // Content is collected on the stack first; a StringBuilder takes over only when it overflows.
    Span<char> buffer = stackalloc char[4096];
    StringBuilder? sb = null;

    int index = 0;
    bool escape = false; // when the last character was \
    bool lastWasCr = false;

    while (_currentIndex < _source.Length)
    {
        // Control characters other than TAB, LF and CR are not allowed.
        if (code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D)
        {
            Throw_From_ReadBlockString1(code);
        }

        // check for """
        bool atTripleQuote = code == '"'
            && _currentIndex + 2 < _source.Length
            && _source.Span[_currentIndex + 1] == '"'
            && _source.Span[_currentIndex + 2] == '"';

        if (atTripleQuote)
        {
            if (!escape)
            {
                // end of block string
                break;
            }

            // \""" is an escaped triple quote: drop the backslash and emit all three quotes
            // as one unit, so the 2nd/3rd quote can never be combined with a following
            // quote and mistaken for the terminator.
            escape = false;
            Write(buffer, ref index, ref sb, '"');
            Write(buffer, ref index, ref sb, '"');
            Write(buffer, ref index, ref sb, '"');
            _currentIndex += 2;
            lastWasCr = false;
            code = NextCode();
            continue;
        }

        if (escape)
        {
            // The previous character was a \ that did not introduce \""":
            // emit the backslash now and step back so the current character is read again below.
            code = '\\';
            _currentIndex--;
            escape = false;
        }
        else if (code == '\\')
        {
            // Do not emit anything yet; what to do depends on the next character.
            escape = true;
            code = NextCode();
            lastWasCr = false;
            continue;
        }

        // CR, LF and CRLF are all written as a single LF: the LF of a CRLF pair is skipped.
        if (!(lastWasCr && code == '\n'))
        {
            Write(buffer, ref index, ref sb, code == '\r' ? '\n' : code);
        }

        lastWasCr = code == '\r';

        code = NextCode();
    }

    // The loop ended without finding the closing """.
    if (_currentIndex >= _source.Length)
    {
        Throw_From_ReadString2();
    }

    // Move from the first closing quote onto the last closing quote.
    _currentIndex += 2;

    if (sb != null)
    {
        for (int i = 0; i < index; ++i)
            sb.Append(buffer[i]);
    }

    // at this point, if sb != null, then sb has the whole string, otherwise buffer (of length index) has the whole string
    // also, all line termination combinations have been replaced with LF

    ROM value;
    if (sb != null)
    {
        var chars = new char[sb.Length];
        sb.CopyTo(0, chars, 0, sb.Length);
        value = ProcessBuffer(chars);
    }
    else
    {
        value = ProcessBuffer(buffer.Slice(0, index));
    }

    return new Token
    (
        TokenKind.STRING,
        value,
        start,
        _currentIndex + 1
    );

    // Appends one character to the stack buffer; when the buffer is full, its content is
    // flushed into the (lazily created) StringBuilder and the buffer is reused from index 0.
    static void Write(Span<char> buffer, ref int index, ref StringBuilder? sb, char value)
    {
        if (index < buffer.Length)
        {
            buffer[index++] = value;
            return;
        }

        if (sb == null)
            sb = new StringBuilder(buffer.Length * 2);

        for (int i = 0; i < buffer.Length; ++i)
            sb.Append(buffer[i]);

        sb.Append(value);
        index = 0;
    }

    // Strips common indentation and leading/trailing blank lines from LF-normalized content.
    // NOTE(review): this looks like the BlockStringValue() algorithm of the GraphQL
    // specification - confirm against the spec before changing it.
    static ROM ProcessBuffer(Span<char> buffer)
    {
        // scan string to determine maximum valid commonIndent value,
        // number of initial blank lines, and number of trailing blank lines
        int commonIndent = int.MaxValue;
        int initialBlankLines = 1; // decremented once when the first non-whitespace character is seen
        int skipLinesAfter; // skip all text after line ###, as determined by the number of trailing blank lines
        {
            int trailingBlankLines = 0;
            int line = 0;
            int whitespace = 0; // count of leading spaces/tabs on the current line
            bool allWhitespace = true; // current line has only spaces/tabs so far
            bool reachedCharacter = false; // a non-whitespace character has been seen anywhere
            for (int index = 0; index < buffer.Length; index++)
            {
                char code = buffer[index];
                if (code == '\n')
                {
                    if (allWhitespace)
                        trailingBlankLines += 1;
                    // the first line never contributes to the common indent
                    if (line != 0 && !allWhitespace && whitespace < commonIndent)
                        commonIndent = whitespace;
                    line++;
                    whitespace = 0;
                    allWhitespace = true;
                    if (!reachedCharacter)
                        initialBlankLines++;
                }
                else if (code == ' ' || code == '\t')
                {
                    if (allWhitespace)
                        whitespace++;
                }
                else
                {
                    allWhitespace = false;
                    if (!reachedCharacter)
                        initialBlankLines--;
                    reachedCharacter = true;
                    // a non-blank line was found, so earlier blank lines are not trailing ones
                    trailingBlankLines = 0;
                }
            }
            // account for the last line, which has no terminating LF
            if (allWhitespace)
                trailingBlankLines += 1;
            if (line != 0 && !allWhitespace && whitespace < commonIndent)
                commonIndent = whitespace;
            if (commonIndent == int.MaxValue)
                commonIndent = 0;
            int lines = line + 1;
            skipLinesAfter = lines - trailingBlankLines;
        }

        // step through the input, skipping the initial blank lines and the trailing blank lines,
        // and skipping the initial blank characters from the start of each line
        Span<char> output = buffer.Length <= 4096 ? stackalloc char[buffer.Length] : new char[buffer.Length];
        int outputIndex = 0;
        {
            int line = 0;
            int col = 0;
            for (int index = 0; index < buffer.Length; index++)
            {
                char code = buffer[index];
                if (code == '\n')
                {
                    if (++line >= skipLinesAfter)
                        break;
                    col = 0;
                    // no LF is written before the first retained line
                    if (line > initialBlankLines)
                        output[outputIndex++] = code;
                }
                else
                {
                    // the first line is copied as-is; other lines lose their first commonIndent characters
                    if (line >= initialBlankLines && (line == 0 || col++ >= commonIndent))
                        output[outputIndex++] = code;
                }
            }
        }

        // return the string value from the output buffer
        return output.Slice(0, outputIndex).ToString();
    }
}
380+
175381 private Token ReadString ( )
176382 {
177383 int start = _currentIndex ;
@@ -245,6 +451,11 @@ private void Throw_From_ReadString2()
245451 throw new GraphQLSyntaxErrorException ( "Unterminated string." , _source , _currentIndex ) ;
246452 }
247453
/// <summary>
/// Throws a <see cref="GraphQLSyntaxErrorException"/> reporting a character that is not
/// allowed inside a block string, located at the current position of the source.
/// </summary>
/// <param name="code">The offending character.</param>
private void Throw_From_ReadBlockString1(char code)
{
    // The code point follows "\u", so it is printed as four hexadecimal digits (X4);
    // a decimal rendering would denote a different character (e.g. 0x1F would read as \u0031).
    throw new GraphQLSyntaxErrorException($"Invalid character within BlockString: \\u{(int)code:X4}.", _source, _currentIndex);
}
458+
248459 // sets escaped only to true
249460 private char ReadCharacterFromString ( char currentCharacter , ref bool escaped )
250461 {
0 commit comments