@@ -15,8 +15,9 @@ namespace SmartFormat.Core.Parsing;
1515/// <para/>
1616/// <para>
1717/// <b>Thread-safety</b>:<br/>
18- /// The <see cref="ParseFormat"/> method is thread-safe.
19- /// Other methods (e.g. changing SmartSettings or other properties) is not thread-safe.
18+ /// The <see cref="ParseFormat"/> method is stateless w.r.t. the instance and
19+ /// is safe for concurrent calls provided <see cref="SmartSettings"/> are not concurrently mutated,
20+ /// and <see cref="SmartSettings.IsThreadSafeMode"/> is <see langword="true"/>.
2021/// </para>
2122/// </summary>
2223public class Parser
@@ -149,123 +150,215 @@ public void UseAlternativeBraces(char opening, char closing)
149150
150151 #endregion
151152
152- #region: Parsing :
153+ #region: Parsing and ParseContext :
154+
155+ /// <summary>
156+ /// Defines the current state of the parser within the main loop.
157+ /// </summary>
158+ private enum ParseContext
159+ {
160+ /// <summary>
161+ /// Top-level literal text or inside a placeholder's Format section.
162+ /// </summary>
163+ LiteralText ,
164+ /// <summary>
165+ /// Inside the selector / header portion of a placeholder.
166+ /// It is only the header (selectors + optional formatter name start),
167+ /// not the entire placeholder including nested formats.
168+ /// </summary>
169+ SelectorHeader
170+ }
153171
154172 /// <summary>
155173 /// Parses a format string. This method is thread-safe.
156174 /// <para/>
157- /// Changing the <see cref="Settings"/> or properties is not thread-safe.
175+ /// <para>
176+ /// <b>Thread-safety</b>:<br/>
177+ /// The <see cref="ParseFormat"/> method is stateless w.r.t. the instance and
178+ /// is safe for concurrent calls provided <see cref="SmartSettings"/> are not concurrently mutated,
179+ /// and <see cref="SmartSettings.IsThreadSafeMode"/> is <see langword="true"/>.
180+ /// </para>
158181 /// </summary>
159182 /// <param name="inputFormat"></param>
160183 /// <returns>The <see cref="Format"/> for the parsed string.</returns>
161184 public Format ParseFormat ( string inputFormat )
162185 {
163- // Initialize the state that will be passed around for thread-safety
164- // instead of using stateful instance variables
165- // Format: Can be re-assigned with new placeholders, while resultFormat will become the parent
166- var state = ParserStatePool . Instance . Get ( )
167- . Initialize ( inputFormat , FormatPool . Instance . Get ( ) . Initialize ( Settings , inputFormat ) ) ;
186+ using var statePool = ParserStatePool . Instance . Get ( out var state ) ;
187+ // The result format must not be returned to the pool by the parser
188+ state . Initialize ( inputFormat , FormatPool . Instance . Get ( ) . Initialize ( Settings , inputFormat ) ) ;
168189
169190 var indexContainer = state . Index ;
170191
171- // Store parsing errors until parsing is finished:
172192 var parsingErrors = ParsingErrorsPool . Instance . Get ( ) . Initialize ( state . ResultFormat ) ;
173193
194+ // Context variables
195+ var currentContext = ParseContext . LiteralText ;
174196 Placeholder ? currentPlaceholder = null ;
175-
176- // Used for nested placeholders
177197 var nestedDepth = 0 ;
178198
179199 for ( indexContainer . Current = 0 ; indexContainer . Current < state . InputFormat . Length ; indexContainer . Current ++ )
180200 {
181201 var inputChar = state . InputFormat [ indexContainer . Current ] ;
182- if ( currentPlaceholder == null )
202+
203+ switch ( currentContext )
183204 {
184- // We're parsing literal text with an HTML tag
185- if ( _parserSettings . ParseInputAsHtml && inputChar == '<' )
186- {
187- ParseHtmlTags ( state ) ;
188- continue ;
189- }
205+ case ParseContext . SelectorHeader :
206+ ProcessSelector ( inputChar , state , parsingErrors , ref currentContext , ref currentPlaceholder ,
207+ ref nestedDepth ) ;
208+ break ;
209+ case ParseContext . LiteralText :
210+ ProcessLiteralText ( inputChar , state , parsingErrors , ref currentContext , ref currentPlaceholder ,
211+ ref nestedDepth ) ;
212+ break ;
213+ }
214+ }
190215
191- if ( inputChar == _parserSettings . PlaceholderBeginChar )
192- {
193- AddLiteralCharsParsedBefore ( state ) ;
216+ // Finalize parsing and handle any remaining issues
217+ FinalizeParsing ( state , parsingErrors , currentPlaceholder ) ;
194218
195- if ( EscapeLikeStringFormat ( _parserSettings . PlaceholderBeginChar , state ) ) continue ;
219+ // Check for any parsing errors:
220+ if ( parsingErrors . HasIssues )
221+ {
222+ OnParsingFailure ? . Invoke ( this , new ParsingErrorEventArgs ( parsingErrors , Settings . Parser . ErrorAction == ParseErrorAction . ThrowError ) ) ;
223+ return HandleParsingErrors ( parsingErrors , state . ResultFormat ) ;
224+ }
196225
197- CreateNewPlaceholder ( ref nestedDepth , state , out currentPlaceholder ) ;
198- }
199- else if ( inputChar == _parserSettings . PlaceholderEndChar )
200- {
201- AddLiteralCharsParsedBefore ( state ) ;
226+ ParsingErrorsPool . Instance . Return ( parsingErrors ) ;
227+ return state . ResultFormat ;
228+ }
202229
203- if ( EscapeLikeStringFormat ( _parserSettings . PlaceholderEndChar , state ) ) continue ;
230+ /// <summary>
231+ /// Handles parsing when the current context is <see cref="ParseContext.LiteralText"/>.
232+ /// This method is responsible for identifying the start of placeholders, handling escaped characters,
233+ /// and managing the closing of nested placeholders.
234+ /// </summary>
235+ private void ProcessLiteralText ( char inputChar , ParserState state , ParsingErrors parsingErrors ,
236+ ref ParseContext currentContext , ref Placeholder ? currentPlaceholder , ref int nestedDepth )
237+ {
238+ // We're parsing literal text with an HTML tag
239+ if ( _parserSettings . ParseInputAsHtml && inputChar == '<' )
240+ {
241+ ParseHtmlTags ( state ) ;
242+ return ;
243+ }
204244
205- // Make sure that this is a nested placeholder before we un-nest it:
206- if ( HasProcessedTooMayClosingBraces ( parsingErrors , state ) ) continue ;
245+ if ( inputChar == _parserSettings . PlaceholderBeginChar )
246+ {
247+ AddLiteralCharsParsedBefore ( state ) ;
248+ if ( EscapeLikeStringFormat ( _parserSettings . PlaceholderBeginChar , state ) ) return ;
207249
208- // End of the placeholder's Format, _resultFormat will change to ParentPlaceholder.Parent
209- FinishPlaceholderFormat ( ref nestedDepth , state ) ;
210- }
211- else if ( inputChar == _parserSettings . CharLiteralEscapeChar &&
212- _parserSettings . ConvertCharacterStringLiterals ||
213- ! Settings . StringFormatCompatibility && inputChar == _parserSettings . CharLiteralEscapeChar )
214- {
215- ParseAlternativeEscaping ( state ) ;
216- }
217- else if ( state . Index . NamedFormatterStart != PositionUndefined && ! ParseNamedFormatter ( state ) )
218- {
219- // continue the loop
220- }
221- }
222- else
250+ // Context transition
251+ CreateNewPlaceholder ( ref nestedDepth , state , out currentPlaceholder ) ;
252+ currentContext = ParseContext . SelectorHeader ;
253+ }
254+ else if ( inputChar == _parserSettings . PlaceholderEndChar )
255+ {
256+ AddLiteralCharsParsedBefore ( state ) ;
257+ if ( EscapeLikeStringFormat ( _parserSettings . PlaceholderEndChar , state ) ) return ;
258+ if ( HasProcessedTooManyClosingBraces ( parsingErrors , state ) ) return ;
259+
260+ // End of a nested placeholder's Format.
261+ FinishPlaceholderFormat ( ref nestedDepth , state ) ;
262+ }
263+ else if ( inputChar == _parserSettings . CharLiteralEscapeChar &&
264+ ( _parserSettings . ConvertCharacterStringLiterals || ! Settings . StringFormatCompatibility ) )
265+ {
266+ ParseAlternativeEscaping ( state ) ;
267+ }
268+ else if ( state . Index . NamedFormatterStart != PositionUndefined && ! ParseNamedFormatter ( state ) )
269+ {
270+ // Nothing more to do for this character; the caller's loop advances to the next one.
271+ }
272+ }
273+
274+ /// <summary>
275+ /// Handles parsing when the current context is <see cref="ParseContext.SelectorHeader"/>.
276+ /// This method is responsible for parsing selectors, operators, and identifying the start
277+ /// of a format specifier ':' or the end of the placeholder '}'.
278+ /// </summary>
279+ private void ProcessSelector ( char inputChar , ParserState state , ParsingErrors parsingErrors ,
280+ ref ParseContext currentContext , ref Placeholder ? currentPlaceholder , ref int nestedDepth )
281+ {
282+ if ( currentPlaceholder == null )
283+ {
284+ throw new InvalidOperationException ( $ "Invalid parser context: { nameof ( ProcessSelector ) } called with a null { nameof ( currentPlaceholder ) } .") ;
285+ }
286+
287+ if ( _operatorChars . Contains ( inputChar ) || _customOperatorChars . Contains ( inputChar ) )
288+ {
289+ // Add the selector segment before the operator:
290+ if ( state . Index . Current != state . Index . LastEnd )
223291 {
224- // Placeholder is NOT null, so that means
225- // we're parsing the selectors:
226- ParseSelector ( ref currentPlaceholder , parsingErrors , ref nestedDepth , state ) ;
292+ currentPlaceholder . AddSelector ( SelectorPool . Instance . Get ( ) . Initialize ( Settings , currentPlaceholder , state . InputFormat , state . Index . LastEnd , state . Index . Current , state . Index . Operator , state . Index . Selector ) ) ;
293+ state . Index . Selector ++ ;
294+ state . Index . Operator = state . Index . Current ;
227295 }
296+ state . Index . LastEnd = state . Index . SafeAdd ( state . Index . Current , 1 ) ;
228297 }
298+ else if ( inputChar == _parserSettings . FormatterNameSeparator )
299+ {
300+ AddLastSelector ( ref currentPlaceholder , state , parsingErrors ) ;
301+
302+ // Start the format section of the placeholder.
303+ var newFormat = FormatPool . Instance . Get ( ) . Initialize ( Settings , currentPlaceholder , state . Index . Current + 1 ) ;
304+ currentPlaceholder . Format = newFormat ;
305+ state . ResultFormat = newFormat ;
306+ currentPlaceholder = null ; // We are now parsing the format, not the selectors.
307+ state . Index . NamedFormatterStart = Settings . StringFormatCompatibility ? PositionUndefined : state . Index . LastEnd ;
308+ state . Index . NamedFormatterOptionsStart = PositionUndefined ;
309+ state . Index . NamedFormatterOptionsEnd = PositionUndefined ;
229310
230- // We're at the end of the input string
311+ // We are now parsing the literal text *inside* the placeholder's format.
312+ currentContext = ParseContext . LiteralText ;
313+ }
314+ else if ( inputChar == _parserSettings . PlaceholderEndChar )
315+ {
316+ AddLastSelector ( ref currentPlaceholder , state , parsingErrors ) ;
231317
232- // 1. Is the last item a placeholder, that is not finished yet?
318+ // End the placeholder with no format.
319+ nestedDepth -- ;
320+ currentPlaceholder . EndIndex = state . Index . SafeAdd ( state . Index . Current , 1 ) ;
321+ currentPlaceholder = null ;
322+
323+ // Switch Context
324+ currentContext = ParseContext . LiteralText ;
325+ }
326+ else
327+ {
328+ // Ensure the selector characters are valid:
329+ if ( ! _validSelectorChars . Contains ( inputChar ) )
330+ parsingErrors . AddIssue ( state . ResultFormat ,
331+ $ "'0x{ Convert . ToUInt32 ( inputChar ) : X} ': " +
332+ _parsingErrorText [ ParsingError . InvalidCharactersInSelector ] ,
333+ state . Index . Current , state . Index . SafeAdd ( state . Index . Current , 1 ) ) ;
334+ }
335+ }
336+
337+ /// <summary>
338+ /// Finalizes parsing at the end of the input string.
339+ /// </summary>
340+ private void FinalizeParsing ( ParserState state , ParsingErrors parsingErrors , Placeholder ? currentPlaceholder )
341+ {
342+ // 1. Is the last item a placeholder that is not finished yet?
233343 if ( state . ResultFormat . ParentPlaceholder != null || currentPlaceholder != null )
234344 {
235345 parsingErrors . AddIssue ( state . ResultFormat , _parsingErrorText [ ParsingError . MissingClosingBrace ] ,
236- state . InputFormat . Length ,
237- state . InputFormat . Length ) ;
346+ state . InputFormat . Length , state . InputFormat . Length ) ;
238347 state . ResultFormat . EndIndex = state . InputFormat . Length ;
239348 }
349+ // 2. The last item must be a literal, so add it if necessary
240350 else if ( state . Index . LastEnd != state . InputFormat . Length )
241351 {
242- // 2. The last item must be a literal, so add it
243352 state . ResultFormat . Items . Add ( LiteralTextPool . Instance . Get ( ) . Initialize ( Settings , state . ResultFormat , state . InputFormat ,
244353 state . Index . LastEnd , state . InputFormat . Length ) ) ;
245354 }
246355
247- // This may happen with a missing closing brace, e.g. "{0:yyyy/MM/dd HH:mm:ss"
356+ // Unwind any unclosed nested formats (due to missing closing braces)
248357 while ( state . ResultFormat . ParentPlaceholder != null )
249358 {
250359 state . ResultFormat = state . ResultFormat . ParentPlaceholder . Parent ;
251360 state . ResultFormat . EndIndex = state . InputFormat . Length ;
252361 }
253-
254- // Check for any parsing errors:
255- if ( parsingErrors . HasIssues )
256- {
257- OnParsingFailure ? . Invoke ( this ,
258- new ParsingErrorEventArgs ( parsingErrors ,
259- Settings . Parser . ErrorAction == ParseErrorAction . ThrowError ) ) ;
260-
261- return HandleParsingErrors ( parsingErrors , state . ResultFormat ) ;
262- }
263-
264- ParsingErrorsPool . Instance . Return ( parsingErrors ) ;
265- var resultFormat = state . ResultFormat ;
266- ParserStatePool . Instance . Return ( state ) ;
267-
268- return resultFormat ;
269362 }
270363
271364 /// <summary>
@@ -292,7 +385,7 @@ private void AddLiteralCharsParsedBefore(ParserState state)
292385 /// <param name="state"></param>
293386 /// <returns></returns>
294387 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
295- private bool HasProcessedTooMayClosingBraces ( ParsingErrors parsingErrors , ParserState state )
388+ private bool HasProcessedTooManyClosingBraces ( ParsingErrors parsingErrors , ParserState state )
296389 {
297390 if ( state . ResultFormat . ParentPlaceholder != null ) return false ;
298391
@@ -498,71 +591,6 @@ private bool ParseNamedFormatter(ParserState state)
498591 return true ;
499592 }
500593
501- /// <summary>
502- /// Handles the selectors.
503- /// </summary>
504- /// <param name="currentPlaceholder"></param>
505- /// <param name="parsingErrors"></param>
506- /// <param name="nestedDepth"></param>
507- /// <param name="state"></param>
508- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
509- private void ParseSelector ( ref Placeholder ? currentPlaceholder , ParsingErrors parsingErrors , ref int nestedDepth , ParserState state )
510- {
511- if ( currentPlaceholder == null )
512- {
513- throw new ArgumentNullException ( nameof ( currentPlaceholder ) , $ "Unexpected null reference") ;
514- }
515-
516- var inputChar = state . InputFormat [ state . Index . Current ] ;
517- if ( _operatorChars . Contains ( inputChar ) || _customOperatorChars . Contains ( inputChar ) )
518- {
519- // Add the selector:
520- if ( state . Index . Current != state . Index . LastEnd ) // if equal, we're already parsing a selector
521- {
522- currentPlaceholder . AddSelector ( SelectorPool . Instance . Get ( ) . Initialize ( Settings , currentPlaceholder , state . InputFormat , state . Index . LastEnd , state . Index . Current , state . Index . Operator , state . Index . Selector ) ) ;
523- state . Index . Selector ++ ;
524- state . Index . Operator = state . Index . Current ;
525- }
526-
527- state . Index . LastEnd = state . Index . SafeAdd ( state . Index . Current , 1 ) ;
528- }
529- else if ( inputChar == _parserSettings . FormatterNameSeparator )
530- {
531- // Add the selector:
532- AddLastSelector ( ref currentPlaceholder , state , parsingErrors ) ;
533-
534- // Start the format:
535- var newFormat = FormatPool . Instance . Get ( ) . Initialize ( Settings , currentPlaceholder , state . Index . Current + 1 ) ;
536- currentPlaceholder . Format = newFormat ;
537- // parentFormat still lives in the current placeholder!
538- state . ResultFormat = newFormat ;
539- currentPlaceholder = null ;
540- // named formatters will not be parsed with string.Format compatibility switched ON.
541- // But this way we can handle e.g. Smart.Format("{Date:yyyy/MM/dd HH:mm:ss}") like string.Format
542- state . Index . NamedFormatterStart = Settings . StringFormatCompatibility ? PositionUndefined : state . Index . LastEnd ;
543- state . Index . NamedFormatterOptionsStart = PositionUndefined ;
544- state . Index . NamedFormatterOptionsEnd = PositionUndefined ;
545- }
546- else if ( inputChar == _parserSettings . PlaceholderEndChar )
547- {
548- AddLastSelector ( ref currentPlaceholder , state , parsingErrors ) ;
549-
550- // End the placeholder with no format:
551- nestedDepth -- ;
552- currentPlaceholder . EndIndex = state . Index . SafeAdd ( state . Index . Current , 1 ) ;
553- currentPlaceholder = null ;
554- }
555- else
556- {
557- // Ensure the selector characters are valid:
558- if ( ! _validSelectorChars . Contains ( inputChar ) )
559- parsingErrors . AddIssue ( state . ResultFormat ,
560- $ "'0x{ Convert . ToUInt32 ( inputChar ) : X} ': " +
561- _parsingErrorText [ ParsingError . InvalidCharactersInSelector ] ,
562- state . Index . Current , state . Index . SafeAdd ( state . Index . Current , 1 ) ) ;
563- }
564- }
565-
566594 /// <summary>
567595 /// Adds a <see cref="Selector"/> to the current <see cref="Placeholder"/>
568596 /// because the current character ':' or '}' indicates the end of a selector.
0 commit comments