Skip to content

Commit 3588dc9

Browse files
authored
chore: Parser.ParseFormat: Reduce cognitive complexity (#489)
1 parent 878f9ff commit 3588dc9

File tree

1 file changed

+165
-137
lines changed

1 file changed

+165
-137
lines changed

src/SmartFormat/Core/Parsing/Parser.cs

Lines changed: 165 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ namespace SmartFormat.Core.Parsing;
1515
/// <para/>
1616
/// <para>
1717
/// <b>Thread-safety</b>:<br/>
18-
/// The <see cref="ParseFormat"/> method is thread-safe.
19-
/// Other methods (e.g. changing SmartSettings or other properties) is not thread-safe.
18+
/// The <see cref="ParseFormat"/> method is stateless w.r.t. the instance
19+
/// and is safe for concurrent calls provided <see cref="SmartSettings"/> are not concurrently mutated,
20+
/// and <see cref="SmartSettings.IsThreadSafeMode"/> is <see langword="true"/>.
2021
/// </para>
2122
/// </summary>
2223
public class Parser
@@ -149,123 +150,215 @@ public void UseAlternativeBraces(char opening, char closing)
149150

150151
#endregion
151152

152-
#region: Parsing :
153+
#region: Parsing and ParseContext :
154+
155+
/// <summary>
156+
/// Defines the current state of the parser within the main loop.
157+
/// </summary>
158+
private enum ParseContext
159+
{
160+
/// <summary>
161+
/// Top-level literal text, or literal text inside a placeholder's Format section.
162+
/// </summary>
163+
LiteralText,
164+
/// <summary>
165+
/// Inside the selector / header portion of a placeholder.
166+
/// It is only the header (selectors + optional formatter name start),
167+
/// not the entire placeholder including nested formats.
168+
/// </summary>
169+
SelectorHeader
170+
}
153171

154172
/// <summary>
155173
/// Parses a format string. Thread-safety is conditional; see the conditions stated below.
156174
/// <para/>
157-
/// Changing the <see cref="Settings"/> or properties is not thread-safe.
175+
/// <para>
176+
/// <b>Thread-safety</b>:<br/>
177+
/// The <see cref="ParseFormat"/> method is stateless w.r.t. the instance
178+
/// and is safe for concurrent calls provided <see cref="SmartSettings"/> are not concurrently mutated,
179+
/// and <see cref="SmartSettings.IsThreadSafeMode"/> is <see langword="true"/>.
180+
/// </para>
158181
/// </summary>
159182
/// <param name="inputFormat"></param>
160183
/// <returns>The <see cref="Format"/> for the parsed string.</returns>
161184
public Format ParseFormat(string inputFormat)
162185
{
163-
// Initialize the state that will be passed around for thread-safety
164-
// instead of using stateful instance variables
165-
// Format: Can be re-assigned with new placeholders, while resultFormat will become the parent
166-
var state = ParserStatePool.Instance.Get()
167-
.Initialize(inputFormat, FormatPool.Instance.Get().Initialize(Settings, inputFormat));
186+
using var statePool = ParserStatePool.Instance.Get(out var state);
187+
// The result format must not be returned to the pool by the parser
188+
state.Initialize(inputFormat, FormatPool.Instance.Get().Initialize(Settings, inputFormat));
168189

169190
var indexContainer = state.Index;
170191

171-
// Store parsing errors until parsing is finished:
172192
var parsingErrors = ParsingErrorsPool.Instance.Get().Initialize(state.ResultFormat);
173193

194+
// Context variables
195+
var currentContext = ParseContext.LiteralText;
174196
Placeholder? currentPlaceholder = null;
175-
176-
// Used for nested placeholders
177197
var nestedDepth = 0;
178198

179199
for (indexContainer.Current = 0; indexContainer.Current < state.InputFormat.Length; indexContainer.Current++)
180200
{
181201
var inputChar = state.InputFormat[indexContainer.Current];
182-
if (currentPlaceholder == null)
202+
203+
switch (currentContext)
183204
{
184-
// We're parsing literal text with an HTML tag
185-
if (_parserSettings.ParseInputAsHtml && inputChar == '<')
186-
{
187-
ParseHtmlTags(state);
188-
continue;
189-
}
205+
case ParseContext.SelectorHeader:
206+
ProcessSelector(inputChar, state, parsingErrors, ref currentContext, ref currentPlaceholder,
207+
ref nestedDepth);
208+
break;
209+
case ParseContext.LiteralText:
210+
ProcessLiteralText(inputChar, state, parsingErrors, ref currentContext, ref currentPlaceholder,
211+
ref nestedDepth);
212+
break;
213+
}
214+
}
190215

191-
if (inputChar == _parserSettings.PlaceholderBeginChar)
192-
{
193-
AddLiteralCharsParsedBefore(state);
216+
// Finalize parsing and handle any remaining issues
217+
FinalizeParsing(state, parsingErrors, currentPlaceholder);
194218

195-
if (EscapeLikeStringFormat(_parserSettings.PlaceholderBeginChar, state)) continue;
219+
// Check for any parsing errors:
220+
if (parsingErrors.HasIssues)
221+
{
222+
OnParsingFailure?.Invoke(this, new ParsingErrorEventArgs(parsingErrors, Settings.Parser.ErrorAction == ParseErrorAction.ThrowError));
223+
return HandleParsingErrors(parsingErrors, state.ResultFormat);
224+
}
196225

197-
CreateNewPlaceholder(ref nestedDepth, state, out currentPlaceholder);
198-
}
199-
else if (inputChar == _parserSettings.PlaceholderEndChar)
200-
{
201-
AddLiteralCharsParsedBefore(state);
226+
ParsingErrorsPool.Instance.Return(parsingErrors);
227+
return state.ResultFormat;
228+
}
202229

203-
if (EscapeLikeStringFormat(_parserSettings.PlaceholderEndChar, state)) continue;
230+
/// <summary>
231+
/// Handles parsing when the current context is <see cref="ParseContext.LiteralText"/>.
232+
/// This method is responsible for identifying the start of placeholders, handling escaped characters,
233+
/// and managing the closing of nested placeholders.
234+
/// </summary>
235+
private void ProcessLiteralText(char inputChar, ParserState state, ParsingErrors parsingErrors,
236+
ref ParseContext currentContext, ref Placeholder? currentPlaceholder, ref int nestedDepth)
237+
{
238+
// We're parsing literal text with an HTML tag
239+
if (_parserSettings.ParseInputAsHtml && inputChar == '<')
240+
{
241+
ParseHtmlTags(state);
242+
return;
243+
}
204244

205-
// Make sure that this is a nested placeholder before we un-nest it:
206-
if (HasProcessedTooMayClosingBraces(parsingErrors, state)) continue;
245+
if (inputChar == _parserSettings.PlaceholderBeginChar)
246+
{
247+
AddLiteralCharsParsedBefore(state);
248+
if (EscapeLikeStringFormat(_parserSettings.PlaceholderBeginChar, state)) return;
207249

208-
// End of the placeholder's Format, _resultFormat will change to ParentPlaceholder.Parent
209-
FinishPlaceholderFormat(ref nestedDepth, state);
210-
}
211-
else if (inputChar == _parserSettings.CharLiteralEscapeChar &&
212-
_parserSettings.ConvertCharacterStringLiterals ||
213-
!Settings.StringFormatCompatibility && inputChar == _parserSettings.CharLiteralEscapeChar)
214-
{
215-
ParseAlternativeEscaping(state);
216-
}
217-
else if (state.Index.NamedFormatterStart != PositionUndefined && !ParseNamedFormatter(state))
218-
{
219-
// continue the loop
220-
}
221-
}
222-
else
250+
// Context transition
251+
CreateNewPlaceholder(ref nestedDepth, state, out currentPlaceholder);
252+
currentContext = ParseContext.SelectorHeader;
253+
}
254+
else if (inputChar == _parserSettings.PlaceholderEndChar)
255+
{
256+
AddLiteralCharsParsedBefore(state);
257+
if (EscapeLikeStringFormat(_parserSettings.PlaceholderEndChar, state)) return;
258+
if (HasProcessedTooManyClosingBraces(parsingErrors, state)) return;
259+
260+
// End of a nested placeholder's Format.
261+
FinishPlaceholderFormat(ref nestedDepth, state);
262+
}
263+
else if (inputChar == _parserSettings.CharLiteralEscapeChar &&
264+
(_parserSettings.ConvertCharacterStringLiterals || !Settings.StringFormatCompatibility))
265+
{
266+
ParseAlternativeEscaping(state);
267+
}
268+
else if (state.Index.NamedFormatterStart != PositionUndefined && !ParseNamedFormatter(state))
269+
{
270+
// Intentionally empty: no further handling is needed for this character.
271+
}
272+
}
273+
274+
/// <summary>
275+
/// Handles parsing when the current context is <see cref="ParseContext.SelectorHeader"/>.
276+
/// This method is responsible for parsing selectors, operators, and identifying the start
277+
/// of a format specifier ':' or the end of the placeholder '}'.
278+
/// </summary>
279+
private void ProcessSelector(char inputChar, ParserState state, ParsingErrors parsingErrors,
280+
ref ParseContext currentContext, ref Placeholder? currentPlaceholder, ref int nestedDepth)
281+
{
282+
if (currentPlaceholder == null)
283+
{
284+
throw new InvalidOperationException($"Invalid parser context: {nameof(ProcessSelector)} called with a null {nameof(currentPlaceholder)}.");
285+
}
286+
287+
if (_operatorChars.Contains(inputChar) || _customOperatorChars.Contains(inputChar))
288+
{
289+
// Add the selector segment before the operator:
290+
if (state.Index.Current != state.Index.LastEnd)
223291
{
224-
// Placeholder is NOT null, so that means
225-
// we're parsing the selectors:
226-
ParseSelector(ref currentPlaceholder, parsingErrors, ref nestedDepth, state);
292+
currentPlaceholder.AddSelector(SelectorPool.Instance.Get().Initialize(Settings, currentPlaceholder, state.InputFormat, state.Index.LastEnd, state.Index.Current, state.Index.Operator, state.Index.Selector));
293+
state.Index.Selector++;
294+
state.Index.Operator = state.Index.Current;
227295
}
296+
state.Index.LastEnd = state.Index.SafeAdd(state.Index.Current, 1);
228297
}
298+
else if (inputChar == _parserSettings.FormatterNameSeparator)
299+
{
300+
AddLastSelector(ref currentPlaceholder, state, parsingErrors);
301+
302+
// Start the format section of the placeholder.
303+
var newFormat = FormatPool.Instance.Get().Initialize(Settings, currentPlaceholder, state.Index.Current + 1);
304+
currentPlaceholder.Format = newFormat;
305+
state.ResultFormat = newFormat;
306+
currentPlaceholder = null; // We are now parsing the format, not the selectors.
307+
state.Index.NamedFormatterStart = Settings.StringFormatCompatibility ? PositionUndefined : state.Index.LastEnd;
308+
state.Index.NamedFormatterOptionsStart = PositionUndefined;
309+
state.Index.NamedFormatterOptionsEnd = PositionUndefined;
229310

230-
// We're at the end of the input string
311+
// We are now parsing the literal text *inside* the placeholder's format.
312+
currentContext = ParseContext.LiteralText;
313+
}
314+
else if (inputChar == _parserSettings.PlaceholderEndChar)
315+
{
316+
AddLastSelector(ref currentPlaceholder, state, parsingErrors);
231317

232-
// 1. Is the last item a placeholder, that is not finished yet?
318+
// End the placeholder with no format.
319+
nestedDepth--;
320+
currentPlaceholder.EndIndex = state.Index.SafeAdd(state.Index.Current, 1);
321+
currentPlaceholder = null;
322+
323+
// Switch Context
324+
currentContext = ParseContext.LiteralText;
325+
}
326+
else
327+
{
328+
// Ensure the selector characters are valid:
329+
if (!_validSelectorChars.Contains(inputChar))
330+
parsingErrors.AddIssue(state.ResultFormat,
331+
$"'0x{Convert.ToUInt32(inputChar):X}': " +
332+
_parsingErrorText[ParsingError.InvalidCharactersInSelector],
333+
state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1));
334+
}
335+
}
336+
337+
/// <summary>
338+
/// Finalizes parsing at the end of the input string.
339+
/// </summary>
340+
private void FinalizeParsing(ParserState state, ParsingErrors parsingErrors, Placeholder? currentPlaceholder)
341+
{
342+
// 1. Is the last item a placeholder that is not finished yet?
233343
if (state.ResultFormat.ParentPlaceholder != null || currentPlaceholder != null)
234344
{
235345
parsingErrors.AddIssue(state.ResultFormat, _parsingErrorText[ParsingError.MissingClosingBrace],
236-
state.InputFormat.Length,
237-
state.InputFormat.Length);
346+
state.InputFormat.Length, state.InputFormat.Length);
238347
state.ResultFormat.EndIndex = state.InputFormat.Length;
239348
}
349+
// 2. The last item must be a literal, so add it if necessary
240350
else if (state.Index.LastEnd != state.InputFormat.Length)
241351
{
242-
// 2. The last item must be a literal, so add it
243352
state.ResultFormat.Items.Add(LiteralTextPool.Instance.Get().Initialize(Settings, state.ResultFormat, state.InputFormat,
244353
state.Index.LastEnd, state.InputFormat.Length));
245354
}
246355

247-
// This may happen with a missing closing brace, e.g. "{0:yyyy/MM/dd HH:mm:ss"
356+
// Unwind any unclosed nested formats (due to missing closing braces)
248357
while (state.ResultFormat.ParentPlaceholder != null)
249358
{
250359
state.ResultFormat = state.ResultFormat.ParentPlaceholder.Parent;
251360
state.ResultFormat.EndIndex = state.InputFormat.Length;
252361
}
253-
254-
// Check for any parsing errors:
255-
if (parsingErrors.HasIssues)
256-
{
257-
OnParsingFailure?.Invoke(this,
258-
new ParsingErrorEventArgs(parsingErrors,
259-
Settings.Parser.ErrorAction == ParseErrorAction.ThrowError));
260-
261-
return HandleParsingErrors(parsingErrors, state.ResultFormat);
262-
}
263-
264-
ParsingErrorsPool.Instance.Return(parsingErrors);
265-
var resultFormat = state.ResultFormat;
266-
ParserStatePool.Instance.Return(state);
267-
268-
return resultFormat;
269362
}
270363

271364
/// <summary>
@@ -292,7 +385,7 @@ private void AddLiteralCharsParsedBefore(ParserState state)
292385
/// <param name="state"></param>
293386
/// <returns></returns>
294387
[MethodImpl(MethodImplOptions.AggressiveInlining)]
295-
private bool HasProcessedTooMayClosingBraces(ParsingErrors parsingErrors, ParserState state)
388+
private bool HasProcessedTooManyClosingBraces(ParsingErrors parsingErrors, ParserState state)
296389
{
297390
if (state.ResultFormat.ParentPlaceholder != null) return false;
298391

@@ -498,71 +591,6 @@ private bool ParseNamedFormatter(ParserState state)
498591
return true;
499592
}
500593

501-
/// <summary>
502-
/// Handles the selectors.
503-
/// </summary>
504-
/// <param name="currentPlaceholder"></param>
505-
/// <param name="parsingErrors"></param>
506-
/// <param name="nestedDepth"></param>
507-
/// <param name="state"></param>
508-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
509-
private void ParseSelector(ref Placeholder? currentPlaceholder, ParsingErrors parsingErrors, ref int nestedDepth, ParserState state)
510-
{
511-
if (currentPlaceholder == null)
512-
{
513-
throw new ArgumentNullException(nameof(currentPlaceholder), $"Unexpected null reference");
514-
}
515-
516-
var inputChar = state.InputFormat[state.Index.Current];
517-
if (_operatorChars.Contains(inputChar) || _customOperatorChars.Contains(inputChar))
518-
{
519-
// Add the selector:
520-
if (state.Index.Current != state.Index.LastEnd) // if equal, we're already parsing a selector
521-
{
522-
currentPlaceholder.AddSelector(SelectorPool.Instance.Get().Initialize(Settings, currentPlaceholder, state.InputFormat, state.Index.LastEnd, state.Index.Current, state.Index.Operator, state.Index.Selector));
523-
state.Index.Selector++;
524-
state.Index.Operator = state.Index.Current;
525-
}
526-
527-
state.Index.LastEnd = state.Index.SafeAdd(state.Index.Current, 1);
528-
}
529-
else if (inputChar == _parserSettings.FormatterNameSeparator)
530-
{
531-
// Add the selector:
532-
AddLastSelector(ref currentPlaceholder, state, parsingErrors);
533-
534-
// Start the format:
535-
var newFormat = FormatPool.Instance.Get().Initialize(Settings, currentPlaceholder, state.Index.Current + 1);
536-
currentPlaceholder.Format = newFormat;
537-
// parentFormat still lives in the current placeholder!
538-
state.ResultFormat = newFormat;
539-
currentPlaceholder = null;
540-
// named formatters will not be parsed with string.Format compatibility switched ON.
541-
// But this way we can handle e.g. Smart.Format("{Date:yyyy/MM/dd HH:mm:ss}") like string.Format
542-
state.Index.NamedFormatterStart = Settings.StringFormatCompatibility ? PositionUndefined : state.Index.LastEnd;
543-
state.Index.NamedFormatterOptionsStart = PositionUndefined;
544-
state.Index.NamedFormatterOptionsEnd = PositionUndefined;
545-
}
546-
else if (inputChar == _parserSettings.PlaceholderEndChar)
547-
{
548-
AddLastSelector(ref currentPlaceholder, state, parsingErrors);
549-
550-
// End the placeholder with no format:
551-
nestedDepth--;
552-
currentPlaceholder.EndIndex = state.Index.SafeAdd(state.Index.Current, 1);
553-
currentPlaceholder = null;
554-
}
555-
else
556-
{
557-
// Ensure the selector characters are valid:
558-
if (!_validSelectorChars.Contains(inputChar))
559-
parsingErrors.AddIssue(state.ResultFormat,
560-
$"'0x{Convert.ToUInt32(inputChar):X}': " +
561-
_parsingErrorText[ParsingError.InvalidCharactersInSelector],
562-
state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1));
563-
}
564-
}
565-
566594
/// <summary>
567595
/// Adds a <see cref="Selector"/> to the current <see cref="Placeholder"/>
568596
/// because the current character ':' or '}' indicates the end of a selector.

0 commit comments

Comments
 (0)