1414using Elastic . Markdown . Myst . FrontMatter ;
1515using Elastic . Markdown . Myst . InlineParsers ;
1616using Elastic . Markdown . Myst . InlineParsers . Substitution ;
17+ using Elastic . Markdown . Myst . Linters ;
1718using Elastic . Markdown . Myst . Renderers ;
1819using Elastic . Markdown . Myst . Roles ;
1920using Elastic . Markdown . Myst . Roles . AppliesTo ;
@@ -30,94 +31,6 @@ public class MarkdownParser(BuildContext build, IParserResolvers resolvers)
3031 private BuildContext Build { get ; } = build ;
3132 private IParserResolvers Resolvers { get ; } = resolvers ;
3233
/// <summary>
/// Whitespace-like characters that are reported as irregular because they may impair
/// Markdown rendering. Each entry is annotated with its code point and Unicode name.
/// </summary>
private static readonly char[] IrregularWhitespaceChars =
[
    '\v',     // U+000B Line Tabulation <VT>
    '\f',     // U+000C Form Feed <FF>
    '\u00A0', // U+00A0 No-Break Space <NBSP>
    '\u0085', // U+0085 Next Line
    '\u1680', // U+1680 Ogham Space Mark
    '\u180E', // U+180E Mongolian Vowel Separator <MVS>
    '\uFEFF', // U+FEFF Zero Width No-Break Space <BOM>
    '\u2000', // U+2000 En Quad
    '\u2001', // U+2001 Em Quad
    '\u2002', // U+2002 En Space <ENSP>
    '\u2003', // U+2003 Em Space <EMSP>
    '\u2004', // U+2004 Three-Per-Em Space
    '\u2005', // U+2005 Four-Per-Em Space
    '\u2006', // U+2006 Six-Per-Em Space
    '\u2007', // U+2007 Figure Space
    '\u2008', // U+2008 Punctuation Space <PUNCSP>
    '\u2009', // U+2009 Thin Space
    '\u200A', // U+200A Hair Space
    '\u200B', // U+200B Zero Width Space <ZWSP>
    '\u2028', // U+2028 Line Separator
    '\u2029', // U+2029 Paragraph Separator
    '\u202F', // U+202F Narrow No-Break Space
    '\u205F', // U+205F Medium Mathematical Space
    '\u3000'  // U+3000 Ideographic Space
];
61-
/// <summary>
/// Scans <paramref name="content"/> line by line and writes one warning diagnostic to
/// <c>Build.Collector</c> for every character found in <see cref="IrregularWhitespaceChars"/>.
/// </summary>
/// <param name="content">The markdown text to scan.</param>
/// <param name="filePath">Value stored in the <c>File</c> property of each diagnostic.</param>
private void DetectIrregularWhitespace(string content, string filePath)
{
    // Lines are split on CRLF, LF and CR only; other separator-like characters
    // (e.g. U+2028) stay inside a line so they can be reported below.
    string[] lineBreaks = ["\r\n", "\n", "\r"];
    var rows = content.Split(lineBreaks, StringSplitOptions.None);

    var lineNumber = 0;
    foreach (var row in rows)
    {
        lineNumber++; // diagnostics use 1-based line numbers

        for (var offset = 0; offset < row.Length; offset++)
        {
            var candidate = row[offset];
            if (Array.IndexOf(IrregularWhitespaceChars, candidate) < 0)
                continue;

            var friendlyName = GetCharacterName(candidate);
            var diagnostic = new Diagnostic
            {
                Severity = Severity.Warning,
                File = filePath,
                Line = lineNumber,
                Column = offset + 1, // 1-based column number
                Length = 1,
                Message = $"Irregular whitespace character detected: U+{(int)candidate:X4} ({friendlyName}). This may impair Markdown rendering."
            };
            Build.Collector.Write(diagnostic);
        }
    }
}
89-
/// <summary>
/// Returns a human-readable name for an irregular whitespace character, for use in
/// diagnostic messages.
/// </summary>
/// <param name="c">The character to describe.</param>
/// <returns>
/// The friendly name of <paramref name="c"/>, or <c>"Unknown"</c> when the character
/// has no entry in the table below.
/// </returns>
private static string GetCharacterName(char c) => c switch
{
    '\u000B' => "Line Tabulation (VT)",
    '\u000C' => "Form Feed (FF)",
    '\u00A0' => "No-Break Space (NBSP)",
    '\u0085' => "Next Line",
    '\u1680' => "Ogham Space Mark",
    '\u180E' => "Mongolian Vowel Separator (MVS)",
    '\uFEFF' => "Zero Width No-Break Space (BOM)",
    '\u2000' => "En Quad",
    '\u2001' => "Em Quad",
    '\u2002' => "En Space (ENSP)",
    '\u2003' => "Em Space (EMSP)",
    // U+2004 is THREE-PER-EM SPACE; the name was previously misspelled "Tree-Per-Em".
    '\u2004' => "Three-Per-Em",
    '\u2005' => "Four-Per-Em",
    '\u2006' => "Six-Per-Em",
    '\u2007' => "Figure Space",
    '\u2008' => "Punctuation Space (PUNCSP)",
    '\u2009' => "Thin Space",
    '\u200A' => "Hair Space",
    '\u200B' => "Zero Width Space (ZWSP)",
    '\u2028' => "Line Separator",
    '\u2029' => "Paragraph Separator",
    '\u202F' => "Narrow No-Break Space",
    '\u205F' => "Medium Mathematical Space",
    '\u3000' => "Ideographic Space",
    _ => "Unknown"
};
119-
120-
12134 public Task < MarkdownDocument > MinimalParseAsync ( IFileInfo path , Cancel ctx )
12235 {
12336 var state = new ParserState ( Build )
@@ -159,17 +72,11 @@ public Task<MarkdownDocument> ParseSnippetAsync(IFileInfo path, IFileInfo parent
15972 return ParseAsync ( path , context , Pipeline , ctx ) ;
16073 }
16174
/// <summary>
/// Parses an in-memory markdown string with the full <see cref="Pipeline"/>.
/// </summary>
/// <remarks>
/// Despite the <c>Async</c> suffix this method returns the document synchronously.
/// </remarks>
/// <param name="markdown">The markdown text to parse.</param>
/// <param name="path">File information forwarded to <c>ParseMarkdownStringAsync</c>.</param>
/// <param name="matter">Optional front matter forwarded to <c>ParseMarkdownStringAsync</c>.</param>
/// <returns>The document produced by <c>ParseMarkdownStringAsync</c>.</returns>
public MarkdownDocument ParseStringAsync(string markdown, IFileInfo path, YamlFrontMatter? matter) =>
    ParseMarkdownStringAsync(markdown, path, matter, pipeline: Pipeline);
16777
/// <summary>
/// Parses an in-memory markdown string with the reduced <c>MinimalPipeline</c>.
/// </summary>
/// <remarks>
/// Despite the <c>Async</c> suffix this method returns the document synchronously.
/// </remarks>
/// <param name="markdown">The markdown text to parse.</param>
/// <param name="path">File information forwarded to <c>ParseMarkdownStringAsync</c>.</param>
/// <param name="matter">Optional front matter forwarded to <c>ParseMarkdownStringAsync</c>.</param>
/// <returns>The document produced by <c>ParseMarkdownStringAsync</c>.</returns>
public MarkdownDocument MinimalParseStringAsync(string markdown, IFileInfo path, YamlFrontMatter? matter) =>
    ParseMarkdownStringAsync(markdown, path, matter, pipeline: MinimalPipeline);
17380
17481 private MarkdownDocument ParseMarkdownStringAsync ( string markdown , IFileInfo path , YamlFrontMatter ? matter , MarkdownPipeline pipeline )
17582 {
@@ -185,7 +92,7 @@ private MarkdownDocument ParseMarkdownStringAsync(string markdown, IFileInfo pat
18592 return markdownDocument ;
18693 }
18794
188- private async Task < MarkdownDocument > ParseAsync (
95+ private static async Task < MarkdownDocument > ParseAsync (
18996 IFileInfo path ,
19097 MarkdownParserContext context ,
19198 MarkdownPipeline pipeline ,
@@ -199,12 +106,7 @@ private async Task<MarkdownDocument> ParseAsync(
199106 inputMarkdown = await streamReader . AsTextReader ( ) . ReadToEndAsync ( ctx ) ;
200107 }
201108 else
202- {
203109 inputMarkdown = await path . FileSystem . File . ReadAllTextAsync ( path . FullName , ctx ) ;
204- }
205-
206- // Check for irregular whitespace characters
207- DetectIrregularWhitespace ( inputMarkdown , path . FullName ) ;
208110
209111 var markdownDocument = Markdig . Markdown . Parse ( inputMarkdown , pipeline , context ) ;
210112 return markdownDocument ;
@@ -258,6 +160,7 @@ public MarkdownPipeline Pipeline
258160 . UseEnhancedCodeBlocks ( )
259161 . UseHtmxLinkInlineRenderer ( )
260162 . DisableHtml ( )
163+ . UseWhiteSpaceNormalizer ( )
261164 . UseHardBreaks ( ) ;
262165 _ = builder . BlockParsers . TryRemove < IndentedCodeBlockParser > ( ) ;
263166 _pipelineCached = builder . Build ( ) ;
0 commit comments