|
| 1 | +// Licensed to Elasticsearch B.V under one or more agreements. |
| 2 | +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. |
| 3 | +// See the LICENSE file in the project root for more information |
| 4 | + |
| 5 | +using System.Buffers; |
| 6 | +using Elastic.Markdown.Diagnostics; |
| 7 | +using Markdig; |
| 8 | +using Markdig.Helpers; |
| 9 | +using Markdig.Parsers; |
| 10 | +using Markdig.Parsers.Inlines; |
| 11 | +using Markdig.Renderers; |
| 12 | +using Markdig.Renderers.Html; |
| 13 | +using Markdig.Renderers.Html.Inlines; |
| 14 | +using Markdig.Syntax.Inlines; |
| 15 | + |
| 16 | +namespace Elastic.Markdown.Myst.Linters; |
| 17 | + |
/// <summary>
/// Pipeline-builder helpers for enabling the irregular-space normalizer.
/// </summary>
public static class SpaceNormalizerBuilderExtensions
{
    /// <summary>
    /// Adds <see cref="SpaceNormalizerBuilderExtension"/> to the builder's extension list
    /// unless an extension of that type is already present.
    /// </summary>
    /// <param name="pipeline">The pipeline builder to configure.</param>
    /// <returns>The same <paramref name="pipeline"/> instance, to allow call chaining.</returns>
    public static MarkdownPipelineBuilder UseSpaceNormalizer(this MarkdownPipelineBuilder pipeline)
    {
        var registeredExtensions = pipeline.Extensions;
        registeredExtensions.AddIfNotAlready<SpaceNormalizerBuilderExtension>();

        return pipeline;
    }
}
| 26 | + |
/// <summary>
/// Markdig extension that wires up the irregular-space inline parser and its HTML renderer.
/// </summary>
public class SpaceNormalizerBuilderExtension : IMarkdownExtension
{
    /// <summary>
    /// Inserts a new <see cref="SpaceNormalizerParser"/> into the inline parser list,
    /// directly before the <see cref="EmphasisInlineParser"/> entry.
    /// </summary>
    /// <param name="pipeline">The pipeline builder whose inline parsers are extended.</param>
    public void Setup(MarkdownPipelineBuilder pipeline)
    {
        var inlineParsers = pipeline.InlineParsers;
        _ = inlineParsers.InsertBefore<EmphasisInlineParser>(new SpaceNormalizerParser());
    }

    /// <summary>
    /// Inserts a new <see cref="SpaceNormalizerRenderer"/> into the renderer's object renderers,
    /// directly after the <see cref="EmphasisInlineRenderer"/> entry.
    /// </summary>
    /// <param name="pipeline">The built pipeline (not used here).</param>
    /// <param name="renderer">The renderer whose object renderers are extended.</param>
    public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer)
    {
        var objectRenderers = renderer.ObjectRenderers;
        _ = objectRenderers.InsertAfter<EmphasisInlineRenderer>(new SpaceNormalizerRenderer());
    }
}
| 35 | + |
/// <summary>
/// Inline parser that consumes a single irregular space character, produces an
/// <see cref="IrregularSpace"/> inline for it, and emits one hint per source file
/// the first time such a character is seen.
/// </summary>
public class SpaceNormalizerParser : InlineParser
{
    // Collection of irregular space characters that may impair Markdown rendering
    private static readonly char[] IrregularSpaceChars =
    [
        '\u000B', // Line Tabulation (\v) - <VT>
        '\u000C', // Form Feed (\f) - <FF>
        '\u00A0', // No-Break Space - <NBSP>
        '\u0085', // Next Line
        '\u1680', // Ogham Space Mark
        '\u180E', // Mongolian Vowel Separator - <MVS>
        '\ufeff', // Zero Width No-Break Space - <BOM>
        '\u2000', // En Quad
        '\u2001', // Em Quad
        '\u2002', // En Space - <ENSP>
        '\u2003', // Em Space - <EMSP>
        '\u2004', // Three-Per-Em Space
        '\u2005', // Four-Per-Em Space
        '\u2006', // Six-Per-Em Space
        '\u2007', // Figure Space
        '\u2008', // Punctuation Space - <PUNCSP>
        '\u2009', // Thin Space
        '\u200A', // Hair Space
        '\u200B', // Zero Width Space - <ZWSP>
        '\u2028', // Line Separator
        '\u2029', // Paragraph Separator
        '\u202F', // Narrow No-Break Space
        '\u205F', // Medium Mathematical Space
        '\u3000' // Ideographic Space
    ];

    private static readonly SearchValues<char> SpaceSearchValues = SearchValues.Create(IrregularSpaceChars);

    // Track which files have already had the hint emitted to avoid duplicates.
    // Every access happens under a lock on the set itself. The set is static and entries
    // are never removed, so a given path gets the hint at most once per process.
    private static readonly HashSet<string> FilesWithHintEmitted = [];

    /// <summary>
    /// Registers every irregular space character as an opening character for this parser.
    /// </summary>
    public SpaceNormalizerParser() => OpeningCharacters = IrregularSpaceChars;

    /// <summary>
    /// Matches one irregular space character at the current position of <paramref name="slice"/>.
    /// </summary>
    /// <param name="processor">The inline processor that receives the resulting inline and the hint.</param>
    /// <param name="slice">The text being parsed; advanced by one character on a match.</param>
    /// <returns>
    /// <c>true</c> when the current character is an irregular space and was consumed;
    /// otherwise <c>false</c>, leaving <paramref name="slice"/> untouched.
    /// </returns>
    public override bool Match(InlineProcessor processor, ref StringSlice slice)
    {
        // Test the current character directly; this also avoids slicing an empty span.
        if (!SpaceSearchValues.Contains(slice.CurrentChar))
            return false;

        // Create a fresh inline per occurrence. Markdig inlines carry parent/sibling links,
        // so a single shared instance cannot be placed at more than one position in a
        // document, nor safely shared between documents parsed concurrently.
        var position = processor.GetSourcePosition(slice.Start, out var line, out var column);
        var inline = new IrregularSpace
        {
            Span = new(position, position),
            Line = line,
            Column = column
        };
        processor.Inline = inline;

        // Emit a single hint per file on first detection
        var context = processor.GetContext();
        var filePath = context.MarkdownSourcePath.FullName;

        lock (FilesWithHintEmitted)
        {
            // Add returns false when the path is already present, so one lookup suffices.
            if (FilesWithHintEmitted.Add(filePath))
                processor.EmitHint(inline, 1, "Irregular space detected. Run 'docs-builder format' to automatically fix all instances.");
        }

        slice.SkipChar();
        return true;
    }
}
| 98 | + |
/// <summary>
/// Leaf inline that stands in for one irregular space character in the parsed document.
/// </summary>
public class IrregularSpace : LeafInline
{
    /// <summary>
    /// A pre-allocated instance of this inline.
    /// </summary>
    // NOTE(review): inline nodes are linked into a document tree; confirm that sharing one
    // instance across positions or documents is intended wherever this field is used.
    public static readonly IrregularSpace Instance = new IrregularSpace();
}
| 103 | + |
/// <summary>
/// HTML renderer for <see cref="IrregularSpace"/> inlines.
/// </summary>
public class SpaceNormalizerRenderer : HtmlObjectRenderer<IrregularSpace>
{
    /// <summary>
    /// Writes a single regular space character in place of the irregular one.
    /// </summary>
    /// <param name="renderer">The HTML renderer to write to.</param>
    /// <param name="obj">The inline being rendered (its content is not inspected).</param>
    protected override void Write(HtmlRenderer renderer, IrregularSpace obj)
    {
        const char regularSpace = ' ';
        _ = renderer.Write(regularSpace);
    }
}
0 commit comments