Skip to content

Commit 8042702

Browse files
committed
Various refactors
1 parent c6ef6ad commit 8042702

File tree

10 files changed

+258
-198
lines changed

10 files changed

+258
-198
lines changed

docs/cli/docset/format.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# format
22

3-
Format documentation files by fixing common issues like irregular whitespace
3+
Format documentation files by fixing common issues like irregular space
44

55
## Usage
66

@@ -20,19 +20,19 @@ docs-builder format [options...] [-h|--help] [--version]
2020

2121
The `format` command automatically detects and fixes formatting issues in your documentation files. The command only processes Markdown files (`.md`) that are included in your `_docset.yml` table of contents, ensuring that only intentional documentation files are modified.
2222

23-
Currently, it handles irregular whitespace characters that may impair Markdown rendering.
23+
Currently, it handles irregular space characters that may impair Markdown rendering.
2424

25-
### Irregular Whitespace Detection
25+
### Irregular Space Detection
2626

27-
The format command detects and replaces 24 types of irregular whitespace characters with regular spaces, including:
27+
The format command detects and replaces 24 types of irregular space characters with regular spaces, including:
2828

2929
- No-Break Space (U+00A0)
3030
- En Space (U+2002)
3131
- Em Space (U+2003)
3232
- Zero Width Space (U+200B)
3333
- Line Separator (U+2028)
3434
- Paragraph Separator (U+2029)
35-
- And 18 other irregular whitespace variants
35+
- And 18 other irregular space variants
3636

3737
These characters can cause unexpected rendering issues in Markdown and are often introduced accidentally through copy-paste operations from other applications.
3838

@@ -62,8 +62,8 @@ The command provides detailed feedback about the formatting process:
6262

6363
```
6464
Formatting documentation in: /path/to/docs
65-
Fixed 2 irregular whitespace(s) in: guide/setup.md
66-
Fixed 1 irregular whitespace(s) in: api/endpoints.md
65+
Fixed 2 irregular space(s) in: guide/setup.md
66+
Fixed 1 irregular space(s) in: api/endpoints.md
6767
6868
Formatting complete:
6969
Files processed: 155

docs/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ navigation_title: Elastic Docs v3
44

55
# Welcome to Elastic Docs v3
66

7-
Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem.
7+
Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem.
88

9-
## What do you want to do today?
9+
## What do youwant to do today?
1010

1111
* [Contribute to Elastic documentation](./contribute/index.md)
1212
* [Learn about migration to Elastic Docs V3](./migration/index.md)
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.Buffers;
6+
using Elastic.Markdown.Diagnostics;
7+
using Markdig;
8+
using Markdig.Helpers;
9+
using Markdig.Parsers;
10+
using Markdig.Parsers.Inlines;
11+
using Markdig.Renderers;
12+
using Markdig.Renderers.Html;
13+
using Markdig.Renderers.Html.Inlines;
14+
using Markdig.Syntax.Inlines;
15+
16+
namespace Elastic.Markdown.Myst.Linters;
17+
18+
/// <summary>
/// Builder extensions for enabling irregular space normalization on a Markdig pipeline.
/// </summary>
public static class SpaceNormalizerBuilderExtensions
{
    /// <summary>
    /// Adds <see cref="SpaceNormalizerBuilderExtension"/> to the builder's extension list
    /// via <c>AddIfNotAlready</c>.
    /// </summary>
    /// <param name="pipeline">The pipeline builder to configure.</param>
    /// <returns>The same <paramref name="pipeline"/> instance so calls can be chained.</returns>
    public static MarkdownPipelineBuilder UseSpaceNormalizer(this MarkdownPipelineBuilder pipeline)
    {
        var registeredExtensions = pipeline.Extensions;
        registeredExtensions.AddIfNotAlready<SpaceNormalizerBuilderExtension>();
        return pipeline;
    }
}
26+
27+
/// <summary>
/// Markdig extension that wires the irregular space parser and its HTML renderer into a pipeline.
/// </summary>
public class SpaceNormalizerBuilderExtension : IMarkdownExtension
{
    /// <summary>
    /// Inserts a new <see cref="SpaceNormalizerParser"/> in front of the emphasis inline parser.
    /// </summary>
    /// <param name="pipeline">The pipeline builder whose inline parsers are extended.</param>
    public void Setup(MarkdownPipelineBuilder pipeline)
    {
        var parser = new SpaceNormalizerParser();
        // The insertion result is intentionally ignored, as in the expression-bodied original.
        _ = pipeline.InlineParsers.InsertBefore<EmphasisInlineParser>(parser);
    }

    /// <summary>
    /// Inserts a new <see cref="SpaceNormalizerRenderer"/> right after the emphasis inline renderer.
    /// </summary>
    /// <param name="pipeline">The built pipeline (not used here).</param>
    /// <param name="renderer">The renderer whose object renderers are extended.</param>
    public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer)
    {
        var spaceRenderer = new SpaceNormalizerRenderer();
        // The insertion result is intentionally ignored, as in the expression-bodied original.
        _ = renderer.ObjectRenderers.InsertAfter<EmphasisInlineRenderer>(spaceRenderer);
    }
}
35+
36+
/// <summary>
/// Inline parser that matches a single irregular space character, replaces it in the inline
/// tree with an <see cref="IrregularSpace"/> node, and emits one hint per source file.
/// </summary>
public class SpaceNormalizerParser : InlineParser
{
    // Collection of irregular space characters that may impair Markdown rendering
    private static readonly char[] IrregularSpaceChars =
    [
        '\u000B', // Line Tabulation (\v) - <VT>
        '\u000C', // Form Feed (\f) - <FF>
        '\u00A0', // No-Break Space - <NBSP>
        '\u0085', // Next Line
        '\u1680', // Ogham Space Mark
        '\u180E', // Mongolian Vowel Separator - <MVS>
        '\ufeff', // Zero Width No-Break Space - <BOM>
        '\u2000', // En Quad
        '\u2001', // Em Quad
        '\u2002', // En Space - <ENSP>
        '\u2003', // Em Space - <EMSP>
        '\u2004', // Three-Per-Em Space
        '\u2005', // Four-Per-Em Space
        '\u2006', // Six-Per-Em Space
        '\u2007', // Figure Space
        '\u2008', // Punctuation Space - <PUNCSP>
        '\u2009', // Thin Space
        '\u200A', // Hair Space
        '\u200B', // Zero Width Space - <ZWSP>
        '\u2028', // Line Separator
        '\u2029', // Paragraph Separator
        '\u202F', // Narrow No-Break Space
        '\u205F', // Medium Mathematical Space
        '\u3000' // Ideographic Space
    ];
    private static readonly SearchValues<char> SpaceSearchValues = SearchValues.Create(IrregularSpaceChars);

    // Track which files have already had the hint emitted to avoid duplicates
    private static readonly HashSet<string> FilesWithHintEmitted = [];

    /// <summary>
    /// Creates the parser and registers every irregular space character as an opening character.
    /// </summary>
    public SpaceNormalizerParser() => OpeningCharacters = IrregularSpaceChars;

    /// <summary>
    /// Consumes one irregular space character at the start of <paramref name="slice"/>.
    /// </summary>
    /// <param name="processor">The inline processor driving the parse.</param>
    /// <param name="slice">The remaining text; advanced by one character on a match.</param>
    /// <returns><c>true</c> when an irregular space was consumed; otherwise <c>false</c>.</returns>
    public override bool Match(InlineProcessor processor, ref StringSlice slice)
    {
        // CurrentChar yields '\0' for an empty slice, so this check cannot go out of range.
        if (!SpaceSearchValues.Contains(slice.CurrentChar))
            return false;

        // Every occurrence needs its own inline: an inline can only sit at one position in
        // one tree, and the processor skips inlines that already have a parent, so a shared
        // instance would silently drop every irregular space after the first one.
        var position = processor.GetSourcePosition(slice.Start, out var line, out var column);
        var inline = new IrregularSpace { Line = line, Column = column };
        inline.Span.Start = position;
        inline.Span.End = position;
        processor.Inline = inline;

        // Emit a single hint per file on first detection
        var filePath = processor.GetContext().MarkdownSourcePath.FullName;
        bool firstInFile;
        lock (FilesWithHintEmitted)
            firstInFile = FilesWithHintEmitted.Add(filePath);

        if (firstInFile)
            processor.EmitHint(inline, 1, "Irregular space detected. Run 'docs-builder format' to automatically fix all instances.");

        slice.SkipChar();
        return true;
    }
}
98+
99+
/// <summary>
/// Leaf inline standing in for one irregular space character found in the Markdown source.
/// </summary>
public class IrregularSpace : LeafInline
{
    /// <summary>
    /// Shared, pre-allocated instance.
    /// </summary>
    /// <remarks>
    /// NOTE(review): Markdig inlines carry parent and sibling links, so a single instance can
    /// presumably occupy only one position in one document tree at a time - confirm before
    /// inserting this instance into more than one place.
    /// </remarks>
    public static readonly IrregularSpace Instance = new();
}
103+
104+
/// <summary>
/// HTML renderer for <see cref="IrregularSpace"/> nodes.
/// </summary>
public class SpaceNormalizerRenderer : HtmlObjectRenderer<IrregularSpace>
{
    /// <summary>
    /// Writes a single regular space character in place of the irregular one.
    /// </summary>
    /// <param name="renderer">The HTML renderer receiving the output.</param>
    /// <param name="obj">The irregular space node being rendered (its content is not read).</param>
    protected override void Write(HtmlRenderer renderer, IrregularSpace obj)
    {
        const char regularSpace = ' ';
        _ = renderer.Write(regularSpace);
    }
}

src/Elastic.Markdown/Myst/Linters/WhiteSpaceNormalizer.cs

Lines changed: 0 additions & 127 deletions
This file was deleted.

src/Elastic.Markdown/Myst/MarkdownParser.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ public static MarkdownPipeline Pipeline
169169
.UseEnhancedCodeBlocks()
170170
.UseHtmxLinkInlineRenderer()
171171
.DisableHtml()
172-
.UseWhiteSpaceNormalizer()
172+
.UseSpaceNormalizer()
173173
.UseHardBreaks();
174174
_ = builder.BlockParsers.TryRemove<IndentedCodeBlockParser>();
175175
PipelineCached = builder.Build();

0 commit comments

Comments
 (0)