Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/_docset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ toc:
- file: index.md
- file: build.md
- file: diff-validate.md
- file: format.md
- file: index-command.md
- file: mv.md
- file: serve.md
Expand Down
83 changes: 83 additions & 0 deletions docs/cli/docset/format.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# format

Format documentation files by fixing common issues such as irregular space characters.

## Usage

```
docs-builder format [options...] [-h|--help] [--version]
```

## Options

`-p|--path` `<string>`
: Path to the documentation folder; defaults to the current working directory. (optional)

`--dry-run` `<bool?>`
: Preview changes without modifying files (optional)

## Description

The `format` command automatically detects and fixes formatting issues in your documentation files. The command only processes Markdown files (`.md`) that are included in your `_docset.yml` table of contents, ensuring that only intentional documentation files are modified.

Currently, it handles irregular space characters that may impair Markdown rendering.

### Irregular Space Detection

The format command detects and replaces 24 types of irregular space characters with regular spaces, including:

- No-Break Space (U+00A0)
- En Space (U+2002)
- Em Space (U+2003)
- Zero Width Space (U+200B)
- Line Separator (U+2028)
- Paragraph Separator (U+2029)
- And 18 other irregular space variants

These characters can cause unexpected rendering issues in Markdown and are often introduced accidentally through copy-paste operations from other applications.

## Examples

### Format current directory

```bash
docs-builder format
```

### Preview changes without modifying files

```bash
docs-builder format --dry-run
```

### Format specific documentation folder

```bash
docs-builder format --path /path/to/docs
```

## Output

The command provides detailed feedback about the formatting process:

```
Formatting documentation in: /path/to/docs
Fixed 2 irregular space(s) in: guide/setup.md
Fixed 1 irregular space(s) in: api/endpoints.md

Formatting complete:
Files processed: 155
Files modified: 2
Total replacements: 3
```

When using `--dry-run`, files are not modified and the command reminds you to run without the flag to apply changes.

## Future Enhancements

The format command is designed to be extended with additional formatting capabilities in the future, such as:

- Line ending normalization
- Trailing whitespace removal
- Consistent heading spacing
- And other formatting fixes
108 changes: 108 additions & 0 deletions src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Buffers;
using Elastic.Markdown.Diagnostics;
using Markdig;
using Markdig.Helpers;
using Markdig.Parsers;
using Markdig.Parsers.Inlines;
using Markdig.Renderers;
using Markdig.Renderers.Html;
using Markdig.Renderers.Html.Inlines;
using Markdig.Syntax.Inlines;

namespace Elastic.Markdown.Myst.Linters;

/// <summary>
/// Fluent registration helper for <see cref="SpaceNormalizerBuilderExtension"/>.
/// </summary>
public static class SpaceNormalizerBuilderExtensions
{
    /// <summary>
    /// Registers <see cref="SpaceNormalizerBuilderExtension"/> on the builder unless it is already registered.
    /// </summary>
    /// <param name="pipeline">The pipeline builder to register the extension on.</param>
    /// <returns>The same <paramref name="pipeline"/> instance, so calls can be chained.</returns>
    public static MarkdownPipelineBuilder UseSpaceNormalizer(this MarkdownPipelineBuilder pipeline)
    {
        var registeredExtensions = pipeline.Extensions;
        registeredExtensions.AddIfNotAlready<SpaceNormalizerBuilderExtension>();

        return pipeline;
    }
}

/// <summary>
/// Markdig extension that wires <see cref="SpaceNormalizerParser"/> into the inline parsers
/// and <see cref="SpaceNormalizerRenderer"/> into the object renderers.
/// </summary>
public class SpaceNormalizerBuilderExtension : IMarkdownExtension
{
    /// <summary>
    /// Inserts a new <see cref="SpaceNormalizerParser"/> before the <see cref="EmphasisInlineParser"/>.
    /// </summary>
    /// <param name="pipeline">The pipeline builder whose inline parsers are extended.</param>
    public void Setup(MarkdownPipelineBuilder pipeline)
    {
        var parser = new SpaceNormalizerParser();

        // The insertion result is intentionally ignored, exactly as before.
        _ = pipeline.InlineParsers.InsertBefore<EmphasisInlineParser>(parser);
    }

    /// <summary>
    /// Inserts a new <see cref="SpaceNormalizerRenderer"/> after the <see cref="EmphasisInlineRenderer"/>.
    /// </summary>
    /// <param name="pipeline">The built pipeline (not used here).</param>
    /// <param name="renderer">The renderer whose object renderers are extended.</param>
    public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer)
    {
        var spaceRenderer = new SpaceNormalizerRenderer();

        // The insertion result is intentionally ignored, exactly as before.
        _ = renderer.ObjectRenderers.InsertAfter<EmphasisInlineRenderer>(spaceRenderer);
    }
}

/// <summary>
/// Inline parser that matches a single irregular space character, replaces it with an
/// <see cref="IrregularSpace"/> inline, and emits one hint per source file the first time
/// such a character is seen in that file.
/// </summary>
public class SpaceNormalizerParser : InlineParser
{
    // Collection of irregular space characters that may impair Markdown rendering
    private static readonly char[] IrregularSpaceChars =
    [
        '\u000B', // Line Tabulation (\v) - <VT>
        '\u000C', // Form Feed (\f) - <FF>
        '\u00A0', // No-Break Space - <NBSP>
        '\u0085', // Next Line
        '\u1680', // Ogham Space Mark
        '\u180E', // Mongolian Vowel Separator - <MVS>
        '\ufeff', // Zero Width No-Break Space - <BOM>
        '\u2000', // En Quad
        '\u2001', // Em Quad
        '\u2002', // En Space - <ENSP>
        '\u2003', // Em Space - <EMSP>
        '\u2004', // Three-Per-Em
        '\u2005', // Four-Per-Em
        '\u2006', // Six-Per-Em
        '\u2007', // Figure Space
        '\u2008', // Punctuation Space - <PUNCSP>
        '\u2009', // Thin Space
        '\u200A', // Hair Space
        '\u200B', // Zero Width Space - <ZWSP>
        '\u2028', // Line Separator
        '\u2029', // Paragraph Separator
        '\u202F', // Narrow No-Break Space
        '\u205F', // Medium Mathematical Space
        '\u3000' // Ideographic Space
    ];
    private static readonly SearchValues<char> SpaceSearchValues = SearchValues.Create(IrregularSpaceChars);

    // Track which files have already had the hint emitted to avoid duplicates.
    // The set is static, so entries persist for the lifetime of the process;
    // it also serves as the lock object guarding its own mutations.
    private static readonly HashSet<string> FilesWithHintEmitted = [];

    /// <summary>
    /// Creates the parser and registers every irregular space character as an opening character.
    /// </summary>
    public SpaceNormalizerParser() => OpeningCharacters = IrregularSpaceChars;

    /// <summary>
    /// Consumes one irregular space character at the current slice position.
    /// </summary>
    /// <param name="processor">The inline processor; receives the produced inline and the hint.</param>
    /// <param name="slice">The text being parsed; advanced by one character on a match.</param>
    /// <returns>
    /// <c>true</c> when the current character is an irregular space and was consumed;
    /// otherwise <c>false</c>, leaving <paramref name="slice"/> untouched.
    /// </returns>
    public override bool Match(InlineProcessor processor, ref StringSlice slice)
    {
        // CurrentChar is safe on an empty slice, unlike slicing the span to length 1.
        if (!SpaceSearchValues.Contains(slice.CurrentChar))
        {
            return false;
        }

        // Allocate a node per match: an inline is linked into the tree it is appended to,
        // so one shared instance cannot represent several occurrences or several documents.
        processor.Inline = new IrregularSpace();

        // Emit a single hint per file on first detection
        var context = processor.GetContext();
        var filePath = context.MarkdownSourcePath.FullName;

        // HashSet.Add reports whether the path was newly added, so one call is enough.
        bool isFirstDetection;
        lock (FilesWithHintEmitted)
        {
            isFirstDetection = FilesWithHintEmitted.Add(filePath);
        }

        // Emit outside the lock so the diagnostics call never runs while the set is held.
        if (isFirstDetection)
        {
            processor.EmitHint(processor.Inline, 1, "Irregular space detected. Run 'docs-builder format' to automatically fix all instances.");
        }

        slice.SkipChar();
        return true;
    }
}

/// <summary>
/// Leaf inline node that stands in for an irregular space character in the parsed document.
/// </summary>
public class IrregularSpace : LeafInline
{
    /// <summary>
    /// A pre-allocated instance of the node.
    /// </summary>
    // NOTE(review): Markdig inlines carry parent/sibling links, so a single shared instance
    // can presumably be attached to only one tree position at a time — confirm before reusing it.
    public static readonly IrregularSpace Instance = new IrregularSpace();
}

/// <summary>
/// HTML renderer for <see cref="IrregularSpace"/> inlines.
/// </summary>
public class SpaceNormalizerRenderer : HtmlObjectRenderer<IrregularSpace>
{
    /// <summary>
    /// Writes a single regular space character for the given inline.
    /// </summary>
    /// <param name="renderer">The HTML renderer to write to.</param>
    /// <param name="obj">The inline being rendered (its content is not inspected).</param>
    protected override void Write(HtmlRenderer renderer, IrregularSpace obj)
    {
        const char regularSpace = ' ';
        _ = renderer.Write(regularSpace);
    }
}
127 changes: 0 additions & 127 deletions src/Elastic.Markdown/Myst/Linters/WhiteSpaceNormalizer.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/Elastic.Markdown/Myst/MarkdownParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public static MarkdownPipeline Pipeline
.UseEnhancedCodeBlocks()
.UseHtmxLinkInlineRenderer()
.DisableHtml()
.UseWhiteSpaceNormalizer()
.UseSpaceNormalizer()
.UseHardBreaks();
_ = builder.BlockParsers.TryRemove<IndentedCodeBlockParser>();
PipelineCached = builder.Build();
Expand Down
Loading
Loading