From 22ac68c1ad8684669cd8b4a6644f0afd91ad81e7 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri Benedetti Date: Tue, 21 Oct 2025 11:04:14 +0200 Subject: [PATCH 1/8] Add format command --- .../FormatService.cs | 156 ++++++++++++++++++ .../docs-builder/Commands/FormatCommand.cs | 43 +++++ src/tooling/docs-builder/Program.cs | 1 + 3 files changed, 200 insertions(+) create mode 100644 src/authoring/Elastic.Documentation.Refactor/FormatService.cs create mode 100644 src/tooling/docs-builder/Commands/FormatCommand.cs diff --git a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs new file mode 100644 index 000000000..ac55f0d04 --- /dev/null +++ b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs @@ -0,0 +1,156 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Buffers; +using System.IO.Abstractions; +using System.Text; +using Elastic.Documentation.Configuration; +using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.Services; +using Microsoft.Extensions.Logging; + +namespace Elastic.Documentation.Refactor; + +public class FormatService( + ILoggerFactory logFactory +) : IService +{ + private readonly ILogger _logger = logFactory.CreateLogger(); + + // Collection of irregular whitespace characters that may impair Markdown rendering + private static readonly char[] IrregularWhitespaceChars = + [ + '\u000B', // Line Tabulation (\v) - + '\u000C', // Form Feed (\f) - + '\u00A0', // No-Break Space - + '\u0085', // Next Line + '\u1680', // Ogham Space Mark + '\u180E', // Mongolian Vowel Separator - + '\ufeff', // Zero Width No-Break Space - + '\u2000', // En Quad + '\u2001', // Em Quad + '\u2002', // En Space - + '\u2003', // Em Space - + '\u2004', // Tree-Per-Em + '\u2005', // Four-Per-Em + '\u2006', // 
Six-Per-Em + '\u2007', // Figure Space + '\u2008', // Punctuation Space - + '\u2009', // Thin Space + '\u200A', // Hair Space + '\u200B', // Zero Width Space - + '\u2028', // Line Separator + '\u2029', // Paragraph Separator + '\u202F', // Narrow No-Break Space + '\u205F', // Medium Mathematical Space + '\u3000' // Ideographic Space + ]; + + private static readonly SearchValues IrregularWhitespaceSearchValues = SearchValues.Create(IrregularWhitespaceChars); + + public async Task Format( + IDiagnosticsCollector collector, + string? path, + bool? dryRun, + IFileSystem fs, + Cancel ctx + ) + { + var isDryRun = dryRun ?? false; + var rootPath = string.IsNullOrEmpty(path) ? fs.Directory.GetCurrentDirectory() : path; + var rootDir = fs.DirectoryInfo.New(rootPath); + + if (!rootDir.Exists) + { + collector.EmitError(string.Empty, $"Directory not found: {rootPath}"); + return false; + } + + _logger.LogInformation("Formatting documentation in: {Path}", rootDir.FullName); + if (isDryRun) + _logger.LogInformation("Running in dry-run mode - no files will be modified"); + + var markdownFiles = rootDir.GetFiles("*.md", SearchOption.AllDirectories); + var totalFilesProcessed = 0; + var totalFilesModified = 0; + var totalReplacements = 0; + + foreach (var file in markdownFiles) + { + if (ctx.IsCancellationRequested) + break; + + totalFilesProcessed++; + var (modified, replacements) = await ProcessFile(file, isDryRun, fs); + + if (modified) + { + totalFilesModified++; + totalReplacements += replacements; + _logger.LogInformation("Fixed {Count} irregular whitespace(s) in: {File}", replacements, GetRelativePath(rootDir, file)); + } + } + + _logger.LogInformation(""); + _logger.LogInformation("Formatting complete:"); + _logger.LogInformation(" Files processed: {Processed}", totalFilesProcessed); + _logger.LogInformation(" Files modified: {Modified}", totalFilesModified); + _logger.LogInformation(" Total replacements: {Replacements}", totalReplacements); + + if (isDryRun && 
totalFilesModified > 0) + { + _logger.LogInformation(""); + _logger.LogInformation("Run without --dry-run to apply changes"); + } + + return true; + } + + private static async Task<(bool modified, int replacements)> ProcessFile(IFileInfo file, bool isDryRun, IFileSystem fs) + { + var content = await fs.File.ReadAllTextAsync(file.FullName); + var modified = false; + var replacements = 0; + + // Check if file contains any irregular whitespace + if (content.AsSpan().IndexOfAny(IrregularWhitespaceSearchValues) == -1) + return (false, 0); + + // Replace irregular whitespace with regular spaces + var sb = new StringBuilder(content.Length); + foreach (var c in content) + { + if (IrregularWhitespaceSearchValues.Contains(c)) + { + _ = sb.Append(' '); + replacements++; + modified = true; + } + else + { + _ = sb.Append(c); + } + } + + if (modified && !isDryRun) + { + await fs.File.WriteAllTextAsync(file.FullName, sb.ToString()); + } + + return (modified, replacements); + } + + private static string GetRelativePath(IDirectoryInfo root, IFileInfo file) + { + var rootPath = root.FullName.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); + var filePath = file.FullName; + + if (filePath.StartsWith(rootPath, StringComparison.OrdinalIgnoreCase)) + { + return filePath.Substring(rootPath.Length).TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); + } + + return filePath; + } +} diff --git a/src/tooling/docs-builder/Commands/FormatCommand.cs b/src/tooling/docs-builder/Commands/FormatCommand.cs new file mode 100644 index 000000000..5ed8616a9 --- /dev/null +++ b/src/tooling/docs-builder/Commands/FormatCommand.cs @@ -0,0 +1,43 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using System.IO.Abstractions; +using ConsoleAppFramework; +using Elastic.Documentation.Configuration; +using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.Refactor; +using Elastic.Documentation.Services; +using Microsoft.Extensions.Logging; + +namespace Documentation.Builder.Commands; + +internal sealed class FormatCommand( + ILoggerFactory logFactory, + IDiagnosticsCollector collector +) +{ + /// + /// Format documentation files by fixing common issues like irregular whitespace + /// + /// -p, Path to the documentation folder, defaults to pwd + /// Preview changes without modifying files + /// + [Command("")] + public async Task Format( + string? path = null, + bool? dryRun = null, + Cancel ctx = default + ) + { + await using var serviceInvoker = new ServiceInvoker(collector); + + var service = new FormatService(logFactory); + var fs = new FileSystem(); + + serviceInvoker.AddCommand(service, (path, dryRun, fs), + async static (s, collector, state, ctx) => await s.Format(collector, state.path, state.dryRun, state.fs, ctx) + ); + return await serviceInvoker.InvokeAsync(ctx); + } +} diff --git a/src/tooling/docs-builder/Program.cs b/src/tooling/docs-builder/Program.cs index 31f6225f4..5e91ca90d 100644 --- a/src/tooling/docs-builder/Program.cs +++ b/src/tooling/docs-builder/Program.cs @@ -38,6 +38,7 @@ app.Add("mv"); app.Add("serve"); app.Add("index"); +app.Add("format"); //assembler commands From 88ac8742431aa4be32ea665bb214e28e3bf05dbb Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri Benedetti Date: Tue, 21 Oct 2025 11:10:18 +0200 Subject: [PATCH 2/8] Add docs --- docs/cli/docset/format.md | 81 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 docs/cli/docset/format.md diff --git a/docs/cli/docset/format.md b/docs/cli/docset/format.md new file mode 100644 index 000000000..24d6039c6 --- /dev/null +++ b/docs/cli/docset/format.md @@ -0,0 +1,81 
@@ +# format + +Format documentation files by fixing common issues like irregular whitespace + +## Usage + +``` +docs-builder format [options...] [-h|--help] [--version] +``` + +## Options + +`-p|--path` `` +: Path to the documentation folder, defaults to pwd. (optional) + +`--dry-run` `` +: Preview changes without modifying files (optional) + +## Description + +The `format` command automatically detects and fixes formatting issues in your documentation files. Currently, it handles irregular whitespace characters that may impair Markdown rendering. + +### Irregular Whitespace Detection + +The format command detects and replaces 24 types of irregular whitespace characters with regular spaces, including: + +- No-Break Space (U+00A0) +- En Space (U+2002) +- Em Space (U+2003) +- Zero Width Space (U+200B) +- Line Separator (U+2028) +- Paragraph Separator (U+2029) +- And 18 other irregular whitespace variants + +These characters can cause unexpected rendering issues in Markdown and are often introduced accidentally through copy-paste operations from other applications. + +## Examples + +### Format current directory + +```bash +docs-builder format +``` + +### Preview changes without modifying files + +```bash +docs-builder format --dry-run +``` + +### Format specific documentation folder + +```bash +docs-builder format --path /path/to/docs +``` + +## Output + +The command provides detailed feedback about the formatting process: + +``` +Formatting documentation in: /path/to/docs +Fixed 2 irregular whitespace(s) in: guide/setup.md +Fixed 1 irregular whitespace(s) in: api/endpoints.md + +Formatting complete: + Files processed: 155 + Files modified: 2 + Total replacements: 3 +``` + +When using `--dry-run`, files are not modified and the command reminds you to run without the flag to apply changes. 
+ +## Future Enhancements + +The format command is designed to be extended with additional formatting capabilities in the future, such as: + +- Line ending normalization +- Trailing whitespace removal +- Consistent heading spacing +- And other formatting fixes From c6ef6adb8a4db24ef501560ce306eeacae1ecbd0 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri Benedetti Date: Tue, 21 Oct 2025 11:22:37 +0200 Subject: [PATCH 3/8] Have the command parse docset --- docs/_docset.yml | 1 + docs/cli/docset/format.md | 4 +- .../FormatService.cs | 38 ++++++------------- .../docs-builder/Commands/FormatCommand.cs | 5 ++- 4 files changed, 19 insertions(+), 29 deletions(-) diff --git a/docs/_docset.yml b/docs/_docset.yml index ed1e8aa7d..4c87c4e85 100644 --- a/docs/_docset.yml +++ b/docs/_docset.yml @@ -123,6 +123,7 @@ toc: - file: index.md - file: build.md - file: diff-validate.md + - file: format.md - file: index-command.md - file: mv.md - file: serve.md diff --git a/docs/cli/docset/format.md b/docs/cli/docset/format.md index 24d6039c6..7972e812f 100644 --- a/docs/cli/docset/format.md +++ b/docs/cli/docset/format.md @@ -18,7 +18,9 @@ docs-builder format [options...] [-h|--help] [--version] ## Description -The `format` command automatically detects and fixes formatting issues in your documentation files. Currently, it handles irregular whitespace characters that may impair Markdown rendering. +The `format` command automatically detects and fixes formatting issues in your documentation files. The command only processes Markdown files (`.md`) that are included in your `_docset.yml` table of contents, ensuring that only intentional documentation files are modified. + +Currently, it handles irregular whitespace characters that may impair Markdown rendering. 
### Irregular Whitespace Detection diff --git a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs index ac55f0d04..3157c8785 100644 --- a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs +++ b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs @@ -7,13 +7,16 @@ using System.Text; using Elastic.Documentation.Configuration; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.Links.CrossLinks; using Elastic.Documentation.Services; +using Elastic.Markdown.IO; using Microsoft.Extensions.Logging; namespace Elastic.Documentation.Refactor; public class FormatService( - ILoggerFactory logFactory + ILoggerFactory logFactory, + IConfigurationContext configurationContext ) : IService { private readonly ILogger _logger = logFactory.CreateLogger(); @@ -58,37 +61,33 @@ Cancel ctx ) { var isDryRun = dryRun ?? false; - var rootPath = string.IsNullOrEmpty(path) ? fs.Directory.GetCurrentDirectory() : path; - var rootDir = fs.DirectoryInfo.New(rootPath); - if (!rootDir.Exists) - { - collector.EmitError(string.Empty, $"Directory not found: {rootPath}"); - return false; - } + // Create BuildContext to load the documentation set + var context = new BuildContext(collector, fs, fs, configurationContext, ExportOptions.MetadataOnly, path, null); + var set = new DocumentationSet(context, logFactory, NoopCrossLinkResolver.Instance); - _logger.LogInformation("Formatting documentation in: {Path}", rootDir.FullName); + _logger.LogInformation("Formatting documentation in: {Path}", set.SourceDirectory.FullName); if (isDryRun) _logger.LogInformation("Running in dry-run mode - no files will be modified"); - var markdownFiles = rootDir.GetFiles("*.md", SearchOption.AllDirectories); var totalFilesProcessed = 0; var totalFilesModified = 0; var totalReplacements = 0; - foreach (var file in markdownFiles) + // Only process markdown files that are part of the documentation set + foreach 
(var docFile in set.Files.OfType()) { if (ctx.IsCancellationRequested) break; totalFilesProcessed++; - var (modified, replacements) = await ProcessFile(file, isDryRun, fs); + var (modified, replacements) = await ProcessFile(docFile.SourceFile, isDryRun, fs); if (modified) { totalFilesModified++; totalReplacements += replacements; - _logger.LogInformation("Fixed {Count} irregular whitespace(s) in: {File}", replacements, GetRelativePath(rootDir, file)); + _logger.LogInformation("Fixed {Count} irregular whitespace(s) in: {File}", replacements, docFile.RelativePath); } } @@ -140,17 +139,4 @@ Cancel ctx return (modified, replacements); } - - private static string GetRelativePath(IDirectoryInfo root, IFileInfo file) - { - var rootPath = root.FullName.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); - var filePath = file.FullName; - - if (filePath.StartsWith(rootPath, StringComparison.OrdinalIgnoreCase)) - { - return filePath.Substring(rootPath.Length).TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); - } - - return filePath; - } } diff --git a/src/tooling/docs-builder/Commands/FormatCommand.cs b/src/tooling/docs-builder/Commands/FormatCommand.cs index 5ed8616a9..076fc3010 100644 --- a/src/tooling/docs-builder/Commands/FormatCommand.cs +++ b/src/tooling/docs-builder/Commands/FormatCommand.cs @@ -14,7 +14,8 @@ namespace Documentation.Builder.Commands; internal sealed class FormatCommand( ILoggerFactory logFactory, - IDiagnosticsCollector collector + IDiagnosticsCollector collector, + IConfigurationContext configurationContext ) { /// @@ -32,7 +33,7 @@ public async Task Format( { await using var serviceInvoker = new ServiceInvoker(collector); - var service = new FormatService(logFactory); + var service = new FormatService(logFactory, configurationContext); var fs = new FileSystem(); serviceInvoker.AddCommand(service, (path, dryRun, fs), From 8042702dc7efce66bf3dd7e1b645cb472edc2c1b Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri 
Benedetti Date: Tue, 21 Oct 2025 11:36:51 +0200 Subject: [PATCH 4/8] Various refactors --- docs/cli/docset/format.md | 14 +- docs/index.md | 4 +- .../Myst/Linters/SpaceNormalizer.cs | 108 +++++++++++++++ .../Myst/Linters/WhiteSpaceNormalizer.cs | 127 ------------------ src/Elastic.Markdown/Myst/MarkdownParser.cs | 2 +- .../FormatService.cs | 97 ++++++------- .../Formatters/IFormatter.cs | 23 ++++ .../Formatters/IrregularSpaceFormatter.cs | 73 ++++++++++ ...paceNormalizers.fs => SpaceNormalizers.fs} | 6 +- tests/authoring/authoring.fsproj | 2 +- 10 files changed, 258 insertions(+), 198 deletions(-) create mode 100644 src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs delete mode 100644 src/Elastic.Markdown/Myst/Linters/WhiteSpaceNormalizer.cs create mode 100644 src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs create mode 100644 src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs rename tests/authoring/Linters/{WhiteSpaceNormalizers.fs => SpaceNormalizers.fs} (68%) diff --git a/docs/cli/docset/format.md b/docs/cli/docset/format.md index 7972e812f..9011cb56b 100644 --- a/docs/cli/docset/format.md +++ b/docs/cli/docset/format.md @@ -1,6 +1,6 @@ # format -Format documentation files by fixing common issues like irregular whitespace +Format documentation files by fixing common issues like irregular space ## Usage @@ -20,11 +20,11 @@ docs-builder format [options...] [-h|--help] [--version] The `format` command automatically detects and fixes formatting issues in your documentation files. The command only processes Markdown files (`.md`) that are included in your `_docset.yml` table of contents, ensuring that only intentional documentation files are modified. -Currently, it handles irregular whitespace characters that may impair Markdown rendering. +Currently, it handles irregular space characters that may impair Markdown rendering. 
-### Irregular Whitespace Detection +### Irregular Space Detection -The format command detects and replaces 24 types of irregular whitespace characters with regular spaces, including: +The format command detects and replaces 24 types of irregular space characters with regular spaces, including: - No-Break Space (U+00A0) - En Space (U+2002) @@ -32,7 +32,7 @@ The format command detects and replaces 24 types of irregular whitespace charact - Zero Width Space (U+200B) - Line Separator (U+2028) - Paragraph Separator (U+2029) -- And 18 other irregular whitespace variants +- And 18 other irregular space variants These characters can cause unexpected rendering issues in Markdown and are often introduced accidentally through copy-paste operations from other applications. @@ -62,8 +62,8 @@ The command provides detailed feedback about the formatting process: ``` Formatting documentation in: /path/to/docs -Fixed 2 irregular whitespace(s) in: guide/setup.md -Fixed 1 irregular whitespace(s) in: api/endpoints.md +Fixed 2 irregular space(s) in: guide/setup.md +Fixed 1 irregular space(s) in: api/endpoints.md Formatting complete: Files processed: 155 diff --git a/docs/index.md b/docs/index.md index 7e999138f..33b55419e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,9 +4,9 @@ navigation_title: Elastic Docs v3 # Welcome to Elastic Docs v3 -Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem. +Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. 
Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem. -## What do you want to do today? +## What do you want to do today? * [Contribute to Elastic documentation](./contribute/index.md) * [Learn about migration to Elastic Docs V3](./migration/index.md) diff --git a/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs new file mode 100644 index 000000000..ebef4138e --- /dev/null +++ b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs @@ -0,0 +1,108 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Buffers; +using Elastic.Markdown.Diagnostics; +using Markdig; +using Markdig.Helpers; +using Markdig.Parsers; +using Markdig.Parsers.Inlines; +using Markdig.Renderers; +using Markdig.Renderers.Html; +using Markdig.Renderers.Html.Inlines; +using Markdig.Syntax.Inlines; + +namespace Elastic.Markdown.Myst.Linters; + +public static class SpaceNormalizerBuilderExtensions +{ + public static MarkdownPipelineBuilder UseSpaceNormalizer(this MarkdownPipelineBuilder pipeline) + { + pipeline.Extensions.AddIfNotAlready(); + return pipeline; + } +} + +public class SpaceNormalizerBuilderExtension : IMarkdownExtension +{ + public void Setup(MarkdownPipelineBuilder pipeline) => + pipeline.InlineParsers.InsertBefore(new SpaceNormalizerParser()); + + public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer) => + renderer.ObjectRenderers.InsertAfter(new SpaceNormalizerRenderer()); +} + +public class SpaceNormalizerParser : InlineParser +{ + // Collection of irregular space characters that may impair Markdown rendering + private static readonly char[] IrregularSpaceChars = + [ + '\u000B', // Line 
Tabulation (\v) - + '\u000C', // Form Feed (\f) - + '\u00A0', // No-Break Space - + '\u0085', // Next Line + '\u1680', // Ogham Space Mark + '\u180E', // Mongolian Vowel Separator - + '\ufeff', // Zero Width No-Break Space - + '\u2000', // En Quad + '\u2001', // Em Quad + '\u2002', // En Space - + '\u2003', // Em Space - + '\u2004', // Tree-Per-Em + '\u2005', // Four-Per-Em + '\u2006', // Six-Per-Em + '\u2007', // Figure Space + '\u2008', // Punctuation Space - + '\u2009', // Thin Space + '\u200A', // Hair Space + '\u200B', // Zero Width Space - + '\u2028', // Line Separator + '\u2029', // Paragraph Separator + '\u202F', // Narrow No-Break Space + '\u205F', // Medium Mathematical Space + '\u3000' // Ideographic Space + ]; + private static readonly SearchValues SpaceSearchValues = SearchValues.Create(IrregularSpaceChars); + + // Track which files have already had the hint emitted to avoid duplicates + private static readonly HashSet FilesWithHintEmitted = []; + + public SpaceNormalizerParser() => OpeningCharacters = IrregularSpaceChars; + + public override bool Match(InlineProcessor processor, ref StringSlice slice) + { + var span = slice.AsSpan().Slice(0, 1); + if (span.IndexOfAny(SpaceSearchValues) == -1) + return false; + + processor.Inline = IrregularSpace.Instance; + + // Emit a single hint per file on first detection + var context = processor.GetContext(); + var filePath = context.MarkdownSourcePath.FullName; + + lock (FilesWithHintEmitted) + { + if (!FilesWithHintEmitted.Contains(filePath)) + { + _ = FilesWithHintEmitted.Add(filePath); + processor.EmitHint(processor.Inline, 1, "Irregular space detected. 
Run 'docs-builder format' to automatically fix all instances."); + } + } + + slice.SkipChar(); + return true; + } +} + +public class IrregularSpace : LeafInline +{ + public static readonly IrregularSpace Instance = new(); +}; + +public class SpaceNormalizerRenderer : HtmlObjectRenderer +{ + protected override void Write(HtmlRenderer renderer, IrregularSpace obj) => + renderer.Write(' '); +} diff --git a/src/Elastic.Markdown/Myst/Linters/WhiteSpaceNormalizer.cs b/src/Elastic.Markdown/Myst/Linters/WhiteSpaceNormalizer.cs deleted file mode 100644 index 25af91bef..000000000 --- a/src/Elastic.Markdown/Myst/Linters/WhiteSpaceNormalizer.cs +++ /dev/null @@ -1,127 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using System.Buffers; -using Elastic.Markdown.Diagnostics; -using Markdig; -using Markdig.Helpers; -using Markdig.Parsers; -using Markdig.Parsers.Inlines; -using Markdig.Renderers; -using Markdig.Renderers.Html; -using Markdig.Renderers.Html.Inlines; -using Markdig.Syntax.Inlines; - -namespace Elastic.Markdown.Myst.Linters; - -public static class WhiteSpaceNormalizerBuilderExtensions -{ - public static MarkdownPipelineBuilder UseWhiteSpaceNormalizer(this MarkdownPipelineBuilder pipeline) - { - pipeline.Extensions.AddIfNotAlready(); - return pipeline; - } -} - -public class WhiteSpaceNormalizerBuilderExtension : IMarkdownExtension -{ - public void Setup(MarkdownPipelineBuilder pipeline) => - pipeline.InlineParsers.InsertBefore(new WhiteSpaceNormalizerParser()); - - public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer) => - renderer.ObjectRenderers.InsertAfter(new WhiteSpaceNormalizerRenderer()); -} - -public class WhiteSpaceNormalizerParser : InlineParser -{ - // Collection of irregular whitespace characters that may impair Markdown rendering - private static readonly char[] 
IrregularWhitespaceChars = - [ - '\u000B', // Line Tabulation (\v) - - '\u000C', // Form Feed (\f) - - '\u00A0', // No-Break Space - - '\u0085', // Next Line - '\u1680', // Ogham Space Mark - '\u180E', // Mongolian Vowel Separator - - '\ufeff', // Zero Width No-Break Space - - '\u2000', // En Quad - '\u2001', // Em Quad - '\u2002', // En Space - - '\u2003', // Em Space - - '\u2004', // Tree-Per-Em - '\u2005', // Four-Per-Em - '\u2006', // Six-Per-Em - '\u2007', // Figure Space - '\u2008', // Punctuation Space - - '\u2009', // Thin Space - '\u200A', // Hair Space - '\u200B', // Zero Width Space - - '\u2028', // Line Separator - '\u2029', // Paragraph Separator - '\u202F', // Narrow No-Break Space - '\u205F', // Medium Mathematical Space - '\u3000' // Ideographic Space - ]; - private static readonly SearchValues WhiteSpaceSearchValues = SearchValues.Create(IrregularWhitespaceChars); - - public WhiteSpaceNormalizerParser() => OpeningCharacters = IrregularWhitespaceChars; - - public override bool Match(InlineProcessor processor, ref StringSlice slice) - { - var span = slice.AsSpan().Slice(0, 1); - if (span.IndexOfAny(WhiteSpaceSearchValues) == -1) - return false; - - processor.Inline = IrregularWhiteSpace.Instance; - - var c = span[0]; - var charName = GetCharacterName(c); - - processor.EmitHint(processor.Inline, 1, $"Irregular whitespace character detected: U+{(int)c:X4} ({charName}). 
This may impair Markdown rendering."); - - slice.SkipChar(); - return true; - } - - // Helper to get a friendly name for the whitespace character - private static string GetCharacterName(char c) => c switch - { - '\u000B' => "Line Tabulation (VT)", - '\u000C' => "Form Feed (FF)", - '\u00A0' => "No-Break Space (NBSP)", - '\u0085' => "Next Line", - '\u1680' => "Ogham Space Mark", - '\u180E' => "Mongolian Vowel Separator (MVS)", - '\ufeff' => "Zero Width No-Break Space (BOM)", - '\u2000' => "En Quad", - '\u2001' => "Em Quad", - '\u2002' => "En Space (ENSP)", - '\u2003' => "Em Space (EMSP)", - '\u2004' => "Tree-Per-Em", - '\u2005' => "Four-Per-Em", - '\u2006' => "Six-Per-Em", - '\u2007' => "Figure Space", - '\u2008' => "Punctuation Space (PUNCSP)", - '\u2009' => "Thin Space", - '\u200A' => "Hair Space", - '\u200B' => "Zero Width Space (ZWSP)", - '\u2028' => "Line Separator", - '\u2029' => "Paragraph Separator", - '\u202F' => "Narrow No-Break Space", - '\u205F' => "Medium Mathematical Space", - '\u3000' => "Ideographic Space", - _ => "Unknown" - }; -} - -public class IrregularWhiteSpace : LeafInline -{ - public static readonly IrregularWhiteSpace Instance = new(); -}; - -public class WhiteSpaceNormalizerRenderer : HtmlObjectRenderer -{ - protected override void Write(HtmlRenderer renderer, IrregularWhiteSpace obj) => - renderer.Write(' '); -} diff --git a/src/Elastic.Markdown/Myst/MarkdownParser.cs b/src/Elastic.Markdown/Myst/MarkdownParser.cs index 8eb3dde01..1c530fba4 100644 --- a/src/Elastic.Markdown/Myst/MarkdownParser.cs +++ b/src/Elastic.Markdown/Myst/MarkdownParser.cs @@ -169,7 +169,7 @@ public static MarkdownPipeline Pipeline .UseEnhancedCodeBlocks() .UseHtmxLinkInlineRenderer() .DisableHtml() - .UseWhiteSpaceNormalizer() + .UseSpaceNormalizer() .UseHardBreaks(); _ = builder.BlockParsers.TryRemove(); PipelineCached = builder.Build(); diff --git a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs 
b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs index 3157c8785..0d91793e4 100644 --- a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs +++ b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs @@ -2,12 +2,11 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using System.Buffers; using System.IO.Abstractions; -using System.Text; using Elastic.Documentation.Configuration; using Elastic.Documentation.Diagnostics; using Elastic.Documentation.Links.CrossLinks; +using Elastic.Documentation.Refactor.Formatters; using Elastic.Documentation.Services; using Elastic.Markdown.IO; using Microsoft.Extensions.Logging; @@ -21,37 +20,16 @@ IConfigurationContext configurationContext { private readonly ILogger _logger = logFactory.CreateLogger(); - // Collection of irregular whitespace characters that may impair Markdown rendering - private static readonly char[] IrregularWhitespaceChars = + // List of formatters to apply - easily extensible for future formatting operations + private static readonly IFormatter[] Formatters = [ - '\u000B', // Line Tabulation (\v) - - '\u000C', // Form Feed (\f) - - '\u00A0', // No-Break Space - - '\u0085', // Next Line - '\u1680', // Ogham Space Mark - '\u180E', // Mongolian Vowel Separator - - '\ufeff', // Zero Width No-Break Space - - '\u2000', // En Quad - '\u2001', // Em Quad - '\u2002', // En Space - - '\u2003', // Em Space - - '\u2004', // Tree-Per-Em - '\u2005', // Four-Per-Em - '\u2006', // Six-Per-Em - '\u2007', // Figure Space - '\u2008', // Punctuation Space - - '\u2009', // Thin Space - '\u200A', // Hair Space - '\u200B', // Zero Width Space - - '\u2028', // Line Separator - '\u2029', // Paragraph Separator - '\u202F', // Narrow No-Break Space - '\u205F', // Medium Mathematical Space - '\u3000' // Ideographic Space + new IrregularSpaceFormatter() + // Future formatters can be added here: + // new 
TrailingWhitespaceFormatter(), + // new LineEndingFormatter(), + // etc. ]; - private static readonly SearchValues IrregularWhitespaceSearchValues = SearchValues.Create(IrregularWhitespaceChars); - public async Task Format( IDiagnosticsCollector collector, string? path, @@ -72,7 +50,11 @@ Cancel ctx var totalFilesProcessed = 0; var totalFilesModified = 0; - var totalReplacements = 0; + var formatterStats = new Dictionary(); + + // Initialize stats for each formatter + foreach (var formatter in Formatters) + formatterStats[formatter.Name] = 0; // Only process markdown files that are part of the documentation set foreach (var docFile in set.Files.OfType()) @@ -81,13 +63,12 @@ Cancel ctx break; totalFilesProcessed++; - var (modified, replacements) = await ProcessFile(docFile.SourceFile, isDryRun, fs); + var (modified, changes) = await ProcessFile(docFile.SourceFile, isDryRun, fs, formatterStats); if (modified) { totalFilesModified++; - totalReplacements += replacements; - _logger.LogInformation("Fixed {Count} irregular whitespace(s) in: {File}", replacements, docFile.RelativePath); + _logger.LogInformation("Formatted {File} ({Changes} change(s))", docFile.RelativePath, changes); } } @@ -95,7 +76,10 @@ Cancel ctx _logger.LogInformation("Formatting complete:"); _logger.LogInformation(" Files processed: {Processed}", totalFilesProcessed); _logger.LogInformation(" Files modified: {Modified}", totalFilesModified); - _logger.LogInformation(" Total replacements: {Replacements}", totalReplacements); + + // Log stats for each formatter that made changes + foreach (var (formatterName, changeCount) in formatterStats.Where(kvp => kvp.Value > 0)) + _logger.LogInformation(" {Formatter} fixes: {Count}", formatterName, changeCount); if (isDryRun && totalFilesModified > 0) { @@ -106,37 +90,36 @@ Cancel ctx return true; } - private static async Task<(bool modified, int replacements)> ProcessFile(IFileInfo file, bool isDryRun, IFileSystem fs) + private static async Task<(bool modified, 
int totalChanges)> ProcessFile( + IFileInfo file, + bool isDryRun, + IFileSystem fs, + Dictionary stats + ) { var content = await fs.File.ReadAllTextAsync(file.FullName); - var modified = false; - var replacements = 0; - - // Check if file contains any irregular whitespace - if (content.AsSpan().IndexOfAny(IrregularWhitespaceSearchValues) == -1) - return (false, 0); + var originalContent = content; + var totalChanges = 0; - // Replace irregular whitespace with regular spaces - var sb = new StringBuilder(content.Length); - foreach (var c in content) + // Apply each formatter in sequence + foreach (var formatter in Formatters) { - if (IrregularWhitespaceSearchValues.Contains(c)) - { - _ = sb.Append(' '); - replacements++; - modified = true; - } - else + var (formattedContent, changes) = formatter.Format(content); + + if (changes > 0) { - _ = sb.Append(c); + content = formattedContent; + totalChanges += changes; + stats[formatter.Name] += changes; } } + var modified = content != originalContent; + + // Only write if content changed and not in dry-run mode if (modified && !isDryRun) - { - await fs.File.WriteAllTextAsync(file.FullName, sb.ToString()); - } + await fs.File.WriteAllTextAsync(file.FullName, content); - return (modified, replacements); + return (modified, totalChanges); } } diff --git a/src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs b/src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs new file mode 100644 index 000000000..ebc9923f3 --- /dev/null +++ b/src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs @@ -0,0 +1,23 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +namespace Elastic.Documentation.Refactor.Formatters; + +/// +/// Defines a formatter that can process and modify file content +/// +public interface IFormatter +{ + /// + /// Gets the name of this formatter for logging purposes + /// + string Name { get; } + + /// + /// Formats the content and returns the modified content along with the number of changes made + /// + /// The content to format + /// A tuple containing the formatted content and the number of changes made + (string content, int changes) Format(string content); +} diff --git a/src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs b/src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs new file mode 100644 index 000000000..db74cd836 --- /dev/null +++ b/src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs @@ -0,0 +1,73 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using System.Buffers; +using System.Text; + +namespace Elastic.Documentation.Refactor.Formatters; + +/// +/// Formatter that replaces irregular space characters with regular spaces +/// +public class IrregularSpaceFormatter : IFormatter +{ + public string Name => "irregular space"; + + // Collection of irregular space characters that may impair Markdown rendering + private static readonly char[] IrregularSpaceChars = + [ + '\u000B', // Line Tabulation (\v) - + '\u000C', // Form Feed (\f) - + '\u00A0', // No-Break Space - + '\u0085', // Next Line + '\u1680', // Ogham Space Mark + '\u180E', // Mongolian Vowel Separator - + '\ufeff', // Zero Width No-Break Space - + '\u2000', // En Quad + '\u2001', // Em Quad + '\u2002', // En Space - + '\u2003', // Em Space - + '\u2004', // Three-Per-Em + '\u2005', // Four-Per-Em + '\u2006', // Six-Per-Em + '\u2007', // Figure Space + '\u2008', // Punctuation Space - + '\u2009', // Thin Space + '\u200A', // Hair Space + '\u200B', // Zero Width Space - + '\u2028', // Line Separator + '\u2029', // Paragraph Separator + '\u202F', // Narrow No-Break Space + '\u205F', // Medium Mathematical Space + '\u3000' // Ideographic Space + ]; + + private static readonly SearchValues IrregularSpaceSearchValues = SearchValues.Create(IrregularSpaceChars); + + public (string content, int changes) Format(string content) + { + // Quick check - if no irregular space, return original + if (content.AsSpan().IndexOfAny(IrregularSpaceSearchValues) == -1) + return (content, 0); + + // Replace irregular space with regular spaces + var sb = new StringBuilder(content.Length); + var replacements = 0; + + foreach (var c in content) + { + if (IrregularSpaceSearchValues.Contains(c)) + { + _ = sb.Append(' '); + replacements++; + } + else + { + _ = sb.Append(c); + } + } + + return (sb.ToString(), replacements); + } +} diff --git a/tests/authoring/Linters/WhiteSpaceNormalizers.fs
b/tests/authoring/Linters/SpaceNormalizers.fs similarity index 68% rename from tests/authoring/Linters/WhiteSpaceNormalizers.fs rename to tests/authoring/Linters/SpaceNormalizers.fs index 8d9391148..c501e0f64 100644 --- a/tests/authoring/Linters/WhiteSpaceNormalizers.fs +++ b/tests/authoring/Linters/SpaceNormalizers.fs @@ -2,13 +2,13 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -module ``AuthoringTests``.``linters``.``white space normalizers`` +module ``AuthoringTests``.``linters``.``space normalizers`` open Xunit open authoring -type ``white space detection`` () = +type ``space detection`` () = static let markdown = Setup.Markdown $""" not a{'\u000B'}space @@ -20,4 +20,4 @@ not a{'\u000B'}space [] let ``emits a hint when a bad space is used`` () = - markdown |> hasHint "Irregular whitespace character detected: U+000B (Line Tabulation (VT)). This may impair Markdown rendering." + markdown |> hasHint "Irregular space detected. Run 'docs-builder format' to automatically fix all instances." diff --git a/tests/authoring/authoring.fsproj b/tests/authoring/authoring.fsproj index dedc4f06f..4bf239acb 100644 --- a/tests/authoring/authoring.fsproj +++ b/tests/authoring/authoring.fsproj @@ -53,7 +53,7 @@ - + From f1cce8968007e4ab00a64de104d149c9420afa0c Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri Benedetti Date: Tue, 21 Oct 2025 11:38:21 +0200 Subject: [PATCH 5/8] Remove test spaces --- docs/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 33b55419e..7e999138f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,9 +4,9 @@ navigation_title: Elastic Docs v3 # Welcome to Elastic Docs v3 -Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. 
Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem. +Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem. -## What do you want to do today? +## What do you want to do today? * [Contribute to Elastic documentation](./contribute/index.md) * [Learn about migration to Elastic Docs V3](./migration/index.md) From 22db85b7d04d6ae5920292d8284cb680bd112465 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri-Benedetti Date: Tue, 21 Oct 2025 14:31:47 +0200 Subject: [PATCH 6/8] Update src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs Co-authored-by: Martijn Laarman --- src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs index ebef4138e..5d32cf0cc 100644 --- a/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs +++ b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs @@ -82,13 +82,15 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) var context = processor.GetContext(); var filePath = context.MarkdownSourcePath.FullName; + if (FilesWithHintEmitted.Contains(filePath)) + return; lock (FilesWithHintEmitted) { - if (!FilesWithHintEmitted.Contains(filePath)) - { - _ = FilesWithHintEmitted.Add(filePath); - processor.EmitHint(processor.Inline, 1, "Irregular space detected. 
Run 'docs-builder format' to automatically fix all instances."); - } + if (FilesWithHintEmitted.Contains(filePath)) + return; + + _ = FilesWithHintEmitted.Add(filePath); + processor.EmitHint(processor.Inline, 1, "Irregular space detected. Run 'docs-builder format' to automatically fix all instances."); } slice.SkipChar(); From c749b896068cd597bcae0beb80ea4df07819e82a Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri Benedetti Date: Tue, 21 Oct 2025 15:43:32 +0200 Subject: [PATCH 7/8] Add --check and --write commands --- docs/cli/docset/format.md | 63 +++++++++++++----- .../Myst/Linters/SpaceNormalizer.cs | 12 ++-- .../FormatService.cs | 64 ++++++++++++------- .../docs-builder/Commands/FormatCommand.cs | 19 ++++-- 4 files changed, 106 insertions(+), 52 deletions(-) diff --git a/docs/cli/docset/format.md b/docs/cli/docset/format.md index 9011cb56b..b5a88154b 100644 --- a/docs/cli/docset/format.md +++ b/docs/cli/docset/format.md @@ -5,21 +5,29 @@ Format documentation files by fixing common issues like irregular space ## Usage ``` -docs-builder format [options...] [-h|--help] [--version] +docs-builder format --check [options...] +docs-builder format --write [options...] ``` ## Options +`--check` +: Check if files need formatting without modifying them. Exits with code 1 if formatting is needed, 0 if all files are properly formatted. (required, mutually exclusive with --write) + +`--write` +: Write formatting changes to files. (required, mutually exclusive with --check) + `-p|--path` `` : Path to the documentation folder, defaults to pwd. (optional) -`--dry-run` `` -: Preview changes without modifying files (optional) - ## Description The `format` command automatically detects and fixes formatting issues in your documentation files. The command only processes Markdown files (`.md`) that are included in your `_docset.yml` table of contents, ensuring that only intentional documentation files are modified. 
+You must specify exactly one of `--check` or `--write`: +- `--check` validates formatting without modifying files, useful for CI/CD pipelines +- `--write` applies formatting changes to files + Currently, it handles irregular space characters that may impair Markdown rendering. ### Irregular Space Detection @@ -38,41 +46,64 @@ These characters can cause unexpected rendering issues in Markdown and are often ## Examples -### Format current directory +### Check if formatting is needed (CI/CD) + +```bash +docs-builder format --check +``` + +Exit codes: +- `0`: All files are properly formatted +- `1`: Some files need formatting + +### Apply formatting changes ```bash -docs-builder format +docs-builder format --write ``` -### Preview changes without modifying files +### Check specific documentation folder ```bash -docs-builder format --dry-run +docs-builder format --check --path /path/to/docs ``` ### Format specific documentation folder ```bash -docs-builder format --path /path/to/docs +docs-builder format --write --path /path/to/docs ``` ## Output -The command provides detailed feedback about the formatting process: +### Check mode output + +When using `--check`, the command reports which files need formatting: + +``` +Checking documentation in: /path/to/docs + +Formatting needed: + Files needing formatting: 2 + irregular space fixes needed: 3 + +Run 'docs-builder format --write' to apply changes +``` + +### Write mode output + +When using `--write`, the command reports the changes made: ``` Formatting documentation in: /path/to/docs -Fixed 2 irregular space(s) in: guide/setup.md -Fixed 1 irregular space(s) in: api/endpoints.md +Formatted index.md (2 change(s)) Formatting complete: Files processed: 155 - Files modified: 2 - Total replacements: 3 + Files modified: 1 + irregular space fixes: 2 ``` -When using `--dry-run`, files are not modified and the command reminds you to run without the flag to apply changes. 
- ## Future Enhancements The format command is designed to be extended with additional formatting capabilities in the future, such as: diff --git a/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs index 5d32cf0cc..ebef4138e 100644 --- a/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs +++ b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs @@ -82,15 +82,13 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) var context = processor.GetContext(); var filePath = context.MarkdownSourcePath.FullName; - if (FilesWithHintEmitted.Contains(filePath)) - return; lock (FilesWithHintEmitted) { - if (FilesWithHintEmitted.Contains(filePath)) - return; - - _ = FilesWithHintEmitted.Add(filePath); - processor.EmitHint(processor.Inline, 1, "Irregular space detected. Run 'docs-builder format' to automatically fix all instances."); + if (!FilesWithHintEmitted.Contains(filePath)) + { + _ = FilesWithHintEmitted.Add(filePath); + processor.EmitHint(processor.Inline, 1, "Irregular space detected. Run 'docs-builder format' to automatically fix all instances."); + } } slice.SkipChar(); diff --git a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs index 0d91793e4..64924ba3c 100644 --- a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs +++ b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs @@ -33,20 +33,17 @@ IConfigurationContext configurationContext public async Task Format( IDiagnosticsCollector collector, string? path, - bool? dryRun, + bool checkOnly, IFileSystem fs, Cancel ctx ) { - var isDryRun = dryRun ?? 
false; - // Create BuildContext to load the documentation set var context = new BuildContext(collector, fs, fs, configurationContext, ExportOptions.MetadataOnly, path, null); var set = new DocumentationSet(context, logFactory, NoopCrossLinkResolver.Instance); - _logger.LogInformation("Formatting documentation in: {Path}", set.SourceDirectory.FullName); - if (isDryRun) - _logger.LogInformation("Running in dry-run mode - no files will be modified"); + var mode = checkOnly ? "Checking" : "Formatting"; + _logger.LogInformation("{Mode} documentation in: {Path}", mode, set.SourceDirectory.FullName); var totalFilesProcessed = 0; var totalFilesModified = 0; @@ -63,36 +60,55 @@ Cancel ctx break; totalFilesProcessed++; - var (modified, changes) = await ProcessFile(docFile.SourceFile, isDryRun, fs, formatterStats); + var (modified, changes) = await ProcessFile(docFile.SourceFile, checkOnly, fs, formatterStats); if (modified) - { totalFilesModified++; - _logger.LogInformation("Formatted {File} ({Changes} change(s))", docFile.RelativePath, changes); - } } _logger.LogInformation(""); - _logger.LogInformation("Formatting complete:"); - _logger.LogInformation(" Files processed: {Processed}", totalFilesProcessed); - _logger.LogInformation(" Files modified: {Modified}", totalFilesModified); - - // Log stats for each formatter that made changes - foreach (var (formatterName, changeCount) in formatterStats.Where(kvp => kvp.Value > 0)) - _logger.LogInformation(" {Formatter} fixes: {Count}", formatterName, changeCount); - if (isDryRun && totalFilesModified > 0) + if (checkOnly) { - _logger.LogInformation(""); - _logger.LogInformation("Run without --dry-run to apply changes"); + if (totalFilesModified > 0) + { + _logger.LogInformation("Formatting needed:"); + _logger.LogInformation(" Files needing formatting: {Modified}", totalFilesModified); + + // Log stats for each formatter that would make changes + foreach (var (formatterName, changeCount) in formatterStats.Where(kvp => kvp.Value > 
0)) + _logger.LogInformation(" {Formatter} fixes needed: {Count}", formatterName, changeCount); + + _logger.LogInformation(""); + + // Emit error to trigger exit code 1 + collector.EmitError(string.Empty, $"{totalFilesModified} file(s) need formatting. Run 'docs-builder format --write' to apply changes."); + + return false; + } + else + { + _logger.LogInformation("All files are properly formatted"); + return true; + } } + else + { + _logger.LogInformation("Formatting complete:"); + _logger.LogInformation(" Files processed: {Processed}", totalFilesProcessed); + _logger.LogInformation(" Files modified: {Modified}", totalFilesModified); + + // Log stats for each formatter that made changes + foreach (var (formatterName, changeCount) in formatterStats.Where(kvp => kvp.Value > 0)) + _logger.LogInformation(" {Formatter} fixes: {Count}", formatterName, changeCount); - return true; + return true; + } } private static async Task<(bool modified, int totalChanges)> ProcessFile( IFileInfo file, - bool isDryRun, + bool checkOnly, IFileSystem fs, Dictionary stats ) @@ -116,8 +132,8 @@ Dictionary stats var modified = content != originalContent; - // Only write if content changed and not in dry-run mode - if (modified && !isDryRun) + // Only write if content changed and in write mode + if (modified && !checkOnly) await fs.File.WriteAllTextAsync(file.FullName, content); return (modified, totalChanges); diff --git a/src/tooling/docs-builder/Commands/FormatCommand.cs b/src/tooling/docs-builder/Commands/FormatCommand.cs index 076fc3010..7672c7458 100644 --- a/src/tooling/docs-builder/Commands/FormatCommand.cs +++ b/src/tooling/docs-builder/Commands/FormatCommand.cs @@ -19,25 +19,34 @@ IConfigurationContext configurationContext ) { /// - /// Format documentation files by fixing common issues like irregular whitespace + /// Format documentation files by fixing common issues like irregular space /// /// -p, Path to the documentation folder, defaults to pwd - /// Preview changes without 
modifying files + /// Check if files need formatting without modifying them (exits with code 1 if formatting needed) + /// Write formatting changes to files /// [Command("")] public async Task Format( string? path = null, - bool? dryRun = null, + bool check = false, + bool write = false, Cancel ctx = default ) { + // Validate that exactly one of --check or --write is specified + if (check == write) + { + collector.EmitError(string.Empty, "Must specify exactly one of --check or --write"); + return 1; + } + await using var serviceInvoker = new ServiceInvoker(collector); var service = new FormatService(logFactory, configurationContext); var fs = new FileSystem(); - serviceInvoker.AddCommand(service, (path, dryRun, fs), - async static (s, collector, state, ctx) => await s.Format(collector, state.path, state.dryRun, state.fs, ctx) + serviceInvoker.AddCommand(service, (path, check, fs), + async static (s, collector, state, ctx) => await s.Format(collector, state.path, state.check, state.fs, ctx) ); return await serviceInvoker.InvokeAsync(ctx); } From 1a14a333f9c393960ad6e4f96aa782863eb89283 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferri Benedetti Date: Tue, 21 Oct 2025 15:53:45 +0200 Subject: [PATCH 8/8] Further refactors --- docs/index.md | 2 +- .../Myst/Linters/SpaceNormalizer.cs | 2 +- .../Elastic.Documentation.Refactor/FormatService.cs | 10 +++++----- .../Formatters/IFormatter.cs | 13 ++++++++++--- .../Formatters/IrregularSpaceFormatter.cs | 6 +++--- tests/authoring/Linters/SpaceNormalizers.fs | 2 +- 6 files changed, 21 insertions(+), 14 deletions(-) diff --git a/docs/index.md b/docs/index.md index 7e999138f..ead514e83 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,7 @@ navigation_title: Elastic Docs v3 # Welcome to Elastic Docs v3 -Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. 
Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem. +Elastic Docs V3 is our next-generation documentation platform designed to improve the experience of learning, using, and contributing to Elastic products. Built on a foundation of modern authoring tools and scalable infrastructure, V3 offers faster builds, streamlined versioning, and enhanced navigation to guide users through Elastic’s complex ecosystem. ## What do you want to do today? diff --git a/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs index ebef4138e..5a842a2dc 100644 --- a/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs +++ b/src/Elastic.Markdown/Myst/Linters/SpaceNormalizer.cs @@ -87,7 +87,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) if (!FilesWithHintEmitted.Contains(filePath)) { _ = FilesWithHintEmitted.Add(filePath); - processor.EmitHint(processor.Inline, 1, "Irregular space detected. Run 'docs-builder format' to automatically fix all instances."); + processor.EmitHint(processor.Inline, 1, "Irregular space detected. 
Run 'docs-builder format --write' to automatically fix all instances."); } } diff --git a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs index 64924ba3c..bd7dbf4b1 100644 --- a/src/authoring/Elastic.Documentation.Refactor/FormatService.cs +++ b/src/authoring/Elastic.Documentation.Refactor/FormatService.cs @@ -120,13 +120,13 @@ Dictionary stats // Apply each formatter in sequence foreach (var formatter in Formatters) { - var (formattedContent, changes) = formatter.Format(content); + var result = formatter.Format(content); - if (changes > 0) + if (result.Changes > 0) { - content = formattedContent; - totalChanges += changes; - stats[formatter.Name] += changes; + content = result.Content; + totalChanges += result.Changes; + stats[formatter.Name] += result.Changes; } } diff --git a/src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs b/src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs index ebc9923f3..b5304b964 100644 --- a/src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs +++ b/src/authoring/Elastic.Documentation.Refactor/Formatters/IFormatter.cs @@ -4,6 +4,13 @@ namespace Elastic.Documentation.Refactor.Formatters; +/// +/// Result of a formatting operation +/// +/// The formatted content +/// The number of changes made +public record FormatResult(string Content, int Changes); + /// /// Defines a formatter that can process and modify file content /// @@ -15,9 +22,9 @@ public interface IFormatter string Name { get; } /// - /// Formats the content and returns the modified content along with the number of changes made + /// Formats the content and returns the result /// /// The content to format - /// A tuple containing the formatted content and the number of changes made - (string content, int changes) Format(string content); + /// The format result containing the formatted content and number of changes + FormatResult Format(string 
content); } diff --git a/src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs b/src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs index db74cd836..6d4212e55 100644 --- a/src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs +++ b/src/authoring/Elastic.Documentation.Refactor/Formatters/IrregularSpaceFormatter.cs @@ -45,11 +45,11 @@ public class IrregularSpaceFormatter : IFormatter private static readonly SearchValues IrregularSpaceSearchValues = SearchValues.Create(IrregularSpaceChars); - public (string content, int changes) Format(string content) + public FormatResult Format(string content) { // Quick check - if no irregular space, return original if (content.AsSpan().IndexOfAny(IrregularSpaceSearchValues) == -1) - return (content, 0); + return new FormatResult(content, 0); // Replace irregular space with regular spaces var sb = new StringBuilder(content.Length); @@ -68,6 +68,6 @@ public class IrregularSpaceFormatter : IFormatter } } - return (sb.ToString(), replacements); + return new FormatResult(sb.ToString(), replacements); } } diff --git a/tests/authoring/Linters/SpaceNormalizers.fs b/tests/authoring/Linters/SpaceNormalizers.fs index c501e0f64..89dff9870 100644 --- a/tests/authoring/Linters/SpaceNormalizers.fs +++ b/tests/authoring/Linters/SpaceNormalizers.fs @@ -20,4 +20,4 @@ not a{'\u000B'}space [] let ``emits a hint when a bad space is used`` () = - markdown |> hasHint "Irregular space detected. Run 'docs-builder format' to automatically fix all instances." + markdown |> hasHint "Irregular space detected. Run 'docs-builder format --write' to automatically fix all instances."