|
| 1 | +// Licensed to Elasticsearch B.V under one or more agreements. |
| 2 | +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. |
| 3 | +// See the LICENSE file in the project root for more information |
| 4 | + |
| 5 | +using System.Buffers; |
| 6 | +using System.IO.Abstractions; |
| 7 | +using System.Text; |
| 8 | +using Elastic.Documentation.Configuration; |
| 9 | +using Elastic.Documentation.Diagnostics; |
| 10 | +using Elastic.Documentation.Services; |
| 11 | +using Microsoft.Extensions.Logging; |
| 12 | + |
| 13 | +namespace Elastic.Documentation.Refactor; |
| 14 | + |
/// <summary>
/// Replaces irregular whitespace characters in Markdown files with regular spaces.
/// </summary>
/// <remarks>
/// Every <c>*.md</c> file below the requested directory is scanned for the characters listed in
/// <see cref="IrregularWhitespaceChars"/>. In dry-run mode files are scanned and counted but never written.
/// </remarks>
public class FormatService(
    ILoggerFactory logFactory
) : IService
{
    private readonly ILogger _logger = logFactory.CreateLogger<FormatService>();

    // Collection of irregular whitespace characters that may impair Markdown rendering
    private static readonly char[] IrregularWhitespaceChars =
    [
        '\u000B', // Line Tabulation (\v) - <VT>
        '\u000C', // Form Feed (\f) - <FF>
        '\u00A0', // No-Break Space - <NBSP>
        '\u0085', // Next Line
        '\u1680', // Ogham Space Mark
        '\u180E', // Mongolian Vowel Separator - <MVS>
        '\ufeff', // Zero Width No-Break Space - <BOM>
        '\u2000', // En Quad
        '\u2001', // Em Quad
        '\u2002', // En Space - <ENSP>
        '\u2003', // Em Space - <EMSP>
        '\u2004', // Three-Per-Em Space
        '\u2005', // Four-Per-Em Space
        '\u2006', // Six-Per-Em Space
        '\u2007', // Figure Space
        '\u2008', // Punctuation Space - <PUNCSP>
        '\u2009', // Thin Space
        '\u200A', // Hair Space
        '\u200B', // Zero Width Space - <ZWSP>
        '\u2028', // Line Separator
        '\u2029', // Paragraph Separator
        '\u202F', // Narrow No-Break Space
        '\u205F', // Medium Mathematical Space
        '\u3000' // Ideographic Space
    ];

    // Pre-built lookup so both the "contains any" scan and the per-character test stay cheap
    private static readonly SearchValues<char> IrregularWhitespaceSearchValues = SearchValues.Create(IrregularWhitespaceChars);

    /// <summary>
    /// Scans all Markdown files under <paramref name="path"/> (recursively) and replaces irregular
    /// whitespace characters with regular spaces.
    /// </summary>
    /// <param name="collector">Receives an error when the target directory does not exist.</param>
    /// <param name="path">Directory to format; the file system's current directory is used when null or empty.</param>
    /// <param name="dryRun">When <c>true</c>, report what would change without writing any file. Null is treated as <c>false</c>.</param>
    /// <param name="fs">File system abstraction used for all directory and file access.</param>
    /// <param name="ctx">Cancellation token; when cancelled, processing stops and the summary of the work done so far is logged.</param>
    /// <returns><c>false</c> when the directory does not exist; otherwise <c>true</c>.</returns>
    public async Task<bool> Format(
        IDiagnosticsCollector collector,
        string? path,
        bool? dryRun,
        IFileSystem fs,
        Cancel ctx
    )
    {
        var isDryRun = dryRun ?? false;
        var rootPath = string.IsNullOrEmpty(path) ? fs.Directory.GetCurrentDirectory() : path;
        var rootDir = fs.DirectoryInfo.New(rootPath);

        if (!rootDir.Exists)
        {
            collector.EmitError(string.Empty, $"Directory not found: {rootPath}");
            return false;
        }

        _logger.LogInformation("Formatting documentation in: {Path}", rootDir.FullName);
        if (isDryRun)
            _logger.LogInformation("Running in dry-run mode - no files will be modified");

        var markdownFiles = rootDir.GetFiles("*.md", SearchOption.AllDirectories);
        var totalFilesProcessed = 0;
        var totalFilesModified = 0;
        var totalReplacements = 0;

        foreach (var file in markdownFiles)
        {
            if (ctx.IsCancellationRequested)
                break;

            bool modified;
            int replacements;
            try
            {
                (modified, replacements) = await ProcessFile(file, isDryRun, fs, ctx);
            }
            catch (OperationCanceledException) when (ctx.IsCancellationRequested)
            {
                // Cancellation observed while reading: stop exactly like the check above does,
                // so the caller still gets the summary and a normal return value.
                break;
            }

            totalFilesProcessed++;

            if (!modified)
                continue;

            totalFilesModified++;
            totalReplacements += replacements;

            var relativePath = GetRelativePath(rootDir, file);
            // In dry-run mode nothing has been written, so do not claim the file was fixed.
            if (isDryRun)
                _logger.LogInformation("Would fix {Count} irregular whitespace(s) in: {File}", replacements, relativePath);
            else
                _logger.LogInformation("Fixed {Count} irregular whitespace(s) in: {File}", replacements, relativePath);
        }

        _logger.LogInformation("");
        _logger.LogInformation("Formatting complete:");
        _logger.LogInformation(" Files processed: {Processed}", totalFilesProcessed);
        _logger.LogInformation(" Files modified: {Modified}", totalFilesModified);
        _logger.LogInformation(" Total replacements: {Replacements}", totalReplacements);

        if (isDryRun && totalFilesModified > 0)
        {
            _logger.LogInformation("");
            _logger.LogInformation("Run without --dry-run to apply changes");
        }

        return true;
    }

    /// <summary>
    /// Reads a single file and replaces each irregular whitespace character with a regular space.
    /// </summary>
    /// <param name="file">The file to process.</param>
    /// <param name="isDryRun">When <c>true</c>, the replacements are counted but the file is not written.</param>
    /// <param name="fs">File system abstraction used to read and write the file.</param>
    /// <param name="ctx">Cancellation token applied to the read only.</param>
    /// <returns>
    /// Whether the file contains irregular whitespace, and how many characters were (or, in dry-run mode, would be) replaced.
    /// </returns>
    /// <remarks>
    /// NOTE(review): zero-width characters (U+200B, U+FEFF, U+180E) and line separators (U+0085, U+2028, U+2029)
    /// are also turned into a visible space rather than removed or converted to a newline - confirm this is intended.
    /// </remarks>
    private static async Task<(bool modified, int replacements)> ProcessFile(IFileInfo file, bool isDryRun, IFileSystem fs, Cancel ctx)
    {
        var content = await fs.File.ReadAllTextAsync(file.FullName, ctx);

        // Fast path: most files are clean, so avoid building a copy for them
        if (content.AsSpan().IndexOfAny(IrregularWhitespaceSearchValues) == -1)
            return (false, 0);

        // Replace irregular whitespace with regular spaces
        var replacements = 0;
        var sb = new StringBuilder(content.Length);
        foreach (var c in content)
        {
            if (IrregularWhitespaceSearchValues.Contains(c))
            {
                _ = sb.Append(' ');
                replacements++;
            }
            else
            {
                _ = sb.Append(c);
            }
        }

        if (!isDryRun)
        {
            // The token is deliberately not passed here: the file is rewritten in place, and
            // the write should not be cut short by a cancellation request once it has started.
            await fs.File.WriteAllTextAsync(file.FullName, sb.ToString());
        }

        // The guard above guarantees at least one replacement was made
        return (true, replacements);
    }

    /// <summary>
    /// Returns the path of <paramref name="file"/> relative to <paramref name="root"/>, for display in log output.
    /// </summary>
    /// <remarks>
    /// Delegates to <see cref="Path.GetRelativePath(string, string)"/> instead of a raw string-prefix test,
    /// so a sibling directory that merely shares a name prefix with the root is not mistaken for a child.
    /// </remarks>
    private static string GetRelativePath(IDirectoryInfo root, IFileInfo file) =>
        Path.GetRelativePath(root.FullName, file.FullName);
}
0 commit comments