From ab987b0d40091120d9dac46a4beba5d15c0026df Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Fri, 23 May 2025 00:49:45 +0200 Subject: [PATCH 1/4] Assembler: clone-all from link-index registry --- .../Assembler/Repository.cs | 11 +- .../docs-assembler/Cli/RepositoryCommands.cs | 11 +- .../Sourcing/RepositorySourcesFetcher.cs | 194 ++++++++++-------- 3 files changed, 129 insertions(+), 87 deletions(-) diff --git a/src/Elastic.Documentation.Configuration/Assembler/Repository.cs b/src/Elastic.Documentation.Configuration/Assembler/Repository.cs index 93d0d5667..44f050af4 100644 --- a/src/Elastic.Documentation.Configuration/Assembler/Repository.cs +++ b/src/Elastic.Documentation.Configuration/Assembler/Repository.cs @@ -2,6 +2,7 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using System.Runtime.Serialization; using YamlDotNet.Serialization; namespace Elastic.Documentation.Configuration.Assembler; @@ -12,6 +13,14 @@ public record NarrativeRepository : Repository public override string Name { get; set; } = RepositoryName; } +public enum CheckoutStrategy +{ + [EnumMember(Value = "partial")] + Partial, + [EnumMember(Value = "full")] + Full +} + public record Repository { [YamlIgnore] @@ -27,7 +36,7 @@ public record Repository public string GitReferenceNext { get; set; } = "main"; [YamlMember(Alias = "checkout_strategy")] - public string CheckoutStrategy { get; set; } = "partial"; + public CheckoutStrategy CheckoutStrategy { get; set; } = CheckoutStrategy.Partial; [YamlMember(Alias = "skip")] public bool Skip { get; set; } diff --git a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs index 99a9bdbf8..f23d2f71b 100644 --- a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs +++ b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs @@ -3,8 +3,10 @@ // See the LICENSE file in the project root for more information using System.Collections.Concurrent; +using System.ComponentModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Globalization; using System.IO.Abstractions; using System.Net.Mime; using Actions.Core.Services; @@ -39,11 +41,13 @@ private void AssignOutputLogger() /// Clones all repositories /// Treat warnings as errors and fail the build on warnings /// The environment to build + /// If true fetch the latest commit of the branch instead of the link registry entry ref /// [Command("clone-all")] public async Task CloneAll( bool? strict = null, string? environment = null, + bool? fetchLatest = null, Cancel ctx = default ) { @@ -55,7 +59,8 @@ public async Task CloneAll( var assembleContext = new AssembleContext(environment, collector, new FileSystem(), new FileSystem(), null, null); var cloner = new AssemblerRepositorySourcer(logger, assembleContext); - _ = await cloner.AcquireAllLatest(ctx); + + _ = await cloner.CloneAll(fetchLatest ?? false, ctx); await collector.StopAsync(ctx); @@ -138,7 +143,6 @@ public async Task UpdateLinkIndexAll(ContentSource contentSource, Cancel ct // It's only used to get the list of repositories. var assembleContext = new AssembleContext("prod", collector, new FileSystem(), new FileSystem(), null, null); var cloner = new RepositorySourcer(logger, assembleContext.CheckoutDirectory, new FileSystem(), collector); - var dict = new ConcurrentDictionary(); var repositories = new Dictionary(assembleContext.Configuration.ReferenceRepositories) { { NarrativeRepository.RepositoryName, assembleContext.Configuration.Narrative } @@ -152,8 +156,7 @@ await Parallel.ForEachAsync(repositories, { try { - var name = kv.Key.Trim(); - var checkout = cloner.CloneOrUpdateRepository(kv.Value, name, kv.Value.GetBranch(contentSource), dict); + var checkout = cloner.CloneRef(kv.Value, kv.Value.GetBranch(contentSource), true); var outputPath = Directory.CreateTempSubdirectory(checkout.Repository.Name).FullName; var context = new BuildContext( collector, diff --git a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs index 388306865..aafa7fa2c 100644 --- a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs +++ b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs @@ -8,6 +8,7 @@ using System.IO.Abstractions; using Elastic.Documentation.Configuration.Assembler; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.LinkIndex; using Elastic.Markdown.IO; using Microsoft.Extensions.Logging; using ProcNet; @@ -46,128 +47,158 @@ public IReadOnlyCollection GetAll() return checkouts; } - public async Task> AcquireAllLatest(Cancel ctx = default) + public async Task> CloneAll(bool fetchLatest, Cancel ctx = default) { - _logger.LogInformation( - "Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy", + _logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy", PublishEnvironment.Name, PublishEnvironment.ContentSource.ToStringFast(true) ); + var checkouts = new ConcurrentBag(); + + ILinkIndexReader linkIndexReader = Aws3LinkIndexReader.CreateAnonymous(); + var linkRegistry = await linkIndexReader.GetRegistry(ctx); var repositories = new Dictionary(Configuration.ReferenceRepositories) { { NarrativeRepository.RepositoryName, Configuration.Narrative } }; - return await RepositorySourcer.AcquireAllLatest(repositories, PublishEnvironment.ContentSource, ctx); - } -} - -public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector) -{ - private readonly ILogger _logger = logger.CreateLogger(); - public async Task> AcquireAllLatest(Dictionary repositories, ContentSource source, Cancel ctx = default) - { - var dict = new ConcurrentDictionary(); - var checkouts = new ConcurrentBag(); await Parallel.ForEachAsync(repositories, new ParallelOptions { CancellationToken = ctx, MaxDegreeOfParallelism = Environment.ProcessorCount - }, async (kv, c) => + }, async (repo, c) => { await Task.Run(() => { - var name = kv.Key.Trim(); - var repo = kv.Value; - var clone = CloneOrUpdateRepository(kv.Value, name, repo.GetBranch(source), dict); - checkouts.Add(clone); + if (!linkRegistry.Repositories.TryGetValue(repo.Key, out var entry)) + { + context.Collector.EmitError("", $"'{repo.Key}' does not exist in link index"); + return; + } + var branch = repo.Value.GetBranch(PublishEnvironment.ContentSource); + var gitRef = branch; + if (!fetchLatest) + { + if (!entry.TryGetValue(branch, out var entryInfo)) + { + context.Collector.EmitError("", $"'{repo.Key}' does not have a '{branch}' entry in link index"); + return; + } + gitRef = entryInfo.GitReference; + } + checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest)); }, c); }).ConfigureAwait(false); - - return checkouts.ToList().AsReadOnly(); + return checkouts; } +} - public Checkout CloneOrUpdateRepository(Repository repository, string name, string branch, ConcurrentDictionary dict) - { - var fs = readFileSystem; - var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, name)); - var relativePath = Path.GetRelativePath(Paths.WorkingDirectoryRoot.FullName, checkoutFolder.FullName); - var sw = Stopwatch.StartNew(); - _ = dict.AddOrUpdate($"{name} ({branch})", sw, (_, _) => sw); +public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector) +{ + private readonly ILogger _logger = logger.CreateLogger(); - string? head; - if (checkoutFolder.Exists) + // + // Clones the repository to the checkout directory and checks out the specified git reference. + // + // The repository to clone. + // The git reference to check out. Branch, commit or tag + public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1) + { + var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name)); + if (attempt > 3) { - if (!TryUpdateSource(name, branch, relativePath, checkoutFolder, out head)) - head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder); + collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts"); + return new Checkout + { + Directory = checkoutFolder, + HeadReference = gitRef, + Repository = repository, + }; } - else - head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder); - - sw.Stop(); - - return new Checkout + _logger.LogInformation("{RepositoryName}: Cloning repository {RepositoryName}@{Commit} to {CheckoutFolder}", repository.Name, repository.Name, gitRef, + checkoutFolder.FullName); + if (!checkoutFolder.Exists) { - Repository = repository, - Directory = checkoutFolder, - HeadReference = head - }; - } - - private bool TryUpdateSource(string name, string branch, string relativePath, IDirectoryInfo checkoutFolder, [NotNullWhen(true)] out string? head) - { - head = null; - try - { - _logger.LogInformation("Pull: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath); - // --allow-unrelated-histories due to shallow clones not finding a common ancestor - ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff"); + checkoutFolder.Create(); + checkoutFolder.Refresh(); } - catch (Exception e) + var isGitInitialized = GitInit(repository, checkoutFolder); + string? head = null; + if (isGitInitialized) { - _logger.LogError(e, "Failed to update {Name} from {RelativePath}, falling back to recreating from scratch", name, relativePath); - if (checkoutFolder.Exists) + try + { + head = Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + } + catch (Exception e) { + _logger.LogError(e, "{RepositoryName}: Failed to acquire current commit, falling back to recreating from scratch", repository.Name); checkoutFolder.Delete(true); checkoutFolder.Refresh(); + return CloneRef(repository, gitRef, pull, attempt + 1); } - return false; } - head = Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + if (head != null && head == gitRef) + _logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef); + else + { + FetchAndCheckout(repository, gitRef, checkoutFolder); + if (!pull) + { + return new Checkout + { + Directory = checkoutFolder, + HeadReference = gitRef, + Repository = repository, + }; + } + try + { + ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef); + } + catch (Exception e) + { + _logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch", + repository.Name, gitRef, checkoutFolder.FullName); + checkoutFolder.Delete(true); + checkoutFolder.Refresh(); + return CloneRef(repository, gitRef, pull, attempt + 1); + } + } - return true; + return new Checkout + { + Directory = checkoutFolder, + HeadReference = gitRef, + Repository = repository, + }; } - private string CheckoutFromScratch(Repository repository, string name, string branch, string relativePath, IDirectoryInfo checkoutFolder) + /// + /// Initializes the git repository if it is not already initialized. + /// Returns true if the repository was already initialized. + /// + private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder) { - _logger.LogInformation("Checkout: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath); - switch (repository.CheckoutStrategy) - { - case "full": - Exec("git", "clone", repository.Origin, checkoutFolder.FullName, - "--depth", "1", "--single-branch", - "--branch", branch - ); - break; - case "partial": - Exec( - "git", "clone", "--filter=blob:none", "--no-checkout", repository.Origin, checkoutFolder.FullName - ); - - ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "--cone"); - ExecIn(checkoutFolder, "git", "checkout", branch); - ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs"); - break; - } - - return Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + var isGitAlreadyInitialized = Directory.Exists(Path.Combine(checkoutFolder.FullName, ".git")); + if (isGitAlreadyInitialized) + return true; + ExecIn(checkoutFolder, "git", "init"); + ExecIn(checkoutFolder, "git", "remote", "add", "origin", repository.Origin); + return false; } - private void Exec(string binary, params string[] args) => ExecIn(null, binary, args); + private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder) + { + ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef); + if (repository.CheckoutStrategy == CheckoutStrategy.Partial) + ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs"); + ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef); + } private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args) { @@ -221,7 +252,6 @@ string CaptureOutput() return line; } } - } public class NoopConsoleWriter : IConsoleOutWriter From fd36b000f442a2cc333689fdaf87da98871d2bc2 Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Fri, 23 May 2025 01:05:49 +0200 Subject: [PATCH 2/4] Add comments --- .../docs-assembler/Sourcing/RepositorySourcesFetcher.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs index aafa7fa2c..6e1bf4370 100644 --- a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs +++ b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs @@ -141,8 +141,9 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false return CloneRef(repository, gitRef, pull, attempt + 1); } } - + // Repository already checked out the same commit if (head != null && head == gitRef) + // nothing to do, already at the right commit _logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef); else { From fbb5a14efe26eb1caf9b5fa5cdbd8aea950fc8ea Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Fri, 23 May 2025 01:07:55 +0200 Subject: [PATCH 3/4] Return empty HeadReference on failure --- src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs index 6e1bf4370..3d846020d 100644 --- a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs +++ b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs @@ -114,7 +114,7 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false return new Checkout { Directory = checkoutFolder, - HeadReference = gitRef, + HeadReference = "", Repository = repository, }; } From 502398b3f88cc87fc6aedd7ef8b286e9691d66fa Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Fri, 23 May 2025 11:03:49 +0200 Subject: [PATCH 4/4] Use switch case for checkout strategy and disable sparse-checkout for CheckoutStrategy.Full --- .../Sourcing/RepositorySourcesFetcher.cs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs index 3d846020d..0ea5adc66 100644 --- a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs +++ b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs @@ -196,8 +196,17 @@ private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder) private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder) { ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef); - if (repository.CheckoutStrategy == CheckoutStrategy.Partial) - ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs"); + switch (repository.CheckoutStrategy) + { + case CheckoutStrategy.Full: + ExecIn(checkoutFolder, "git", "sparse-checkout", "disable"); + break; + case CheckoutStrategy.Partial: + ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs"); + break; + default: + throw new ArgumentOutOfRangeException(nameof(repository), repository.CheckoutStrategy, null); + } ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef); }