Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Elastic.Documentation/Links/LinkRegistry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Diagnostics.CodeAnalysis;
using System.Text.Json;
using System.Text.Json.Serialization;
using Elastic.Documentation.Serialization;
Expand Down
8 changes: 6 additions & 2 deletions src/tooling/docs-assembler/Cli/RepositoryCommands.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
using Documentation.Assembler.Navigation;
using Documentation.Assembler.Sourcing;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Links;
using Elastic.Documentation.Tooling.Diagnostics.Console;
using Elastic.Markdown;
using Elastic.Markdown.Exporters;
Expand Down Expand Up @@ -104,9 +105,10 @@ public async Task<int> BuildAll(
await assembleContext.Collector.StopAsync(ctx);
return 1;
}

var cloner = new AssemblerRepositorySourcer(logger, assembleContext);
var checkouts = cloner.GetAll().ToArray();
var checkoutResult = cloner.GetAll();
var checkouts = checkoutResult.Checkouts.ToArray();

if (checkouts.Length == 0)
throw new Exception("No checkouts found");

Expand All @@ -123,6 +125,8 @@ public async Task<int> BuildAll(
var builder = new AssemblerBuilder(logger, assembleContext, navigation, htmlWriter, pathProvider, historyMapper);
await builder.BuildAllAsync(assembleSources.AssembleSets, ctx);

await cloner.WriteLinkRegistrySnapshot(checkoutResult.LinkRegistrySnapshot, ctx);

var sitemapBuilder = new SitemapBuilder(navigation.NavigationItems, assembleContext.WriteFileSystem, assembleContext.OutputDirectory);
sitemapBuilder.Generate();

Expand Down
96 changes: 96 additions & 0 deletions src/tooling/docs-assembler/Sourcing/GitFacade.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.IO.Abstractions;
using Elastic.Documentation.Diagnostics;
using ProcNet;

namespace Documentation.Assembler.Sourcing;


/// <summary>
/// Abstraction over the git operations the assembler needs to source a repository checkout.
/// </summary>
public interface IGitRepository
{
    /// <summary>Reports whether the repository has already been initialized.</summary>
    bool IsInitialized();

    /// <summary>Initializes a new git repository.</summary>
    void Init();

    /// <summary>Registers <paramref name="origin"/> as the <c>origin</c> remote.</summary>
    /// <param name="origin">The remote location to register.</param>
    void GitAddOrigin(string origin);

    /// <summary>Fetches <paramref name="reference"/> from the <c>origin</c> remote.</summary>
    /// <param name="reference">The git reference to fetch.</param>
    void Fetch(string reference);

    /// <summary>Pulls <paramref name="branch"/> from the <c>origin</c> remote.</summary>
    /// <param name="branch">The branch to pull.</param>
    void Pull(string branch);

    /// <summary>Checks out <paramref name="reference"/>.</summary>
    /// <param name="reference">The git reference to check out.</param>
    void Checkout(string reference);

    /// <summary>Restricts the checkout to <paramref name="folder"/> using sparse checkout.</summary>
    /// <param name="folder">The folder that should remain checked out.</param>
    void EnableSparseCheckout(string folder);

    /// <summary>Turns sparse checkout off again.</summary>
    void DisableSparseCheckout();

    /// <summary>Returns the commit the repository's <c>HEAD</c> currently points at.</summary>
    string GetCurrentCommit();
}

/// <summary>
/// <see cref="IGitRepository"/> implementation optimized for pulling and fetching single commits.
/// It uses <c>git pull --depth 1</c> and <c>git fetch --depth 1</c> to minimize the amount of data transferred.
/// </summary>
/// <remarks>
/// A git command that exits with a non-zero code does not raise an exception from this class;
/// the failure is emitted as an error on the diagnostics collector instead.
/// </remarks>
/// <param name="collector">Receives an error whenever a git invocation fails.</param>
/// <param name="workingDirectory">The directory every git command is executed in.</param>
public class SingleCommitOptimizedGitRepository(DiagnosticsCollector collector, IDirectoryInfo workingDirectory) : IGitRepository
{
    // Number of times Capture() runs a command before giving up.
    private const int MaxCaptureAttempts = 10;

    // Used for both the process timeout and the wait-for-exit timeout of a captured command.
    private static readonly TimeSpan CaptureTimeout = TimeSpan.FromSeconds(3);

    /// <summary>
    /// Returns the first line printed by <c>git rev-parse HEAD</c>, or an empty string when
    /// the command kept failing (in which case an error has been emitted on the collector).
    /// </summary>
    public string GetCurrentCommit() => Capture("git", "rev-parse", "HEAD");

    /// <summary>Runs <c>git init</c> in the working directory.</summary>
    public void Init() => ExecIn("git", "init");

    /// <summary>Returns true when the working directory contains a <c>.git</c> directory.</summary>
    public bool IsInitialized() => Directory.Exists(Path.Combine(workingDirectory.FullName, ".git"));

    /// <summary>Pulls only the latest commit of <paramref name="branch"/> from <c>origin</c>.</summary>
    public void Pull(string branch) => ExecIn("git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", branch);

    /// <summary>Fetches only the latest commit of <paramref name="reference"/> from <c>origin</c>, without tags or submodules.</summary>
    public void Fetch(string reference) => ExecIn("git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", reference);

    /// <summary>Runs <c>git sparse-checkout set</c> for <paramref name="folder"/>.</summary>
    public void EnableSparseCheckout(string folder) => ExecIn("git", "sparse-checkout", "set", folder);

    /// <summary>Runs <c>git sparse-checkout disable</c>.</summary>
    public void DisableSparseCheckout() => ExecIn("git", "sparse-checkout", "disable");

    /// <summary>Force-checks out <paramref name="reference"/>.</summary>
    public void Checkout(string reference) => ExecIn("git", "checkout", "--force", reference);

    /// <summary>Adds <paramref name="origin"/> as the remote named <c>origin</c>.</summary>
    public void GitAddOrigin(string origin) => ExecIn("git", "remote", "add", "origin", origin);

    /// <summary>
    /// Executes <paramref name="binary"/> with <paramref name="args"/> in the working directory and
    /// emits an error on the collector when the exit code is not 0.
    /// </summary>
    private void ExecIn(string binary, params string[] args)
    {
        var arguments = new ExecArguments(binary, args)
        {
            WorkingDirectory = workingDirectory.FullName,
            Environment = new Dictionary<string, string>
            {
                // Disable git editor prompts:
                // There are cases where `git pull` would prompt for an editor to write a commit message.
                // This env variable prevents that.
                { "GIT_EDITOR", "true" }
            },
        };
        var result = Proc.Exec(arguments);
        if (result != 0)
            collector.EmitError("", $"Exit code: {result} while executing {binary} {string.Join(" ", args)} in {workingDirectory}");
    }

    /// <summary>
    /// Runs <paramref name="binary"/> with <paramref name="args"/> in the working directory and returns
    /// the first line it wrote to the console.
    /// </summary>
    /// <returns>
    /// The first captured output line. When all <see cref="MaxCaptureAttempts"/> attempts fail, the last
    /// failure is emitted as an error on the collector and an empty string is returned; nothing is thrown.
    /// </returns>
    private string Capture(string binary, params string[] args)
    {
        for (var attempt = 1; ; attempt++)
        {
            try
            {
                return CaptureOutput();
            }
            catch (Exception) when (attempt < MaxCaptureAttempts)
            {
                // Attempts remain: swallow the failure and try again.
            }
            catch (Exception lastFailure)
            {
                // Final attempt failed as well: report it and fall back to an empty result.
                collector.EmitError("", "failure capturing stdout", lastFailure);
                return string.Empty;
            }
        }

        // Single attempt; throws when the process exits non-zero or produces no output so the loop above can retry.
        string CaptureOutput()
        {
            var arguments = new StartArguments(binary, args)
            {
                WorkingDirectory = workingDirectory.FullName,
                Timeout = CaptureTimeout,
                WaitForExit = CaptureTimeout,
                ConsoleOutWriter = NoopConsoleWriter.Instance
            };
            var result = Proc.Start(arguments);
            if (result.ExitCode != 0)
                throw new Exception($"Exit code is not 0. Received {result.ExitCode} from {binary}: {workingDirectory}");

            return result.ConsoleOut.FirstOrDefault()?.Line
                ?? throw new Exception($"No output captured for {binary}: {workingDirectory}");
        }
    }
}
137 changes: 59 additions & 78 deletions src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
// See the LICENSE file in the project root for more information

using System.Collections.Concurrent;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.IO.Abstractions;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Diagnostics;
using Elastic.Documentation.LinkIndex;
using Elastic.Markdown.IO;
using Elastic.Documentation.Links;
using Microsoft.Extensions.Logging;
using ProcNet;
using ProcNet.Std;
Expand All @@ -25,29 +23,42 @@ public class AssemblerRepositorySourcer(ILoggerFactory logger, AssembleContext c

private RepositorySourcer RepositorySourcer => new(logger, context.CheckoutDirectory, context.ReadFileSystem, context.Collector);

public IReadOnlyCollection<Checkout> GetAll()
public CheckoutResult GetAll()
{
var fs = context.ReadFileSystem;
var repositories = Configuration.ReferenceRepositories.Values.Concat<Repository>([Configuration.Narrative]);
var checkouts = new List<Checkout>();
var linkRegistrySnapshotPath = Path.Combine(context.CheckoutDirectory.FullName, CheckoutResult.LinkRegistrySnapshotFileName);
if (!fs.File.Exists(linkRegistrySnapshotPath))
throw new FileNotFoundException("Link-index snapshot not found. Run the clone-all command first.", linkRegistrySnapshotPath);
var linkRegistrySnapshotStr = File.ReadAllText(linkRegistrySnapshotPath);
var linkRegistry = LinkRegistry.Deserialize(linkRegistrySnapshotStr);
foreach (var repo in repositories)
{
var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(context.CheckoutDirectory.FullName, repo.Name));
IGitRepository gitFacade = new SingleCommitOptimizedGitRepository(context.Collector, checkoutFolder);
if (!checkoutFolder.Exists)
{
context.Collector.EmitError(checkoutFolder.FullName, $"'{repo.Name}' does not exist in link index checkout directory");
continue;
}
var head = gitFacade.GetCurrentCommit();
var checkout = new Checkout
{
Repository = repo,
Directory = checkoutFolder,
//TODO read from links.json and ensure we check out exactly that git reference
//+ validate that git reference belongs to the appropriate branch
HeadReference = Guid.NewGuid().ToString("N")
HeadReference = head
};
checkouts.Add(checkout);
}

return checkouts;
return new CheckoutResult
{
Checkouts = checkouts,
LinkRegistrySnapshot = linkRegistry
};
}

public async Task<IReadOnlyCollection<Checkout>> CloneAll(bool fetchLatest, Cancel ctx = default)
public async Task<CheckoutResult> CloneAll(bool fetchLatest, Cancel ctx = default)
{
_logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
PublishEnvironment.Name,
Expand Down Expand Up @@ -91,8 +102,23 @@ await Task.Run(() =>
checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest));
}, c);
}).ConfigureAwait(false);
return checkouts;
await File.WriteAllTextAsync(
Path.Combine(context.CheckoutDirectory.FullName, CheckoutResult.LinkRegistrySnapshotFileName),
LinkRegistry.Serialize(linkRegistry),
ctx
);
return new CheckoutResult
{
Checkouts = checkouts,
LinkRegistrySnapshot = linkRegistry
};
}

/// <summary>
/// Serializes <paramref name="linkRegistrySnapshot"/> and writes it into the <c>docs</c> folder of the
/// output directory, using <see cref="CheckoutResult.LinkRegistrySnapshotFileName"/> as the file name.
/// </summary>
/// <param name="linkRegistrySnapshot">The link registry snapshot to persist.</param>
/// <param name="ctx">Cancellation token forwarded to the file write.</param>
public async Task WriteLinkRegistrySnapshot(LinkRegistry linkRegistrySnapshot, Cancel ctx = default)
{
    var snapshotPath = Path.Combine(context.OutputDirectory.FullName, "docs", CheckoutResult.LinkRegistrySnapshotFileName);
    var serializedSnapshot = LinkRegistry.Serialize(linkRegistrySnapshot);
    await File.WriteAllTextAsync(snapshotPath, serializedSnapshot, ctx);
}
}


Expand All @@ -108,6 +134,7 @@ public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDir
public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1)
{
var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name));
IGitRepository git = new SingleCommitOptimizedGitRepository(collector, checkoutFolder);
if (attempt > 3)
{
collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts");
Expand All @@ -125,13 +152,13 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
checkoutFolder.Create();
checkoutFolder.Refresh();
}
var isGitInitialized = GitInit(repository, checkoutFolder);
var isGitInitialized = GitInit(git, repository);
string? head = null;
if (isGitInitialized)
{
try
{
head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
head = git.GetCurrentCommit();
}
catch (Exception e)
{
Expand All @@ -147,7 +174,7 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
_logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef);
else
{
FetchAndCheckout(repository, gitRef, checkoutFolder);
FetchAndCheckout(git, repository, gitRef);
if (!pull)
{
return new Checkout
Expand All @@ -159,11 +186,11 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
}
try
{
ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef);
git.Pull(gitRef);
}
catch (Exception e)
{
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch",
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {Path}, falling back to recreating from scratch",
repository.Name, gitRef, checkoutFolder.FullName);
checkoutFolder.Delete(true);
checkoutFolder.Refresh();
Expand All @@ -183,84 +210,31 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
/// Initializes the git repository if it is not already initialized.
/// Returns true if the repository was already initialized.
/// </summary>
private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder)
private static bool GitInit(IGitRepository git, Repository repository)
{
var isGitAlreadyInitialized = Directory.Exists(Path.Combine(checkoutFolder.FullName, ".git"));
var isGitAlreadyInitialized = git.IsInitialized();
if (isGitAlreadyInitialized)
return true;
ExecIn(checkoutFolder, "git", "init");
ExecIn(checkoutFolder, "git", "remote", "add", "origin", repository.Origin);
git.Init();
git.GitAddOrigin(repository.Origin);
return false;
}

private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder)
private static void FetchAndCheckout(IGitRepository git, Repository repository, string gitRef)
{
ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef);
git.Fetch(gitRef);
switch (repository.CheckoutStrategy)
{
case CheckoutStrategy.Full:
ExecIn(checkoutFolder, "git", "sparse-checkout", "disable");
git.DisableSparseCheckout();
break;
case CheckoutStrategy.Partial:
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs");
git.EnableSparseCheckout("docs");
break;
default:
throw new ArgumentOutOfRangeException(nameof(repository), repository.CheckoutStrategy, null);
}
ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef);
}

private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args)
{
var arguments = new ExecArguments(binary, args)
{
WorkingDirectory = workingDirectory?.FullName
};
var result = Proc.Exec(arguments);
if (result != 0)
collector.EmitError("", $"Exit code: {result} while executing {binary} {string.Join(" ", args)} in {workingDirectory}");
}

// ReSharper disable once UnusedMember.Local
private string Capture(IDirectoryInfo? workingDirectory, string binary, params string[] args)
{
// Try 10 times to capture the output of the command, if it fails, we'll throw an exception on the last try
Exception? e = null;
for (var i = 0; i <= 9; i++)
{
try
{
return CaptureOutput();
}
catch (Exception ex)
{
if (ex is not null)
e = ex;
}
}

if (e is not null)
collector.EmitError("", "failure capturing stdout", e);


return string.Empty;

string CaptureOutput()
{
var arguments = new StartArguments(binary, args)
{
WorkingDirectory = workingDirectory?.FullName,
//WaitForStreamReadersTimeout = TimeSpan.FromSeconds(3),
Timeout = TimeSpan.FromSeconds(3),
WaitForExit = TimeSpan.FromSeconds(3),
ConsoleOutWriter = NoopConsoleWriter.Instance
};
var result = Proc.Start(arguments);
var line = result.ExitCode != 0
? throw new Exception($"Exit code is not 0. Received {result.ExitCode} from {binary}: {workingDirectory}")
: result.ConsoleOut.FirstOrDefault()?.Line ?? throw new Exception($"No output captured for {binary}: {workingDirectory}");
return line;
}
git.Checkout(gitRef);
}
}

Expand All @@ -272,3 +246,10 @@ public void Write(Exception e) { }

public void Write(ConsoleOut consoleOut) { }
}

/// <summary>
/// Bundles a set of repository checkouts with the link registry snapshot that accompanies them.
/// </summary>
public record CheckoutResult
{
    /// <summary>File name used when the link registry snapshot is written to or read from disk.</summary>
    public static string LinkRegistrySnapshotFileName { get; } = "link-index.snapshot.json";

    /// <summary>The link registry snapshot associated with <see cref="Checkouts"/>.</summary>
    public required LinkRegistry LinkRegistrySnapshot { get; init; }

    /// <summary>The repository checkouts.</summary>
    public required IReadOnlyCollection<Checkout> Checkouts { get; init; }
}
Loading