Skip to content

Commit 78ae348

Browse files
reakaleek and Mpdreamz authored
Also write the used link-index to the filesystem and add it as part of the new CheckoutResult record (#1316)
* Also write the used link-index to the filesystem and add it as part of the new CheckoutResult record
* Refactor git operations
* Fix naming
* Refactor writing linkRegistrySnapshot to AssemblerRepositorySourcer method
* Fix method signature
* Update src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs

Co-authored-by: Martijn Laarman <[email protected]>

* Use context's FileSystem

---------

Co-authored-by: Martijn Laarman <[email protected]>
1 parent 59b71a1 commit 78ae348

File tree

4 files changed

+162
-80
lines changed

4 files changed

+162
-80
lines changed

src/Elastic.Documentation/Links/LinkRegistry.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
33
// See the LICENSE file in the project root for more information
44

5+
using System.Diagnostics.CodeAnalysis;
56
using System.Text.Json;
67
using System.Text.Json.Serialization;
78
using Elastic.Documentation.Serialization;

src/tooling/docs-assembler/Cli/RepositoryCommands.cs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
using Documentation.Assembler.Navigation;
1919
using Documentation.Assembler.Sourcing;
2020
using Elastic.Documentation.Configuration.Assembler;
21+
using Elastic.Documentation.Links;
2122
using Elastic.Documentation.Tooling.Diagnostics.Console;
2223
using Elastic.Markdown;
2324
using Elastic.Markdown.Exporters;
@@ -104,9 +105,10 @@ public async Task<int> BuildAll(
104105
await assembleContext.Collector.StopAsync(ctx);
105106
return 1;
106107
}
107-
108108
var cloner = new AssemblerRepositorySourcer(logger, assembleContext);
109-
var checkouts = cloner.GetAll().ToArray();
109+
var checkoutResult = cloner.GetAll();
110+
var checkouts = checkoutResult.Checkouts.ToArray();
111+
110112
if (checkouts.Length == 0)
111113
throw new Exception("No checkouts found");
112114

@@ -123,6 +125,8 @@ public async Task<int> BuildAll(
123125
var builder = new AssemblerBuilder(logger, assembleContext, navigation, htmlWriter, pathProvider, historyMapper);
124126
await builder.BuildAllAsync(assembleSources.AssembleSets, ctx);
125127

128+
await cloner.WriteLinkRegistrySnapshot(checkoutResult.LinkRegistrySnapshot, ctx);
129+
126130
var sitemapBuilder = new SitemapBuilder(navigation.NavigationItems, assembleContext.WriteFileSystem, assembleContext.OutputDirectory);
127131
sitemapBuilder.Generate();
128132

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.IO.Abstractions;
6+
using Elastic.Documentation.Diagnostics;
7+
using ProcNet;
8+
9+
namespace Documentation.Assembler.Sourcing;
10+
11+
12+
/// <summary>
/// Abstraction over the git operations needed to source a repository into a working directory.
/// </summary>
public interface IGitRepository
{
    /// <summary> Whether the working directory already holds an initialized git repository. </summary>
    bool IsInitialized();

    /// <summary> Initializes a new git repository in the working directory. </summary>
    void Init();

    /// <summary> Registers <paramref name="origin"/> as the remote named <c>origin</c>. </summary>
    void GitAddOrigin(string origin);

    /// <summary> Fetches <paramref name="reference"/> from <c>origin</c>. </summary>
    void Fetch(string reference);

    /// <summary> Pulls <paramref name="branch"/> from <c>origin</c>. </summary>
    void Pull(string branch);

    /// <summary> Checks out <paramref name="reference"/> in the working directory. </summary>
    void Checkout(string reference);

    /// <summary> Restricts the working tree to <paramref name="folder"/> using sparse checkout. </summary>
    void EnableSparseCheckout(string folder);

    /// <summary> Turns sparse checkout off again. </summary>
    void DisableSparseCheckout();

    /// <summary> Returns the commit that <c>HEAD</c> currently points at. </summary>
    string GetCurrentCommit();
}
24+
25+
/// <summary>
/// An <see cref="IGitRepository"/> implementation that is optimized for pulling and fetching single commits.
/// It uses <c>git pull --depth 1</c> and <c>git fetch --depth 1</c> to minimize the amount of data transferred.
/// </summary>
/// <param name="collector">Receives an error whenever a git command exits non-zero or its output can not be captured.</param>
/// <param name="workingDirectory">The directory every git command is executed in.</param>
public class SingleCommitOptimizedGitRepository(DiagnosticsCollector collector, IDirectoryInfo workingDirectory) : IGitRepository
{
    // How many times Capture runs a command before giving up.
    private const int CaptureAttempts = 10;

    /// <summary>
    /// Returns the first output line of <c>git rev-parse HEAD</c>.
    /// When the output can not be captured an error is emitted to the collector and an empty string is returned.
    /// </summary>
    public string GetCurrentCommit() => Capture("git", "rev-parse", "HEAD");

    /// <summary> Runs <c>git init</c> in the working directory. </summary>
    public void Init() => ExecIn("git", "init");

    /// <summary> Whether a <c>.git</c> folder exists directly under the working directory. </summary>
    // Resolved through the file system that produced workingDirectory instead of the static System.IO.Directory,
    // so the check stays on the same (possibly non-physical) file system as the rest of the class.
    public bool IsInitialized() =>
        workingDirectory.FileSystem.Directory.Exists(Path.Combine(workingDirectory.FullName, ".git"));

    /// <summary> Shallow-pulls (<c>--depth 1</c>) <paramref name="branch"/> from <c>origin</c>. </summary>
    public void Pull(string branch) => ExecIn("git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", branch);

    /// <summary> Shallow-fetches (<c>--depth 1</c>) <paramref name="reference"/> from <c>origin</c>, without tags or submodules. </summary>
    public void Fetch(string reference) => ExecIn("git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", reference);

    /// <summary> Runs <c>git sparse-checkout set</c> for <paramref name="folder"/>. </summary>
    public void EnableSparseCheckout(string folder) => ExecIn("git", "sparse-checkout", "set", folder);

    /// <summary> Runs <c>git sparse-checkout disable</c>. </summary>
    public void DisableSparseCheckout() => ExecIn("git", "sparse-checkout", "disable");

    /// <summary> Force-checks out <paramref name="reference"/>. </summary>
    public void Checkout(string reference) => ExecIn("git", "checkout", "--force", reference);

    /// <summary> Adds <paramref name="origin"/> as the remote named <c>origin</c>. </summary>
    public void GitAddOrigin(string origin) => ExecIn("git", "remote", "add", "origin", origin);

    /// <summary>
    /// Executes <paramref name="binary"/> with <paramref name="args"/> in the working directory.
    /// A non-zero exit code is reported to the collector as an error; nothing is thrown for it here.
    /// </summary>
    private void ExecIn(string binary, params string[] args)
    {
        var arguments = new ExecArguments(binary, args)
        {
            WorkingDirectory = workingDirectory.FullName,
            Environment = new Dictionary<string, string>
            {
                // Disable git editor prompts:
                // There are cases where `git pull` would prompt for an editor to write a commit message.
                // This env variable prevents that.
                { "GIT_EDITOR", "true" }
            },
        };
        var result = Proc.Exec(arguments);
        if (result != 0)
            collector.EmitError("", $"Exit code: {result} while executing {binary} {string.Join(" ", args)} in {workingDirectory}");
    }

    /// <summary>
    /// Runs <paramref name="binary"/> with <paramref name="args"/> in the working directory and returns the first line it prints.
    /// </summary>
    /// <returns>The first captured output line, or an empty string when every attempt failed.</returns>
    private string Capture(string binary, params string[] args)
    {
        // Run the command up to CaptureAttempts times. When every attempt fails, the last failure is
        // emitted to the collector and an empty string is returned; the failure is not rethrown.
        Exception? lastFailure = null;
        for (var attempt = 0; attempt < CaptureAttempts; attempt++)
        {
            try
            {
                return CaptureOutput();
            }
            catch (Exception ex)
            {
                lastFailure = ex;
            }
        }

        if (lastFailure is not null)
            collector.EmitError("", "failure capturing stdout", lastFailure);

        return string.Empty;

        // A single attempt, limited to 3 seconds; throws when the exit code is not 0 or nothing was printed.
        string CaptureOutput()
        {
            var arguments = new StartArguments(binary, args)
            {
                WorkingDirectory = workingDirectory.FullName,
                Timeout = TimeSpan.FromSeconds(3),
                WaitForExit = TimeSpan.FromSeconds(3),
                ConsoleOutWriter = NoopConsoleWriter.Instance
            };
            var result = Proc.Start(arguments);
            if (result.ExitCode != 0)
                throw new InvalidOperationException($"Exit code is not 0. Received {result.ExitCode} from {binary}: {workingDirectory}");

            return result.ConsoleOut.FirstOrDefault()?.Line
                ?? throw new InvalidOperationException($"No output captured for {binary}: {workingDirectory}");
        }
    }
}

src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs

Lines changed: 59 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,11 @@
33
// See the LICENSE file in the project root for more information
44

55
using System.Collections.Concurrent;
6-
using System.Diagnostics;
7-
using System.Diagnostics.CodeAnalysis;
86
using System.IO.Abstractions;
97
using Elastic.Documentation.Configuration.Assembler;
108
using Elastic.Documentation.Diagnostics;
119
using Elastic.Documentation.LinkIndex;
12-
using Elastic.Markdown.IO;
10+
using Elastic.Documentation.Links;
1311
using Microsoft.Extensions.Logging;
1412
using ProcNet;
1513
using ProcNet.Std;
@@ -25,29 +23,42 @@ public class AssemblerRepositorySourcer(ILoggerFactory logger, AssembleContext c
2523

2624
private RepositorySourcer RepositorySourcer => new(logger, context.CheckoutDirectory, context.ReadFileSystem, context.Collector);
2725

28-
/// <summary>
/// Builds a <see cref="CheckoutResult"/> from what is already on disk in the checkout directory:
/// the stored link registry snapshot plus one <see cref="Checkout"/> per configured repository whose folder exists.
/// Repositories without a folder are reported to the collector as errors and left out of the result.
/// </summary>
/// <exception cref="FileNotFoundException">The link registry snapshot file is not present in the checkout directory.</exception>
public CheckoutResult GetAll()
{
    var fs = context.ReadFileSystem;
    var repositories = Configuration.ReferenceRepositories.Values.Concat<Repository>([Configuration.Narrative]);
    var checkouts = new List<Checkout>();

    var linkRegistrySnapshotPath = Path.Combine(context.CheckoutDirectory.FullName, CheckoutResult.LinkRegistrySnapshotFileName);
    if (!fs.File.Exists(linkRegistrySnapshotPath))
        throw new FileNotFoundException("Link-index snapshot not found. Run the clone-all command first.", linkRegistrySnapshotPath);

    // Read through the same file system abstraction that performed the existence check above;
    // the static System.IO.File would bypass it and could look at a different file system.
    var linkRegistrySnapshotStr = fs.File.ReadAllText(linkRegistrySnapshotPath);
    var linkRegistry = LinkRegistry.Deserialize(linkRegistrySnapshotStr);

    foreach (var repo in repositories)
    {
        var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(context.CheckoutDirectory.FullName, repo.Name));
        if (!checkoutFolder.Exists)
        {
            context.Collector.EmitError(checkoutFolder.FullName, $"'{repo.Name}' does not exist in link index checkout directory");
            continue;
        }

        // Only folders that exist are asked for their current commit.
        IGitRepository gitFacade = new SingleCommitOptimizedGitRepository(context.Collector, checkoutFolder);
        var head = gitFacade.GetCurrentCommit();
        checkouts.Add(new Checkout
        {
            Repository = repo,
            Directory = checkoutFolder,
            HeadReference = head
        });
    }

    return new CheckoutResult
    {
        Checkouts = checkouts,
        LinkRegistrySnapshot = linkRegistry
    };
}
4960

50-
public async Task<IReadOnlyCollection<Checkout>> CloneAll(bool fetchLatest, Cancel ctx = default)
61+
public async Task<CheckoutResult> CloneAll(bool fetchLatest, Cancel ctx = default)
5162
{
5263
_logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
5364
PublishEnvironment.Name,
@@ -91,8 +102,23 @@ await Task.Run(() =>
91102
checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest));
92103
}, c);
93104
}).ConfigureAwait(false);
94-
return checkouts;
105+
await context.WriteFileSystem.File.WriteAllTextAsync(
106+
Path.Combine(context.CheckoutDirectory.FullName, CheckoutResult.LinkRegistrySnapshotFileName),
107+
LinkRegistry.Serialize(linkRegistry),
108+
ctx
109+
);
110+
return new CheckoutResult
111+
{
112+
Checkouts = checkouts,
113+
LinkRegistrySnapshot = linkRegistry
114+
};
95115
}
116+
117+
/// <summary>
/// Serializes <paramref name="linkRegistrySnapshot"/> and writes it to the <c>docs</c> folder of the output directory,
/// using <see cref="CheckoutResult.LinkRegistrySnapshotFileName"/> as the file name.
/// </summary>
/// <param name="linkRegistrySnapshot">The link registry to persist.</param>
/// <param name="ctx">Cancels the write.</param>
public async Task WriteLinkRegistrySnapshot(LinkRegistry linkRegistrySnapshot, Cancel ctx = default)
{
    var fs = context.WriteFileSystem;
    var docsDirectory = Path.Combine(context.OutputDirectory.FullName, "docs");

    // WriteAllTextAsync does not create missing parent directories.
    // CreateDirectory does nothing when the directory already exists.
    _ = fs.Directory.CreateDirectory(docsDirectory);

    await fs.File.WriteAllTextAsync(
        Path.Combine(docsDirectory, CheckoutResult.LinkRegistrySnapshotFileName),
        LinkRegistry.Serialize(linkRegistrySnapshot),
        ctx
    );
}
96122
}
97123

98124

@@ -108,6 +134,7 @@ public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDir
108134
public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1)
109135
{
110136
var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name));
137+
IGitRepository git = new SingleCommitOptimizedGitRepository(collector, checkoutFolder);
111138
if (attempt > 3)
112139
{
113140
collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts");
@@ -125,13 +152,13 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
125152
checkoutFolder.Create();
126153
checkoutFolder.Refresh();
127154
}
128-
var isGitInitialized = GitInit(repository, checkoutFolder);
155+
var isGitInitialized = GitInit(git, repository);
129156
string? head = null;
130157
if (isGitInitialized)
131158
{
132159
try
133160
{
134-
head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
161+
head = git.GetCurrentCommit();
135162
}
136163
catch (Exception e)
137164
{
@@ -147,7 +174,7 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
147174
_logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef);
148175
else
149176
{
150-
FetchAndCheckout(repository, gitRef, checkoutFolder);
177+
FetchAndCheckout(git, repository, gitRef);
151178
if (!pull)
152179
{
153180
return new Checkout
@@ -159,11 +186,11 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
159186
}
160187
try
161188
{
162-
ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef);
189+
git.Pull(gitRef);
163190
}
164191
catch (Exception e)
165192
{
166-
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch",
193+
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {Path}, falling back to recreating from scratch",
167194
repository.Name, gitRef, checkoutFolder.FullName);
168195
checkoutFolder.Delete(true);
169196
checkoutFolder.Refresh();
@@ -183,84 +210,31 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
183210
/// Initializes the git repository if it is not already initialized.
184211
/// Returns true if the repository was already initialized.
185212
/// </summary>
186-
private static bool GitInit(IGitRepository git, Repository repository)
{
    if (!git.IsInitialized())
    {
        // Not a repository yet: initialize it and register the configured origin.
        git.Init();
        git.GitAddOrigin(repository.Origin);
        return false;
    }

    // Already initialized, nothing to set up.
    return true;
}
195222

196-
private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder)
223+
private static void FetchAndCheckout(IGitRepository git, Repository repository, string gitRef)
197224
{
198-
ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef);
225+
git.Fetch(gitRef);
199226
switch (repository.CheckoutStrategy)
200227
{
201228
case CheckoutStrategy.Full:
202-
ExecIn(checkoutFolder, "git", "sparse-checkout", "disable");
229+
git.DisableSparseCheckout();
203230
break;
204231
case CheckoutStrategy.Partial:
205-
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs");
232+
git.EnableSparseCheckout("docs");
206233
break;
207234
default:
208235
throw new ArgumentOutOfRangeException(nameof(repository), repository.CheckoutStrategy, null);
209236
}
210-
ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef);
211-
}
212-
213-
private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args)
214-
{
215-
var arguments = new ExecArguments(binary, args)
216-
{
217-
WorkingDirectory = workingDirectory?.FullName
218-
};
219-
var result = Proc.Exec(arguments);
220-
if (result != 0)
221-
collector.EmitError("", $"Exit code: {result} while executing {binary} {string.Join(" ", args)} in {workingDirectory}");
222-
}
223-
224-
// ReSharper disable once UnusedMember.Local
225-
private string Capture(IDirectoryInfo? workingDirectory, string binary, params string[] args)
226-
{
227-
// Try 10 times to capture the output of the command, if it fails, we'll throw an exception on the last try
228-
Exception? e = null;
229-
for (var i = 0; i <= 9; i++)
230-
{
231-
try
232-
{
233-
return CaptureOutput();
234-
}
235-
catch (Exception ex)
236-
{
237-
if (ex is not null)
238-
e = ex;
239-
}
240-
}
241-
242-
if (e is not null)
243-
collector.EmitError("", "failure capturing stdout", e);
244-
245-
246-
return string.Empty;
247-
248-
string CaptureOutput()
249-
{
250-
var arguments = new StartArguments(binary, args)
251-
{
252-
WorkingDirectory = workingDirectory?.FullName,
253-
//WaitForStreamReadersTimeout = TimeSpan.FromSeconds(3),
254-
Timeout = TimeSpan.FromSeconds(3),
255-
WaitForExit = TimeSpan.FromSeconds(3),
256-
ConsoleOutWriter = NoopConsoleWriter.Instance
257-
};
258-
var result = Proc.Start(arguments);
259-
var line = result.ExitCode != 0
260-
? throw new Exception($"Exit code is not 0. Received {result.ExitCode} from {binary}: {workingDirectory}")
261-
: result.ConsoleOut.FirstOrDefault()?.Line ?? throw new Exception($"No output captured for {binary}: {workingDirectory}");
262-
return line;
263-
}
237+
git.Checkout(gitRef);
264238
}
265239
}
266240

@@ -272,3 +246,10 @@ public void Write(Exception e) { }
272246

273247
public void Write(ConsoleOut consoleOut) { }
274248
}
249+
250+
/// <summary>
/// The outcome of sourcing repositories: the checkouts together with the link registry snapshot that belongs to them.
/// </summary>
public record CheckoutResult
{
    /// <summary> File name used when the link registry snapshot is written to or read from disk. </summary>
    public static string LinkRegistrySnapshotFileName { get; } = "link-index.snapshot.json";

    /// <summary> The repository checkouts. </summary>
    public required IReadOnlyCollection<Checkout> Checkouts { get; init; }

    /// <summary> The link registry snapshot associated with <see cref="Checkouts"/>. </summary>
    public required LinkRegistry LinkRegistrySnapshot { get; init; }
}

0 commit comments

Comments
 (0)