Skip to content

Commit ab987b0

Browse files
committed
Assembler: clone-all from link-index registry
1 parent da2667b commit ab987b0

File tree

3 files changed

+129
-87
lines changed

3 files changed

+129
-87
lines changed

src/Elastic.Documentation.Configuration/Assembler/Repository.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
33
// See the LICENSE file in the project root for more information
44

5+
using System.Runtime.Serialization;
56
using YamlDotNet.Serialization;
67

78
namespace Elastic.Documentation.Configuration.Assembler;
@@ -12,6 +13,14 @@ public record NarrativeRepository : Repository
1213
public override string Name { get; set; } = RepositoryName;
1314
}
1415

16+
public enum CheckoutStrategy
17+
{
18+
[EnumMember(Value = "partial")]
19+
Partial,
20+
[EnumMember(Value = "full")]
21+
Full
22+
}
23+
1524
public record Repository
1625
{
1726
[YamlIgnore]
@@ -27,7 +36,7 @@ public record Repository
2736
public string GitReferenceNext { get; set; } = "main";
2837

2938
[YamlMember(Alias = "checkout_strategy")]
30-
public string CheckoutStrategy { get; set; } = "partial";
39+
public CheckoutStrategy CheckoutStrategy { get; set; } = CheckoutStrategy.Partial;
3140

3241
[YamlMember(Alias = "skip")]
3342
public bool Skip { get; set; }

src/tooling/docs-assembler/Cli/RepositoryCommands.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
// See the LICENSE file in the project root for more information
44

55
using System.Collections.Concurrent;
6+
using System.ComponentModel;
67
using System.Diagnostics;
78
using System.Diagnostics.CodeAnalysis;
9+
using System.Globalization;
810
using System.IO.Abstractions;
911
using System.Net.Mime;
1012
using Actions.Core.Services;
@@ -39,11 +41,13 @@ private void AssignOutputLogger()
3941
/// <summary> Clones all repositories </summary>
4042
/// <param name="strict"> Treat warnings as errors and fail the build on warnings</param>
4143
/// <param name="environment"> The environment to build</param>
44+
/// <param name="fetchLatest"> If true, fetch the latest commit of the branch instead of the git reference recorded in the link registry</param>
4245
/// <param name="ctx"></param>
4346
[Command("clone-all")]
4447
public async Task<int> CloneAll(
4548
bool? strict = null,
4649
string? environment = null,
50+
bool? fetchLatest = null,
4751
Cancel ctx = default
4852
)
4953
{
@@ -55,7 +59,8 @@ public async Task<int> CloneAll(
5559

5660
var assembleContext = new AssembleContext(environment, collector, new FileSystem(), new FileSystem(), null, null);
5761
var cloner = new AssemblerRepositorySourcer(logger, assembleContext);
58-
_ = await cloner.AcquireAllLatest(ctx);
62+
63+
_ = await cloner.CloneAll(fetchLatest ?? false, ctx);
5964

6065
await collector.StopAsync(ctx);
6166

@@ -138,7 +143,6 @@ public async Task<int> UpdateLinkIndexAll(ContentSource contentSource, Cancel ct
138143
// It's only used to get the list of repositories.
139144
var assembleContext = new AssembleContext("prod", collector, new FileSystem(), new FileSystem(), null, null);
140145
var cloner = new RepositorySourcer(logger, assembleContext.CheckoutDirectory, new FileSystem(), collector);
141-
var dict = new ConcurrentDictionary<string, Stopwatch>();
142146
var repositories = new Dictionary<string, Repository>(assembleContext.Configuration.ReferenceRepositories)
143147
{
144148
{ NarrativeRepository.RepositoryName, assembleContext.Configuration.Narrative }
@@ -152,8 +156,7 @@ await Parallel.ForEachAsync(repositories,
152156
{
153157
try
154158
{
155-
var name = kv.Key.Trim();
156-
var checkout = cloner.CloneOrUpdateRepository(kv.Value, name, kv.Value.GetBranch(contentSource), dict);
159+
var checkout = cloner.CloneRef(kv.Value, kv.Value.GetBranch(contentSource), true);
157160
var outputPath = Directory.CreateTempSubdirectory(checkout.Repository.Name).FullName;
158161
var context = new BuildContext(
159162
collector,

src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs

Lines changed: 112 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.IO.Abstractions;
99
using Elastic.Documentation.Configuration.Assembler;
1010
using Elastic.Documentation.Diagnostics;
11+
using Elastic.Documentation.LinkIndex;
1112
using Elastic.Markdown.IO;
1213
using Microsoft.Extensions.Logging;
1314
using ProcNet;
@@ -46,128 +47,158 @@ public IReadOnlyCollection<Checkout> GetAll()
4647
return checkouts;
4748
}
4849

49-
public async Task<IReadOnlyCollection<Checkout>> AcquireAllLatest(Cancel ctx = default)
50+
public async Task<IReadOnlyCollection<Checkout>> CloneAll(bool fetchLatest, Cancel ctx = default)
5051
{
51-
_logger.LogInformation(
52-
"Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
52+
_logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
5353
PublishEnvironment.Name,
5454
PublishEnvironment.ContentSource.ToStringFast(true)
5555
);
56+
var checkouts = new ConcurrentBag<Checkout>();
57+
58+
ILinkIndexReader linkIndexReader = Aws3LinkIndexReader.CreateAnonymous();
59+
var linkRegistry = await linkIndexReader.GetRegistry(ctx);
5660

5761
var repositories = new Dictionary<string, Repository>(Configuration.ReferenceRepositories)
5862
{
5963
{ NarrativeRepository.RepositoryName, Configuration.Narrative }
6064
};
61-
return await RepositorySourcer.AcquireAllLatest(repositories, PublishEnvironment.ContentSource, ctx);
62-
}
63-
}
64-
65-
public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector)
66-
{
67-
private readonly ILogger<RepositorySourcer> _logger = logger.CreateLogger<RepositorySourcer>();
6865

69-
public async Task<IReadOnlyCollection<Checkout>> AcquireAllLatest(Dictionary<string, Repository> repositories, ContentSource source, Cancel ctx = default)
70-
{
71-
var dict = new ConcurrentDictionary<string, Stopwatch>();
72-
var checkouts = new ConcurrentBag<Checkout>();
7366
await Parallel.ForEachAsync(repositories,
7467
new ParallelOptions
7568
{
7669
CancellationToken = ctx,
7770
MaxDegreeOfParallelism = Environment.ProcessorCount
78-
}, async (kv, c) =>
71+
}, async (repo, c) =>
7972
{
8073
await Task.Run(() =>
8174
{
82-
var name = kv.Key.Trim();
83-
var repo = kv.Value;
84-
var clone = CloneOrUpdateRepository(kv.Value, name, repo.GetBranch(source), dict);
85-
checkouts.Add(clone);
75+
if (!linkRegistry.Repositories.TryGetValue(repo.Key, out var entry))
76+
{
77+
context.Collector.EmitError("", $"'{repo.Key}' does not exist in link index");
78+
return;
79+
}
80+
var branch = repo.Value.GetBranch(PublishEnvironment.ContentSource);
81+
var gitRef = branch;
82+
if (!fetchLatest)
83+
{
84+
if (!entry.TryGetValue(branch, out var entryInfo))
85+
{
86+
context.Collector.EmitError("", $"'{repo.Key}' does not have a '{branch}' entry in link index");
87+
return;
88+
}
89+
gitRef = entryInfo.GitReference;
90+
}
91+
checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest));
8692
}, c);
8793
}).ConfigureAwait(false);
88-
89-
return checkouts.ToList().AsReadOnly();
94+
return checkouts;
9095
}
96+
}
9197

92-
public Checkout CloneOrUpdateRepository(Repository repository, string name, string branch, ConcurrentDictionary<string, Stopwatch> dict)
93-
{
94-
var fs = readFileSystem;
95-
var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, name));
96-
var relativePath = Path.GetRelativePath(Paths.WorkingDirectoryRoot.FullName, checkoutFolder.FullName);
97-
var sw = Stopwatch.StartNew();
9898

99-
_ = dict.AddOrUpdate($"{name} ({branch})", sw, (_, _) => sw);
99+
public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector)
100+
{
101+
private readonly ILogger<RepositorySourcer> _logger = logger.CreateLogger<RepositorySourcer>();
100102

101-
string? head;
102-
if (checkoutFolder.Exists)
103+
/// <summary>
104+
/// Clones the repository to the checkout directory and checks out the specified git reference.
105+
/// </summary>
106+
/// <param name="repository">The repository to clone.</param>
107+
/// <param name="gitRef">The git reference to check out: a branch, commit or tag.</param>
108+
public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1)
109+
{
110+
var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name));
111+
if (attempt > 3)
103112
{
104-
if (!TryUpdateSource(name, branch, relativePath, checkoutFolder, out head))
105-
head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder);
113+
collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts");
114+
return new Checkout
115+
{
116+
Directory = checkoutFolder,
117+
HeadReference = gitRef,
118+
Repository = repository,
119+
};
106120
}
107-
else
108-
head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder);
109-
110-
sw.Stop();
111-
112-
return new Checkout
121+
_logger.LogInformation("{RepositoryName}: Cloning repository {RepositoryName}@{Commit} to {CheckoutFolder}", repository.Name, repository.Name, gitRef,
122+
checkoutFolder.FullName);
123+
if (!checkoutFolder.Exists)
113124
{
114-
Repository = repository,
115-
Directory = checkoutFolder,
116-
HeadReference = head
117-
};
118-
}
119-
120-
private bool TryUpdateSource(string name, string branch, string relativePath, IDirectoryInfo checkoutFolder, [NotNullWhen(true)] out string? head)
121-
{
122-
head = null;
123-
try
124-
{
125-
_logger.LogInformation("Pull: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath);
126-
// --allow-unrelated-histories due to shallow clones not finding a common ancestor
127-
ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff");
125+
checkoutFolder.Create();
126+
checkoutFolder.Refresh();
128127
}
129-
catch (Exception e)
128+
var isGitInitialized = GitInit(repository, checkoutFolder);
129+
string? head = null;
130+
if (isGitInitialized)
130131
{
131-
_logger.LogError(e, "Failed to update {Name} from {RelativePath}, falling back to recreating from scratch", name, relativePath);
132-
if (checkoutFolder.Exists)
132+
try
133+
{
134+
head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
135+
}
136+
catch (Exception e)
133137
{
138+
_logger.LogError(e, "{RepositoryName}: Failed to acquire current commit, falling back to recreating from scratch", repository.Name);
134139
checkoutFolder.Delete(true);
135140
checkoutFolder.Refresh();
141+
return CloneRef(repository, gitRef, pull, attempt + 1);
136142
}
137-
return false;
138143
}
139144

140-
head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
145+
if (head != null && head == gitRef)
146+
_logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef);
147+
else
148+
{
149+
FetchAndCheckout(repository, gitRef, checkoutFolder);
150+
if (!pull)
151+
{
152+
return new Checkout
153+
{
154+
Directory = checkoutFolder,
155+
HeadReference = gitRef,
156+
Repository = repository,
157+
};
158+
}
159+
try
160+
{
161+
ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef);
162+
}
163+
catch (Exception e)
164+
{
165+
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch",
166+
repository.Name, gitRef, checkoutFolder.FullName);
167+
checkoutFolder.Delete(true);
168+
checkoutFolder.Refresh();
169+
return CloneRef(repository, gitRef, pull, attempt + 1);
170+
}
171+
}
141172

142-
return true;
173+
return new Checkout
174+
{
175+
Directory = checkoutFolder,
176+
HeadReference = gitRef,
177+
Repository = repository,
178+
};
143179
}
144180

145-
private string CheckoutFromScratch(Repository repository, string name, string branch, string relativePath, IDirectoryInfo checkoutFolder)
181+
/// <summary>
182+
/// Initializes the git repository if it is not already initialized.
183+
/// Returns true if the repository was already initialized.
184+
/// </summary>
185+
private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder)
146186
{
147-
_logger.LogInformation("Checkout: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath);
148-
switch (repository.CheckoutStrategy)
149-
{
150-
case "full":
151-
Exec("git", "clone", repository.Origin, checkoutFolder.FullName,
152-
"--depth", "1", "--single-branch",
153-
"--branch", branch
154-
);
155-
break;
156-
case "partial":
157-
Exec(
158-
"git", "clone", "--filter=blob:none", "--no-checkout", repository.Origin, checkoutFolder.FullName
159-
);
160-
161-
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "--cone");
162-
ExecIn(checkoutFolder, "git", "checkout", branch);
163-
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs");
164-
break;
165-
}
166-
167-
return Capture(checkoutFolder, "git", "rev-parse", "HEAD");
187+
var isGitAlreadyInitialized = Directory.Exists(Path.Combine(checkoutFolder.FullName, ".git"));
188+
if (isGitAlreadyInitialized)
189+
return true;
190+
ExecIn(checkoutFolder, "git", "init");
191+
ExecIn(checkoutFolder, "git", "remote", "add", "origin", repository.Origin);
192+
return false;
168193
}
169194

170-
private void Exec(string binary, params string[] args) => ExecIn(null, binary, args);
195+
private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder)
196+
{
197+
ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef);
198+
if (repository.CheckoutStrategy == CheckoutStrategy.Partial)
199+
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs");
200+
ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef);
201+
}
171202

172203
private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args)
173204
{
@@ -221,7 +252,6 @@ string CaptureOutput()
221252
return line;
222253
}
223254
}
224-
225255
}
226256

227257
public class NoopConsoleWriter : IConsoleOutWriter

0 commit comments

Comments
 (0)