Skip to content

Commit 60309b3

Browse files
committed
Add specialized index commands
1 parent 7c32cc6 commit 60309b3

File tree

12 files changed

+589
-39
lines changed

12 files changed

+589
-39
lines changed

src/Elastic.Documentation.Configuration/DocumentationEndpoints.cs

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,45 @@
22
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
33
// See the LICENSE file in the project root for more information
44

5+
using System.Security.Cryptography.X509Certificates;
6+
57
namespace Elastic.Documentation.Configuration;
68

7-
/// <summary>
/// Groups the remote endpoints that documentation tooling talks to.
/// </summary>
public class DocumentationEndpoints
{
    /// <summary>
    /// The Elasticsearch endpoint settings; must be supplied when the instance is created.
    /// </summary>
    public required ElasticsearchEndpoint Elasticsearch { get; init; }
}
1113

12-
/// <summary>
/// Connection, authentication and indexing settings for the Elasticsearch cluster that documentation is exported to.
/// </summary>
/// <remarks>
/// Every property has a public setter, so individual values can be overridden after the instance has been created.
/// </remarks>
public class ElasticsearchEndpoint
{
    /// <summary>
    /// Creates an endpoint pointing at <c>https://localhost:9200</c> with every other option left at its default.
    /// </summary>
    /// <remarks>
    /// A fresh instance is returned on every access. The type is mutable, so handing out one shared static
    /// instance would let an override applied through one reference leak into every other consumer of the default.
    /// </remarks>
    public static ElasticsearchEndpoint Default => new() { Uri = new Uri("https://localhost:9200") };

    /// <summary>Address of the Elasticsearch cluster.</summary>
    public required Uri Uri { get; set; }

    /// <summary>Username for basic authentication; only meaningful together with <see cref="Password"/>.</summary>
    public string? Username { get; set; }

    /// <summary>Password for basic authentication; only meaningful together with <see cref="Username"/>.</summary>
    public string? Password { get; set; }

    /// <summary>Elasticsearch API key.</summary>
    public string? ApiKey { get; set; }

    // inference options

    /// <summary>Number of search threads requested for the inference endpoint. Defaults to 8.</summary>
    public int SearchNumThreads { get; set; } = 8;

    /// <summary>Number of index threads requested for the inference endpoint. Defaults to 8.</summary>
    public int IndexNumThreads { get; set; } = 8;

    // index options

    /// <summary>Prefix used when computing index and alias names. Defaults to <c>semantic-docs</c>.</summary>
    public string IndexNamePrefix { get; set; } = "semantic-docs";

    // channel buffer options

    /// <summary>Number of documents sent per bulk request. Defaults to 100.</summary>
    public int BufferSize { get; set; } = 100;

    /// <summary>Number of times failed bulk items are retried. Defaults to 3.</summary>
    public int MaxRetries { get; set; } = 3;

    // connection options

    /// <summary>Enables the client's debug mode.</summary>
    public bool DebugMode { get; set; }

    /// <summary>Fingerprint of a self-signed certificate used to validate the SSL connection.</summary>
    public string? CertificateFingerprint { get; set; }

    /// <summary>Address of a proxy server to route requests through.</summary>
    public string? ProxyAddress { get; set; }

    /// <summary>Password for the proxy server.</summary>
    public string? ProxyPassword { get; set; }

    /// <summary>Username for the proxy server.</summary>
    public string? ProxyUsername { get; set; }

    /// <summary>Disables SSL certificate validation entirely (expert option).</summary>
    public bool DisableSslVerification { get; set; }

    /// <summary>Certificate used to validate the server's SSL certificate.</summary>
    public X509Certificate? Certificate { get; set; }

    /// <summary>
    /// Set when <see cref="Certificate"/> is only part of the validation chain rather than its root.
    /// </summary>
    public bool CertificateIsNotRoot { get; set; }

    /// <summary>Timeout, in minutes, for creating the inference endpoint; <c>null</c> leaves the choice to the consumer.</summary>
    public int? BootstrapTimeout { get; set; }
}

src/Elastic.Documentation/Exporter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public enum Exporter
1111
Html,
1212
LLMText,
1313
Elasticsearch,
14-
SemanticElasticsearch,
14+
ElasticsearchNoSemantic,
1515
Configuration,
1616
DocumentationState,
1717
LinkMetadata,

src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
using System.IO.Abstractions;
66
using Elastic.Channels;
77
using Elastic.Documentation.Configuration;
8-
using Elastic.Documentation.Configuration.Assembler;
98
using Elastic.Documentation.Diagnostics;
109
using Elastic.Documentation.Search;
1110
using Elastic.Documentation.Serialization;
@@ -20,23 +19,23 @@
2019

2120
namespace Elastic.Markdown.Exporters;
2221

23-
/// <summary>
/// Markdown exporter that writes <see cref="DocumentationDocument"/> instances through a
/// <see cref="CatalogIndexChannel{T}"/>; the mapping is created without an inference id.
/// </summary>
/// <param name="logFactory">Forwarded to the base exporter.</param>
/// <param name="collector">Forwarded to the base exporter.</param>
/// <param name="indexNamespace">Becomes part of the index and alias names (lower-cased).</param>
/// <param name="endpoints">Forwarded to the base exporter; its Elasticsearch settings supply the index name prefix.</param>
public class ElasticsearchMarkdownExporter(ILoggerFactory logFactory, IDiagnosticsCollector collector, string indexNamespace, DocumentationEndpoints endpoints)
    : ElasticsearchMarkdownExporterBase<CatalogIndexChannelOptions<DocumentationDocument>, CatalogIndexChannel<DocumentationDocument>>
        (logFactory, collector, endpoints)
{
    /// <inheritdoc />
    protected override CatalogIndexChannelOptions<DocumentationDocument> NewOptions(DistributedTransport transport)
    {
        // Lower-case the prefix for BOTH names: previously only IndexFormat was normalised, so a mixed-case
        // prefix produced an alias whose name disagreed with the indices it is meant to front.
        var aliasName = $"{Endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}";

        return new(transport)
        {
            GetMapping = () => CreateMapping(null),
            // {{0:...}} stays a literal format placeholder for the channel to fill with a timestamp.
            IndexFormat = $"{aliasName}-{{0:yyyy.MM.dd.HHmmss}}",
            ActiveSearchAlias = aliasName,
        };
    }

    /// <inheritdoc />
    protected override CatalogIndexChannel<DocumentationDocument> NewChannel(CatalogIndexChannelOptions<DocumentationDocument> options) => new(options);
}
3837

39-
public class ElasticsearchMarkdownSemanticExporter(PublishEnvironment environment, ILoggerFactory logFactory, IDiagnosticsCollector collector, DocumentationEndpoints endpoints)
38+
public class ElasticsearchMarkdownSemanticExporter(ILoggerFactory logFactory, IDiagnosticsCollector collector, string indexNamespace, DocumentationEndpoints endpoints)
4039
: ElasticsearchMarkdownExporterBase<SemanticIndexChannelOptions<DocumentationDocument>, SemanticIndexChannel<DocumentationDocument>>
4140
(logFactory, collector, endpoints)
4241
{
@@ -45,20 +44,23 @@ public class ElasticsearchMarkdownSemanticExporter(PublishEnvironment environmen
4544
{
4645
GetMapping = (inferenceId, _) => CreateMapping(inferenceId),
4746
GetMappingSettings = (_, _) => CreateMappingSetting(),
48-
IndexFormat = $"semantic-docs-{environment.Name}-{{0:yyyy.MM.dd.HHmmss}}",
49-
ActiveSearchAlias = $"semantic-docs-{environment.Name}",
50-
IndexNumThreads = IndexNumThreads,
51-
InferenceCreateTimeout = TimeSpan.FromMinutes(4)
47+
IndexFormat = $"{Endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}",
48+
ActiveSearchAlias = $"{Endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}",
49+
IndexNumThreads = Endpoint.IndexNumThreads,
50+
SearchNumThreads = Endpoint.SearchNumThreads,
51+
InferenceCreateTimeout = TimeSpan.FromMinutes(Endpoint.BootstrapTimeout ?? 4)
5252
};
5353

5454
/// <inheritdoc />
5555
protected override SemanticIndexChannel<DocumentationDocument> NewChannel(SemanticIndexChannelOptions<DocumentationDocument> options) => new(options);
5656
}
5757

58+
5859
public abstract class ElasticsearchMarkdownExporterBase<TChannelOptions, TChannel>(
5960
ILoggerFactory logFactory,
6061
IDiagnosticsCollector collector,
61-
DocumentationEndpoints endpoints)
62+
DocumentationEndpoints endpoints
63+
)
6264
: IMarkdownExporter, IDisposable
6365
where TChannelOptions : CatalogIndexChannelOptionsBase<DocumentationDocument>
6466
where TChannel : CatalogIndexChannel<DocumentationDocument, TChannelOptions>
@@ -69,7 +71,7 @@ public abstract class ElasticsearchMarkdownExporterBase<TChannelOptions, TChanne
6971
protected abstract TChannelOptions NewOptions(DistributedTransport transport);
7072
protected abstract TChannel NewChannel(TChannelOptions options);
7173

72-
protected int IndexNumThreads => 8;
74+
protected ElasticsearchEndpoint Endpoint { get; } = endpoints.Elasticsearch;
7375

7476
protected static string CreateMappingSetting() =>
7577
// language=json
@@ -97,7 +99,6 @@ protected static string CreateMappingSetting() =>
9799
""";
98100

99101
protected static string CreateMapping(string? inferenceId) =>
100-
// langugage=json
101102
$$"""
102103
{
103104
"properties": {
@@ -131,15 +132,13 @@ protected static string CreateMapping(string? inferenceId) =>
131132
""";
132133

133134
private static string AbstractMapping() =>
134-
// langugage=json
135135
"""
136136
, "abstract": {
137137
"type": "text"
138138
}
139139
""";
140140

141141
private static string InferenceMapping(string inferenceId) =>
142-
// langugage=json
143142
$"""
144143
"type": "semantic_text",
145144
"inference_id": "{inferenceId}"
@@ -159,12 +158,26 @@ public async ValueTask StartAsync(Cancel ctx = default)
159158
return;
160159

161160
var es = endpoints.Elasticsearch;
161+
162162
var configuration = new ElasticsearchConfiguration(es.Uri)
163163
{
164164
Authentication = es.ApiKey is { } apiKey
165165
? new ApiKey(apiKey)
166-
: es.Username is { } username && es.Password is { } password
166+
: es is { Username: { } username, Password: { } password }
167167
? new BasicAuthentication(username, password)
168+
: null,
169+
EnableHttpCompression = true,
170+
DebugMode = Endpoint.DebugMode,
171+
CertificateFingerprint = Endpoint.CertificateFingerprint,
172+
ProxyAddress = Endpoint.ProxyAddress,
173+
ProxyPassword = Endpoint.ProxyPassword,
174+
ProxyUsername = Endpoint.ProxyUsername,
175+
ServerCertificateValidationCallback = Endpoint.DisableSslVerification
176+
? CertificateValidations.AllowAll
177+
: Endpoint.Certificate is { } cert
178+
? Endpoint.CertificateIsNotRoot
179+
? CertificateValidations.AuthorityPartOfChain(cert)
180+
: CertificateValidations.AuthorityIsRoot(cert)
168181
: null
169182
};
170183

@@ -175,9 +188,9 @@ public async ValueTask StartAsync(Cancel ctx = default)
175188
var options = NewOptions(transport);
176189
options.BufferOptions = new BufferOptions
177190
{
178-
OutboundBufferMaxSize = 100,
179-
ExportMaxConcurrency = IndexNumThreads,
180-
ExportMaxRetries = 3
191+
OutboundBufferMaxSize = Endpoint.BufferSize,
192+
ExportMaxConcurrency = Endpoint.IndexNumThreads,
193+
ExportMaxRetries = Endpoint.MaxRetries,
181194
};
182195
options.SerializerContext = SourceGenerationContext.Default;
183196
options.ExportBufferCallback = () => _logger.LogInformation("Exported buffer to Elasticsearch");
@@ -206,7 +219,7 @@ public async ValueTask StopAsync(Cancel ctx = default)
206219
_logger.LogInformation("Applying aliases to {Index}", _channel.IndexName);
207220
var swapped = await _channel.ApplyAliasesAsync(ctx);
208221
if (!swapped)
209-
collector.EmitGlobalError($"{nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {_channel.IndexName}");
222+
collector.EmitGlobalError($"{nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {_channel.IndexName}");
210223
}
211224

212225
public void Dispose()

src/Elastic.Markdown/Exporters/ExporterExtensions.cs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public static IReadOnlyCollection<IMarkdownExporter> CreateMarkdownExporters(
1515
this IReadOnlySet<Exporter> exportOptions,
1616
ILoggerFactory logFactory,
1717
IDocumentationConfigurationContext context,
18-
PublishEnvironment? environment = null
18+
string indexNamespace
1919
)
2020
{
2121
var markdownExporters = new List<IMarkdownExporter>(3);
@@ -24,13 +24,9 @@ public static IReadOnlyCollection<IMarkdownExporter> CreateMarkdownExporters(
2424
if (exportOptions.Contains(Exporter.Configuration))
2525
markdownExporters.Add(new ConfigurationExporter(logFactory, context.ConfigurationFileProvider, context));
2626
if (exportOptions.Contains(Exporter.Elasticsearch))
27-
markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, context.Endpoints));
28-
if (exportOptions.Contains(Exporter.SemanticElasticsearch))
29-
{
30-
if (environment is null)
31-
throw new ArgumentNullException(nameof(environment), "A publish environment is required when using the semantic elasticsearch exporter");
32-
markdownExporters.Add(new ElasticsearchMarkdownSemanticExporter(environment, logFactory, context.Collector, context.Endpoints));
33-
}
27+
markdownExporters.Add(new ElasticsearchMarkdownSemanticExporter(logFactory, context.Collector, indexNamespace, context.Endpoints));
28+
if (exportOptions.Contains(Exporter.ElasticsearchNoSemantic))
29+
markdownExporters.Add(new ElasticsearchMarkdownExporter(logFactory, context.Collector, indexNamespace, context.Endpoints));
3430
return markdownExporters;
3531
}
3632
}

src/services/Elastic.Documentation.Assembler/Building/AssemblerBuilder.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
using Elastic.Documentation.Serialization;
1414
using Elastic.Markdown;
1515
using Elastic.Markdown.Exporters;
16+
using Elastic.Markdown.Helpers;
1617
using Microsoft.Extensions.Logging;
1718

1819
namespace Elastic.Documentation.Assembler.Building;
@@ -40,7 +41,7 @@ public async Task BuildAllAsync(PublishEnvironment environment, FrozenDictionary
4041

4142
var redirects = new Dictionary<string, string>();
4243

43-
var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, environment);
44+
var markdownExporters = exportOptions.CreateMarkdownExporters(logFactory, context, environment.Name);
4445

4546
var tasks = markdownExporters.Select(async e => await e.StartAsync(ctx));
4647
await Task.WhenAll(tasks);
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.IO.Abstractions;
6+
using System.Security.Cryptography.X509Certificates;
7+
using Actions.Core.Services;
8+
using Elastic.Documentation.Assembler.Building;
9+
using Elastic.Documentation.Configuration;
10+
using Elastic.Documentation.Configuration.Assembler;
11+
using Elastic.Documentation.Diagnostics;
12+
using Microsoft.Extensions.Logging;
13+
using static Elastic.Documentation.Exporter;
14+
15+
namespace Elastic.Documentation.Assembler.Indexing;
16+
17+
/// <summary>
/// Build service that applies caller-supplied overrides to the configured Elasticsearch endpoint and then
/// runs a metadata-only build with a single Elasticsearch exporter.
/// </summary>
public class AssemblerIndexService(
    ILoggerFactory logFactory,
    AssemblyConfiguration assemblyConfiguration,
    IConfigurationContext configurationContext,
    ICoreService githubActionsService
) : AssemblerBuildService(logFactory, assemblyConfiguration, configurationContext, githubActionsService)
{
    private readonly IConfigurationContext _configurationContext = configurationContext;

    /// <summary>
    /// Index documentation to Elasticsearch, calls `docs-builder assembler build --exporters elasticsearch`. Exposes more options
    /// </summary>
    /// <param name="collector"></param>
    /// <param name="fileSystem"></param>
    /// <param name="endpoint">Elasticsearch endpoint, alternatively set env DOCUMENTATION_ELASTIC_URL</param>
    /// <param name="environment">The --environment used to clone ends up being part of the index name</param>
    /// <param name="apiKey">Elasticsearch API key, alternatively set env DOCUMENTATION_ELASTIC_APIKEY</param>
    /// <param name="username">Elasticsearch username (basic auth), alternatively set env DOCUMENTATION_ELASTIC_USERNAME</param>
    /// <param name="password">Elasticsearch password (basic auth), alternatively set env DOCUMENTATION_ELASTIC_PASSWORD</param>
    /// <param name="noSemantic">Index without semantic fields</param>
    /// <param name="searchNumThreads">The number of search threads the inference endpoint should use. Defaults: 8</param>
    /// <param name="indexNumThreads">The number of index threads the inference endpoint should use. Defaults: 8</param>
    /// <param name="bootstrapTimeout">Timeout in minutes for the inference endpoint creation. Defaults: 4</param>
    /// <param name="indexNamePrefix">The prefix for the computed index/alias names. Defaults: semantic-docs</param>
    /// <param name="bufferSize">The number of documents to send to ES as part of the bulk. Defaults: 100</param>
    /// <param name="maxRetries">The number of times failed bulk items should be retried. Defaults: 3</param>
    /// <param name="debugMode">Buffer ES request/responses for better error messages and pass ?pretty to all requests</param>
    /// <param name="proxyAddress">Route requests through a proxy server</param>
    /// <param name="proxyPassword">Proxy server password</param>
    /// <param name="proxyUsername">Proxy server username</param>
    /// <param name="disableSslVerification">Disable SSL certificate validation (EXPERT OPTION)</param>
    /// <param name="certificateFingerprint">Pass a self-signed certificate fingerprint to validate the SSL connection</param>
    /// <param name="certificatePath">Pass a self-signed certificate to validate the SSL connection</param>
    /// <param name="certificateNotRoot">If the certificate is not root but only part of the validation chain pass this</param>
    /// <param name="ctx"></param>
    /// <returns>
    /// <c>false</c> when <paramref name="endpoint"/> or <paramref name="certificatePath"/> is invalid (an error is
    /// emitted on <paramref name="collector"/> and no build is started); otherwise the result of the build.
    /// </returns>
    public async Task<bool> Index(IDiagnosticsCollector collector,
        FileSystem fileSystem,
        string? endpoint = null,
        string? environment = null,
        string? apiKey = null,
        string? username = null,
        string? password = null,
        // inference options
        bool? noSemantic = null,
        int? searchNumThreads = null,
        int? indexNumThreads = null,
        int? bootstrapTimeout = null,
        // index options
        string? indexNamePrefix = null,
        // channel buffer options
        int? bufferSize = null,
        int? maxRetries = null,
        // connection options
        bool? debugMode = null,
        string? proxyAddress = null,
        string? proxyPassword = null,
        string? proxyUsername = null,
        bool? disableSslVerification = null,
        string? certificateFingerprint = null,
        string? certificatePath = null,
        bool? certificateNotRoot = null,
        Cancel ctx = default
    )
    {
        // Overrides are written straight onto the configured endpoint; arguments left null/empty keep
        // whatever value the configuration already holds.
        var cfg = _configurationContext.Endpoints.Elasticsearch;

        // Tracks whether every supplied override could be applied. All problems are reported before
        // bailing out so the caller sees them in one run.
        var overridesValid = true;

        if (!string.IsNullOrEmpty(endpoint))
        {
            if (Uri.TryCreate(endpoint, UriKind.Absolute, out var uri))
                cfg.Uri = uri;
            else
            {
                collector.EmitGlobalError($"'{endpoint}' is not a valid URI");
                overridesValid = false;
            }
        }

        if (!string.IsNullOrEmpty(apiKey))
            cfg.ApiKey = apiKey;
        if (!string.IsNullOrEmpty(username))
            cfg.Username = username;
        if (!string.IsNullOrEmpty(password))
            cfg.Password = password;

        if (searchNumThreads.HasValue)
            cfg.SearchNumThreads = searchNumThreads.Value;
        if (indexNumThreads.HasValue)
            cfg.IndexNumThreads = indexNumThreads.Value;
        if (bootstrapTimeout.HasValue)
            cfg.BootstrapTimeout = bootstrapTimeout.Value;
        if (!string.IsNullOrEmpty(indexNamePrefix))
            cfg.IndexNamePrefix = indexNamePrefix;
        if (bufferSize.HasValue)
            cfg.BufferSize = bufferSize.Value;
        if (maxRetries.HasValue)
            cfg.MaxRetries = maxRetries.Value;

        if (debugMode.HasValue)
            cfg.DebugMode = debugMode.Value;
        if (!string.IsNullOrEmpty(certificateFingerprint))
            cfg.CertificateFingerprint = certificateFingerprint;
        if (!string.IsNullOrEmpty(proxyAddress))
            cfg.ProxyAddress = proxyAddress;
        if (!string.IsNullOrEmpty(proxyPassword))
            cfg.ProxyPassword = proxyPassword;
        if (!string.IsNullOrEmpty(proxyUsername))
            cfg.ProxyUsername = proxyUsername;
        if (disableSslVerification.HasValue)
            cfg.DisableSslVerification = disableSslVerification.Value;
        if (certificateNotRoot.HasValue)
            cfg.CertificateIsNotRoot = certificateNotRoot.Value;

        if (!string.IsNullOrEmpty(certificatePath))
        {
            if (fileSystem.File.Exists(certificatePath))
            {
                var bytes = await fileSystem.File.ReadAllBytesAsync(certificatePath, ctx);
                cfg.Certificate = X509CertificateLoader.LoadCertificate(bytes);
            }
            else
            {
                // Previously the read below the existence check still ran and threw on the missing file.
                collector.EmitGlobalError($"'{certificatePath}' does not exist");
                overridesValid = false;
            }
        }

        // Do not start a build with a half-applied configuration: an invalid endpoint would otherwise
        // silently index into whichever cluster was configured before the override.
        if (!overridesValid)
            return false;

        var exporters = new HashSet<Exporter> { noSemantic ?? false ? ElasticsearchNoSemantic : Elasticsearch };

        return await BuildAll(collector, strict: false, environment, metadataOnly: true, exporters, fileSystem, ctx);
    }
}

0 commit comments

Comments
 (0)