diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs
index 28c23ee3f..25660a76e 100644
--- a/src/Elastic.Documentation/Search/DocumentationDocument.cs
+++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs
@@ -4,6 +4,7 @@
using System.Text.Json.Serialization;
using Elastic.Documentation.AppliesTo;
+using Elastic.Documentation.Extensions;
namespace Elastic.Documentation.Search;
@@ -18,6 +19,14 @@ public record ParentDocument
public record DocumentationDocument
{
+ // TODO: mark Url as `required` (and drop the string.Empty default) once all doc_sets have published again
+ [JsonPropertyName("url")]
+ public string Url { get; set; } = string.Empty;
+
+ // TODO: mark Hash as `required` (and drop the string.Empty default) once all doc_sets have published again
+ [JsonPropertyName("hash")]
+ public string Hash { get; set; } = string.Empty;
+
[JsonPropertyName("title")]
public string? Title { get; set; }
@@ -30,9 +39,6 @@ public record DocumentationDocument
[JsonPropertyName("links")]
public string[] Links { get; set; } = [];
- [JsonPropertyName("url")]
- public string? Url { get; set; }
-
[JsonPropertyName("applies_to")]
public ApplicableTo? Applies { get; set; }
diff --git a/src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs
index b3747212d..b253447f2 100644
--- a/src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs
+++ b/src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs
@@ -6,6 +6,7 @@
using Elastic.Channels;
using Elastic.Documentation.Configuration;
using Elastic.Documentation.Diagnostics;
+using Elastic.Documentation.Extensions;
using Elastic.Documentation.Search;
using Elastic.Documentation.Serialization;
using Elastic.Ingest.Elasticsearch;
@@ -26,8 +27,9 @@ public class ElasticsearchMarkdownExporter(ILoggerFactory logFactory, IDiagnosti
///
protected override CatalogIndexChannelOptions NewOptions(DistributedTransport transport) => new(transport)
{
+ BulkOperationIdLookup = d => d.Url,
GetMapping = () => CreateMapping(null),
- GetMappingSettings = () => CreateMappingSetting(),
+ GetMappingSettings = CreateMappingSetting,
IndexFormat = $"{Endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}",
ActiveSearchAlias = $"{Endpoint.IndexNamePrefix}-{indexNamespace.ToLowerInvariant()}",
};
@@ -43,13 +45,14 @@ public class ElasticsearchMarkdownSemanticExporter(ILoggerFactory logFactory, ID
///
protected override SemanticIndexChannelOptions NewOptions(DistributedTransport transport) => new(transport)
{
+ BulkOperationIdLookup = d => d.Url,
GetMapping = (inferenceId, _) => CreateMapping(inferenceId),
GetMappingSettings = (_, _) => CreateMappingSetting(),
IndexFormat = $"{Endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}",
ActiveSearchAlias = $"{Endpoint.IndexNamePrefix}-{indexNamespace.ToLowerInvariant()}",
IndexNumThreads = Endpoint.IndexNumThreads,
SearchNumThreads = Endpoint.SearchNumThreads,
- InferenceCreateTimeout = TimeSpan.FromMinutes(Endpoint.BootstrapTimeout ?? 4)
+ InferenceCreateTimeout = TimeSpan.FromMinutes(Endpoint.BootstrapTimeout ?? 4),
};
///
@@ -86,7 +89,8 @@ protected static string CreateMappingSetting() =>
"lowercase",
"synonyms_filter"
]
- }
+ },
+ "hierarchy_analyzer": { "tokenizer": "path_tokenizer" }
},
"filter": {
"synonyms_filter": {
@@ -94,6 +98,12 @@ protected static string CreateMappingSetting() =>
"synonyms_set": "docs",
"updateable": true
}
+ },
+ "tokenizer": {
+ "path_tokenizer": {
+ "type": "path_hierarchy",
+ "delimiter": "/"
+ }
}
}
}
@@ -103,22 +113,22 @@ protected static string CreateMapping(string? inferenceId) =>
$$"""
{
"properties": {
- "title": {
- "type": "text",
- "search_analyzer": "synonyms_analyzer",
+ "url" : {
+ "type": "keyword",
"fields": {
- "keyword": {
- "type": "keyword"
- }
- {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}}
+ "match": { "type": "text" },
+ "prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" }
}
},
- "url": {
+ "hash" : { "type" : "keyword" },
+ "title": {
"type": "text",
+ "search_analyzer": "synonyms_analyzer",
"fields": {
"keyword": {
"type": "keyword"
}
+ {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}}
}
},
"url_segment_count": {
@@ -275,16 +285,18 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext,
.Where(text => !string.IsNullOrEmpty(text))
.ToArray();
+ var @abstract = !string.IsNullOrEmpty(body)
+ ? body[..Math.Min(body.Length, 400)] + " " + string.Join(" \n- ", headings)
+ : string.Empty;
+
var doc = new DocumentationDocument
{
- Title = file.Title,
Url = url,
+ Hash = ShortId.Create(url, body),
+ Title = file.Title,
Body = body,
Description = fileContext.SourceFile.YamlFrontMatter?.Description,
-
- Abstract = !string.IsNullOrEmpty(body)
- ? body[..Math.Min(body.Length, 400)] + " " + string.Join(" \n- ", headings)
- : string.Empty,
+ Abstract = @abstract,
Applies = fileContext.SourceFile.YamlFrontMatter?.AppliesTo,
UrlSegmentCount = url.Split('/', StringSplitOptions.RemoveEmptyEntries).Length,
Parents = navigation.GetParentsOfMarkdownFile(file).Select(i => new ParentDocument
@@ -292,7 +304,7 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext,
Title = i.NavigationTitle,
Url = i.Url
}).Reverse().ToArray(),
- Headings = headings
+ Headings = headings,
};
return await TryWrite(doc, ctx);
}