Skip to content

Commit 6688f0b

Browse files
authored
Ensure Elasticsearch documents have an _id and track content hash for partial updates (#2012)
1 parent 876216b commit 6688f0b

File tree

2 files changed

+38
-20
lines changed

2 files changed

+38
-20
lines changed

src/Elastic.Documentation/Search/DocumentationDocument.cs

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
using System.Text.Json.Serialization;
66
using Elastic.Documentation.AppliesTo;
7+
using Elastic.Documentation.Extensions;
78

89
namespace Elastic.Documentation.Search;
910

@@ -18,6 +19,14 @@ public record ParentDocument
1819

1920
public record DocumentationDocument
2021
{
22+
// TODO make this required once all doc_sets have published again
23+
[JsonPropertyName("url")]
24+
public string Url { get; set; } = string.Empty;
25+
26+
// TODO make this required once all doc_sets have published again
27+
[JsonPropertyName("hash")]
28+
public string Hash { get; set; } = string.Empty;
29+
2130
[JsonPropertyName("title")]
2231
public string? Title { get; set; }
2332

@@ -30,9 +39,6 @@ public record DocumentationDocument
3039
[JsonPropertyName("links")]
3140
public string[] Links { get; set; } = [];
3241

33-
[JsonPropertyName("url")]
34-
public string? Url { get; set; }
35-
3642
[JsonPropertyName("applies_to")]
3743
public ApplicableTo? Applies { get; set; }
3844

src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs

Lines changed: 29 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
using Elastic.Channels;
77
using Elastic.Documentation.Configuration;
88
using Elastic.Documentation.Diagnostics;
9+
using Elastic.Documentation.Extensions;
910
using Elastic.Documentation.Search;
1011
using Elastic.Documentation.Serialization;
1112
using Elastic.Ingest.Elasticsearch;
@@ -26,8 +27,9 @@ public class ElasticsearchMarkdownExporter(ILoggerFactory logFactory, IDiagnosti
2627
/// <inheritdoc />
2728
protected override CatalogIndexChannelOptions<DocumentationDocument> NewOptions(DistributedTransport transport) => new(transport)
2829
{
30+
BulkOperationIdLookup = d => d.Url,
2931
GetMapping = () => CreateMapping(null),
30-
GetMappingSettings = () => CreateMappingSetting(),
32+
GetMappingSettings = CreateMappingSetting,
3133
IndexFormat = $"{Endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}",
3234
ActiveSearchAlias = $"{Endpoint.IndexNamePrefix}-{indexNamespace.ToLowerInvariant()}",
3335
};
@@ -43,13 +45,14 @@ public class ElasticsearchMarkdownSemanticExporter(ILoggerFactory logFactory, ID
4345
/// <inheritdoc />
4446
protected override SemanticIndexChannelOptions<DocumentationDocument> NewOptions(DistributedTransport transport) => new(transport)
4547
{
48+
BulkOperationIdLookup = d => d.Url,
4649
GetMapping = (inferenceId, _) => CreateMapping(inferenceId),
4750
GetMappingSettings = (_, _) => CreateMappingSetting(),
4851
IndexFormat = $"{Endpoint.IndexNamePrefix.ToLowerInvariant()}-{indexNamespace.ToLowerInvariant()}-{{0:yyyy.MM.dd.HHmmss}}",
4952
ActiveSearchAlias = $"{Endpoint.IndexNamePrefix}-{indexNamespace.ToLowerInvariant()}",
5053
IndexNumThreads = Endpoint.IndexNumThreads,
5154
SearchNumThreads = Endpoint.SearchNumThreads,
52-
InferenceCreateTimeout = TimeSpan.FromMinutes(Endpoint.BootstrapTimeout ?? 4)
55+
InferenceCreateTimeout = TimeSpan.FromMinutes(Endpoint.BootstrapTimeout ?? 4),
5356
};
5457

5558
/// <inheritdoc />
@@ -86,14 +89,21 @@ protected static string CreateMappingSetting() =>
8689
"lowercase",
8790
"synonyms_filter"
8891
]
89-
}
92+
},
93+
"hierarchy_analyzer": { "tokenizer": "path_tokenizer" }
9094
},
9195
"filter": {
9296
"synonyms_filter": {
9397
"type": "synonym",
9498
"synonyms_set": "docs",
9599
"updateable": true
96100
}
101+
},
102+
"tokenizer": {
103+
"path_tokenizer": {
104+
"type": "path_hierarchy",
105+
"delimiter": "/"
106+
}
97107
}
98108
}
99109
}
@@ -103,22 +113,22 @@ protected static string CreateMapping(string? inferenceId) =>
103113
$$"""
104114
{
105115
"properties": {
106-
"title": {
107-
"type": "text",
108-
"search_analyzer": "synonyms_analyzer",
116+
"url" : {
117+
"type": "keyword",
109118
"fields": {
110-
"keyword": {
111-
"type": "keyword"
112-
}
113-
{{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}}
119+
"match": { "type": "text" },
120+
"prefix": { "type": "text", "analyzer" : "hierarchy_analyzer" }
114121
}
115122
},
116-
"url": {
123+
"hash" : { "type" : "keyword" },
124+
"title": {
117125
"type": "text",
126+
"search_analyzer": "synonyms_analyzer",
118127
"fields": {
119128
"keyword": {
120129
"type": "keyword"
121130
}
131+
{{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}}
122132
}
123133
},
124134
"url_segment_count": {
@@ -275,24 +285,26 @@ public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext,
275285
.Where(text => !string.IsNullOrEmpty(text))
276286
.ToArray();
277287

288+
var @abstract = !string.IsNullOrEmpty(body)
289+
? body[..Math.Min(body.Length, 400)] + " " + string.Join(" \n- ", headings)
290+
: string.Empty;
291+
278292
var doc = new DocumentationDocument
279293
{
280-
Title = file.Title,
281294
Url = url,
295+
Hash = ShortId.Create(url, body),
296+
Title = file.Title,
282297
Body = body,
283298
Description = fileContext.SourceFile.YamlFrontMatter?.Description,
284-
285-
Abstract = !string.IsNullOrEmpty(body)
286-
? body[..Math.Min(body.Length, 400)] + " " + string.Join(" \n- ", headings)
287-
: string.Empty,
299+
Abstract = @abstract,
288300
Applies = fileContext.SourceFile.YamlFrontMatter?.AppliesTo,
289301
UrlSegmentCount = url.Split('/', StringSplitOptions.RemoveEmptyEntries).Length,
290302
Parents = navigation.GetParentsOfMarkdownFile(file).Select(i => new ParentDocument
291303
{
292304
Title = i.NavigationTitle,
293305
Url = i.Url
294306
}).Reverse().ToArray(),
295-
Headings = headings
307+
Headings = headings,
296308
};
297309
return await TryWrite(doc, ctx);
298310
}

0 commit comments

Comments
 (0)