Skip to content

Commit dde17f9

Browse files
committed
Use search highlighting
1 parent abb1c81 commit dde17f9

File tree

14 files changed

+226
-193
lines changed

14 files changed

+226
-193
lines changed

src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/SearchResults.tsx

Lines changed: 81 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ import {
1414
} from '@elastic/eui'
1515
import { css } from '@emotion/react'
1616
import { useDebounce } from '@uidotdev/usehooks'
17-
import * as React from 'react'
18-
import { useEffect, useMemo, useState } from 'react'
17+
import DOMPurify from 'dompurify'
18+
import { useEffect, useMemo, useState, memo } from 'react'
1919

2020
export const SearchResults = () => {
2121
const searchTerm = useSearchTerm()
@@ -103,34 +103,23 @@ interface SearchResultListItemProps {
103103

104104
function SearchResultListItem({ item: result }: SearchResultListItemProps) {
105105
const { euiTheme } = useEuiTheme()
106-
const searchTerm = useSearchTerm()
107-
const highlightSearchTerms = useMemo(
108-
() =>
109-
searchTerm
110-
.toLowerCase()
111-
.split(' ')
112-
.filter((i) => i.length > 1),
113-
[searchTerm]
114-
)
115-
116-
if (highlightSearchTerms.includes('esql')) {
117-
highlightSearchTerms.push('es|ql')
118-
}
119-
120-
if (highlightSearchTerms.includes('dotnet')) {
121-
highlightSearchTerms.push('.net')
122-
}
106+
const titleFontSize = useEuiFontSize('m')
123107
return (
124-
<li>
108+
<li
109+
css={css`
110+
:not(:first-child) {
111+
border-top: 1px dotted ${euiTheme.colors.borderBasePrimary};
112+
}
113+
`}
114+
>
125115
<div
126116
tabIndex={0}
127117
css={css`
128118
display: flex;
129119
align-items: flex-start;
130120
gap: ${euiTheme.size.s};
131121
padding-inline: ${euiTheme.size.s};
132-
padding-block: ${euiTheme.size.xs};
133-
border-radius: ${euiTheme.border.radius.small};
122+
padding-block: ${euiTheme.size.m};
134123
:hover {
135124
background-color: ${euiTheme.colors.backgroundTransparentSubdued};
136125
`}
@@ -148,41 +137,56 @@ function SearchResultListItem({ item: result }: SearchResultListItemProps) {
148137
text-align: left;
149138
`}
150139
>
151-
<EuiLink
152-
tabIndex={-1}
153-
href={result.url}
140+
<Breadcrumbs parents={result.parents} />
141+
<div
154142
css={css`
155-
.euiMark {
156-
background-color: ${euiTheme.colors
157-
.backgroundLightWarning};
158-
font-weight: inherit;
159-
}
143+
padding-block: ${euiTheme.size.xs};
144+
font-size: ${titleFontSize.fontSize};
160145
`}
161146
>
162-
<EuiHighlight
163-
search={highlightSearchTerms}
164-
highlightAll={true}
147+
<EuiLink tabIndex={-1} href={result.url}>
148+
<span>{result.title}</span>
149+
</EuiLink>
150+
</div>
151+
152+
<EuiText size="xs">
153+
<div
154+
css={css`
155+
font-family: ${euiTheme.font.family};
156+
// color: ${euiTheme.colors.textSubdued};
157+
position: relative;
158+
159+
/* 2 lines with ellipsis */
160+
display: -webkit-box;
161+
-webkit-line-clamp: 2;
162+
-webkit-box-orient: vertical;
163+
overflow: hidden;
164+
165+
width: 90%;
166+
167+
mark {
168+
background-color: transparent;
169+
font-weight: ${euiTheme.font.weight.bold};
170+
color: ${euiTheme.colors.ink};
171+
}
172+
`}
165173
>
166-
{result.title}
167-
</EuiHighlight>
168-
</EuiLink>
169-
<Breadcrumbs
170-
parents={result.parents}
171-
highlightSearchTerms={highlightSearchTerms}
172-
/>
174+
{result.highlightedBody ? (
175+
<SanitizedHtmlContent
176+
htmlContent={result.highlightedBody}
177+
/>
178+
) : (
179+
<span>{result.description}</span>
180+
)}
181+
</div>
182+
</EuiText>
173183
</div>
174184
</div>
175185
</li>
176186
)
177187
}
178188

179-
function Breadcrumbs({
180-
parents,
181-
highlightSearchTerms,
182-
}: {
183-
parents: SearchResultItem['parents']
184-
highlightSearchTerms: string[]
185-
}) {
189+
function Breadcrumbs({ parents }: { parents: SearchResultItem['parents'] }) {
186190
const { euiTheme } = useEuiTheme()
187191
const { fontSize: smallFontsize } = useEuiFontSize('xs')
188192
return (
@@ -224,16 +228,40 @@ function Breadcrumbs({
224228
}
225229
`}
226230
>
227-
<EuiHighlight
228-
search={highlightSearchTerms}
229-
highlightAll={true}
230-
>
231-
{parent.title}
232-
</EuiHighlight>
231+
{parent.title}
233232
</EuiText>
234233
</EuiLink>
235234
</li>
236235
))}
237236
</ul>
238237
)
239238
}
239+
240+
/**
 * Renders a highlighted snippet (e.g. `result.highlightedBody`) as HTML.
 *
 * The incoming string is sanitized with DOMPurify so that only `<mark>`
 * elements survive, with no attributes; the sanitized markup is then injected
 * via `dangerouslySetInnerHTML`. If the visible text of the snippet begins
 * with an ASCII lowercase letter, "… " is prepended to signal that the
 * snippet starts mid-sentence.
 *
 * The component is wrapped in `memo`, and the sanitize/ellipsis step is
 * memoized on `htmlContent`.
 *
 * @param htmlContent - HTML fragment to sanitize and render; a falsy value
 *   renders an empty `<div>`.
 */
const SanitizedHtmlContent = memo(
241+
({ htmlContent }: { htmlContent: string }) => {
242+
const processed = useMemo(() => {
243+
// Nothing to sanitize: render an empty string.
if (!htmlContent) return ''
244+
245+
// Allow-list only <mark> and no attributes. KEEP_CONTENT is DOMPurify's
// option for keeping the inner content of elements that get removed.
const sanitized = DOMPurify.sanitize(htmlContent, {
246+
ALLOWED_TAGS: ['mark'],
247+
ALLOWED_ATTR: [],
248+
KEEP_CONTENT: true
249+
})
250+
251+
// Check if the text starts mid-sentence (lowercase first letter).
// The sanitized markup is parsed in a detached <div> (never appended to the
// page) purely to read its plain text without the <mark> tags.
252+
const temp = document.createElement('div')
253+
temp.innerHTML = sanitized
254+
const text = temp.textContent || ''
255+
// First non-whitespace character; undefined when the text is empty.
const firstChar = text.trim()[0]
256+
257+
// Add a leading ellipsis if the text starts with a lowercase letter.
// Only ASCII a-z is matched, so other lowercase letters (e.g. "é") do not
// trigger the ellipsis.
258+
if (firstChar && /[a-z]/.test(firstChar)) {
259+
return '… ' + sanitized
260+
}
261+
262+
return sanitized
263+
}, [htmlContent])
264+
265+
// Only the DOMPurify-sanitized string is ever injected as raw HTML.
return <div dangerouslySetInnerHTML={{ __html: processed }} />
266+
}
267+
)

src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/useSearchQuery.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ const SearchResultItem = z.object({
1313
description: z.string(),
1414
score: z.number(),
1515
parents: z.array(SearchResultItemParent),
16+
highlightedBody: z.string().nullish(),
1617
})
1718

1819
export type SearchResultItem = z.infer<typeof SearchResultItem>

src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/SearchOrAskAiModal.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ export const SearchOrAskAiModal = () => {
4848
css={css`
4949
flex-grow: 1;
5050
overflow-y: scroll;
51-
max-height: 80vh;
51+
max-height: 70vh;
5252
${useEuiOverflowScroll('y')}
5353
`}
5454
>

src/Elastic.Documentation.Site/package-lock.json

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/Elastic.Documentation.Site/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
"@tanstack/react-query": "^5.87.4",
9090
"@uidotdev/usehooks": "2.4.1",
9191
"clipboard": "2.0.11",
92+
"dompurify": "3.2.7",
9293
"highlight.js": "11.11.1",
9394
"htmx-ext-head-support": "2.0.4",
9495
"htmx-ext-preload": "2.1.1",

src/Elastic.Documentation/Search/DocumentationDocument.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ public record DocumentationDocument
3939
[JsonPropertyName("body")]
4040
public string? Body { get; set; }
4141

42+
// Stripped body is the body with markdown removed, suitable for search indexing
43+
[JsonPropertyName("stripped_body")]
44+
public string? StrippedBody { get; set; }
45+
4246
[JsonPropertyName("url_segment_count")]
4347
public int? UrlSegmentCount { get; set; }
4448

src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
using Elastic.Ingest.Elasticsearch;
1212
using Elastic.Ingest.Elasticsearch.Catalog;
1313
using Elastic.Ingest.Elasticsearch.Semantic;
14+
using Elastic.Markdown.Helpers;
1415
using Elastic.Markdown.IO;
1516
using Elastic.Transport;
1617
using Elastic.Transport.Products.Elasticsearch;
@@ -86,13 +87,24 @@ protected static string CreateMappingSetting() =>
8687
"lowercase",
8788
"synonyms_filter"
8889
]
90+
},
91+
"highlight_analyzer": {
92+
"tokenizer": "standard",
93+
"filter": [
94+
"lowercase",
95+
"english_stop"
96+
]
8997
}
9098
},
9199
"filter": {
92100
"synonyms_filter": {
93101
"type": "synonym",
94102
"synonyms_set": "docs",
95103
"updateable": true
104+
},
105+
"english_stop": {
106+
"type": "stop",
107+
"stopwords": "_english_"
96108
}
97109
}
98110
}
@@ -126,6 +138,11 @@ protected static string CreateMapping(string? inferenceId) =>
126138
},
127139
"body": {
128140
"type": "text"
141+
},
142+
"stripped_body": {
143+
"type": "text",
144+
"search_analyzer": "highlight_analyzer",
145+
"term_vector": "with_positions_offsets"
129146
}
130147
{{(!string.IsNullOrWhiteSpace(inferenceId) ? AbstractInferenceMapping(inferenceId) : AbstractMapping())}}
131148
}
@@ -267,11 +284,16 @@ public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext,
267284

268285
IPositionalNavigation navigation = fileContext.DocumentationSet;
269286

270-
//use LLM text if it was already provided (because we run with both llm and elasticsearch output)
271-
var body = fileContext.LLMText ??= LlmMarkdownExporter.ConvertToLlmMarkdown(fileContext.Document, fileContext.BuildContext);
287+
// Remove the first h1 because we already have the title
288+
// and we don't want it to appear in the body
289+
var h1 = fileContext.Document.Descendants<HeadingBlock>().FirstOrDefault(h => h.Level == 1);
290+
if (h1 is not null)
291+
_ = fileContext.Document.Remove(h1);
292+
293+
var body = LlmMarkdownExporter.ConvertToLlmMarkdown(fileContext.Document, fileContext.BuildContext);
272294

273295
var headings = fileContext.Document.Descendants<HeadingBlock>()
274-
.Select(h => h.GetData("header") as string ?? string.Empty)
296+
.Select(h => h.GetData("header") as string ?? string.Empty) // TODO: this
275297
.Where(text => !string.IsNullOrEmpty(text))
276298
.ToArray();
277299

@@ -280,8 +302,8 @@ public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext,
280302
Title = file.Title,
281303
Url = url,
282304
Body = body,
305+
StrippedBody = body.StripMarkdown(),
283306
Description = fileContext.SourceFile.YamlFrontMatter?.Description,
284-
285307
Abstract = !string.IsNullOrEmpty(body)
286308
? body[..Math.Min(body.Length, 400)] + " " + string.Join(" \n- ", headings)
287309
: string.Empty,
@@ -306,4 +328,3 @@ public async ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Canc
306328
return await _channel.RefreshAsync(ctx);
307329
}
308330
}
309-

src/Elastic.Markdown/Exporters/IMarkdownExporter.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ public record MarkdownExportFileContext
1717
public required MarkdownDocument Document { get; init; }
1818
public required MarkdownFile SourceFile { get; init; }
1919
public required IFileInfo DefaultOutputFile { get; init; }
20-
public string? LLMText { get; set; }
2120
public required DocumentationSet DocumentationSet { get; init; }
2221
}
2322

src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,26 @@ public class LlmMarkdownExporter : IMarkdownExporter
1919
{
2020
private const string LlmsTxtTemplate = """
2121
# Elastic Documentation
22-
22+
2323
> Elastic provides an open source search, analytics, and AI platform, and out-of-the-box solutions for observability and security. The Search AI platform combines the power of search and generative AI to provide near real-time search and analysis with relevance to reduce your time to value.
2424
>
2525
>Elastic offers the following solutions or types of projects:
2626
>
27-
>* [Elasticsearch](https://www.elastic.co/docs/solutions/search): Build powerful search and RAG applications using Elasticsearch's vector database, AI toolkit, and advanced retrieval capabilities.
27+
>* [Elasticsearch](https://www.elastic.co/docs/solutions/search): Build powerful search and RAG applications using Elasticsearch's vector database, AI toolkit, and advanced retrieval capabilities.
2828
>* [Elastic Observability](https://www.elastic.co/docs/solutions/observability): Gain comprehensive visibility into applications, infrastructure, and user experience through logs, metrics, traces, and other telemetry data, all in a single interface.
2929
>* [Elastic Security](https://www.elastic.co/docs/solutions/security): Combine SIEM, endpoint security, and cloud security to provide comprehensive tools for threat detection and prevention, investigation, and response.
30-
30+
3131
The documentation is organized to guide you through your journey with Elastic, from learning the basics to deploying and managing complex solutions. Here is a detailed breakdown of the documentation structure:
32-
33-
* [**Elastic fundamentals**](https://www.elastic.co/docs/get-started): Understand the basics about the deployment options, platform, and solutions, and features of the documentation.
34-
* [**Solutions and use cases**](https://www.elastic.co/docs/solutions): Learn use cases, evaluate, and implement Elastic's solutions: Observability, Search, and Security.
35-
* [**Manage data**](https://www.elastic.co/docs/manage-data): Learn about data store primitives, ingestion and enrichment, managing the data lifecycle, and migrating data.
36-
* [**Explore and analyze**](https://www.elastic.co/docs/explore-analyze): Get value from data through querying, visualization, machine learning, and alerting.
37-
* [**Deploy and manage**](https://www.elastic.co/docs/deploy-manage): Deploy and manage production-ready clusters. Covers deployment options and maintenance tasks.
38-
* [**Manage your Cloud account**](https://www.elastic.co/docs/cloud-account): A dedicated section for user-facing cloud account tasks like resetting passwords.
39-
* [**Troubleshoot**](https://www.elastic.co/docs/troubleshoot): Identify and resolve problems.
40-
* [**Extend and contribute**](https://www.elastic.co/docs/extend): How to contribute to or integrate with Elastic, from open source to plugins to integrations.
41-
* [**Release notes**](https://www.elastic.co/docs/release-notes): Contains release notes and changelogs for each new release.
32+
33+
* [**Elastic fundamentals**](https://www.elastic.co/docs/get-started): Understand the basics about the deployment options, platform, and solutions, and features of the documentation.
34+
* [**Solutions and use cases**](https://www.elastic.co/docs/solutions): Learn use cases, evaluate, and implement Elastic's solutions: Observability, Search, and Security.
35+
* [**Manage data**](https://www.elastic.co/docs/manage-data): Learn about data store primitives, ingestion and enrichment, managing the data lifecycle, and migrating data.
36+
* [**Explore and analyze**](https://www.elastic.co/docs/explore-analyze): Get value from data through querying, visualization, machine learning, and alerting.
37+
* [**Deploy and manage**](https://www.elastic.co/docs/deploy-manage): Deploy and manage production-ready clusters. Covers deployment options and maintenance tasks.
38+
* [**Manage your Cloud account**](https://www.elastic.co/docs/cloud-account): A dedicated section for user-facing cloud account tasks like resetting passwords.
39+
* [**Troubleshoot**](https://www.elastic.co/docs/troubleshoot): Identify and resolve problems.
40+
* [**Extend and contribute**](https://www.elastic.co/docs/extend): How to contribute to or integrate with Elastic, from open source to plugins to integrations.
41+
* [**Release notes**](https://www.elastic.co/docs/release-notes): Contains release notes and changelogs for each new release.
4242
* [**Reference**](https://www.elastic.co/docs/reference): Reference material for core tasks and manuals for optional products.
4343
""";
4444

src/Elastic.Markdown/Helpers/Markdown.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
33
// See the LICENSE file in the project root for more information
44

5+
using System.Text.RegularExpressions;
6+
57
namespace Elastic.Markdown.Helpers;
68

7-
public static class MarkdownStringExtensions
9+
public static partial class MarkdownStringExtensions
810
{
911
public static string StripMarkdown(this string markdown)
1012
{

0 commit comments

Comments
 (0)