Skip to content

Commit 293a943

Browse files
reakaleek and Copilot authored
Add highlighted body text to search results (#2008)
* Use search highlighting
* Revert Markdown.cs
* Remove DisableDirectStreaming again
* Run prettier
* Remove unused import
* Apply suggestions from code review

Co-authored-by: Copilot <[email protected]>

---------

Co-authored-by: Copilot <[email protected]>
1 parent af355d3 commit 293a943

File tree

13 files changed

+220
-191
lines changed

13 files changed

+220
-191
lines changed

src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/SearchResults.tsx

Lines changed: 80 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import { useSearchTerm } from '../search.store'
22
import { SearchResultItem, useSearchQuery } from './useSearchQuery'
33
import {
44
useEuiFontSize,
5-
EuiHighlight,
65
EuiLink,
76
EuiLoadingSpinner,
87
EuiSpacer,
@@ -14,8 +13,8 @@ import {
1413
} from '@elastic/eui'
1514
import { css } from '@emotion/react'
1615
import { useDebounce } from '@uidotdev/usehooks'
17-
import * as React from 'react'
18-
import { useEffect, useMemo, useState } from 'react'
16+
import DOMPurify from 'dompurify'
17+
import { useEffect, useMemo, useState, memo } from 'react'
1918

2019
export const SearchResults = () => {
2120
const searchTerm = useSearchTerm()
@@ -103,34 +102,23 @@ interface SearchResultListItemProps {
103102

104103
function SearchResultListItem({ item: result }: SearchResultListItemProps) {
105104
const { euiTheme } = useEuiTheme()
106-
const searchTerm = useSearchTerm()
107-
const highlightSearchTerms = useMemo(
108-
() =>
109-
searchTerm
110-
.toLowerCase()
111-
.split(' ')
112-
.filter((i) => i.length > 1),
113-
[searchTerm]
114-
)
115-
116-
if (highlightSearchTerms.includes('esql')) {
117-
highlightSearchTerms.push('es|ql')
118-
}
119-
120-
if (highlightSearchTerms.includes('dotnet')) {
121-
highlightSearchTerms.push('.net')
122-
}
105+
const titleFontSize = useEuiFontSize('m')
123106
return (
124-
<li>
107+
<li
108+
css={css`
109+
:not(:first-child) {
110+
border-top: 1px dotted ${euiTheme.colors.borderBasePrimary};
111+
}
112+
`}
113+
>
125114
<div
126115
tabIndex={0}
127116
css={css`
128117
display: flex;
129118
align-items: flex-start;
130119
gap: ${euiTheme.size.s};
131120
padding-inline: ${euiTheme.size.s};
132-
padding-block: ${euiTheme.size.xs};
133-
border-radius: ${euiTheme.border.radius.small};
121+
padding-block: ${euiTheme.size.m};
134122
:hover {
135123
background-color: ${euiTheme.colors.backgroundTransparentSubdued};
136124
`}
@@ -148,41 +136,55 @@ function SearchResultListItem({ item: result }: SearchResultListItemProps) {
148136
text-align: left;
149137
`}
150138
>
151-
<EuiLink
152-
tabIndex={-1}
153-
href={result.url}
139+
<Breadcrumbs parents={result.parents} />
140+
<div
154141
css={css`
155-
.euiMark {
156-
background-color: ${euiTheme.colors
157-
.backgroundLightWarning};
158-
font-weight: inherit;
159-
}
142+
padding-block: ${euiTheme.size.xs};
143+
font-size: ${titleFontSize.fontSize};
160144
`}
161145
>
162-
<EuiHighlight
163-
search={highlightSearchTerms}
164-
highlightAll={true}
146+
<EuiLink tabIndex={-1} href={result.url}>
147+
<span>{result.title}</span>
148+
</EuiLink>
149+
</div>
150+
151+
<EuiText size="xs">
152+
<div
153+
css={css`
154+
font-family: ${euiTheme.font.family};
155+
position: relative;
156+
157+
/* 2 lines with ellipsis */
158+
display: -webkit-box;
159+
-webkit-line-clamp: 2;
160+
-webkit-box-orient: vertical;
161+
overflow: hidden;
162+
163+
width: 90%;
164+
165+
mark {
166+
background-color: transparent;
167+
font-weight: ${euiTheme.font.weight.bold};
168+
color: ${euiTheme.colors.ink};
169+
}
170+
`}
165171
>
166-
{result.title}
167-
</EuiHighlight>
168-
</EuiLink>
169-
<Breadcrumbs
170-
parents={result.parents}
171-
highlightSearchTerms={highlightSearchTerms}
172-
/>
172+
{result.highlightedBody ? (
173+
<SanitizedHtmlContent
174+
htmlContent={result.highlightedBody}
175+
/>
176+
) : (
177+
<span>{result.description}</span>
178+
)}
179+
</div>
180+
</EuiText>
173181
</div>
174182
</div>
175183
</li>
176184
)
177185
}
178186

179-
function Breadcrumbs({
180-
parents,
181-
highlightSearchTerms,
182-
}: {
183-
parents: SearchResultItem['parents']
184-
highlightSearchTerms: string[]
185-
}) {
187+
function Breadcrumbs({ parents }: { parents: SearchResultItem['parents'] }) {
186188
const { euiTheme } = useEuiTheme()
187189
const { fontSize: smallFontsize } = useEuiFontSize('xs')
188190
return (
@@ -224,16 +226,40 @@ function Breadcrumbs({
224226
}
225227
`}
226228
>
227-
<EuiHighlight
228-
search={highlightSearchTerms}
229-
highlightAll={true}
230-
>
231-
{parent.title}
232-
</EuiHighlight>
229+
{parent.title}
233230
</EuiText>
234231
</EuiLink>
235232
</li>
236233
))}
237234
</ul>
238235
)
239236
}
237+
238+
/**
 * Renders a highlight snippet as HTML after sanitizing it.
 *
 * The snippet is passed through DOMPurify so that only attribute-less
 * `<mark>` elements survive; any other markup is reduced to its text
 * content. When the visible text begins with a lowercase letter the snippet
 * is treated as starting mid-sentence and a leading ellipsis is prepended.
 *
 * The inner function is named so the memoized component has a display name
 * in React DevTools and warnings.
 *
 * @param htmlContent - Snippet markup to sanitize and display.
 */
const SanitizedHtmlContent = memo(function SanitizedHtmlContent({
    htmlContent,
}: {
    htmlContent: string
}) {
    const processed = useMemo(() => {
        if (!htmlContent) return ''

        const sanitized = DOMPurify.sanitize(htmlContent, {
            ALLOWED_TAGS: ['mark'],
            ALLOWED_ATTR: [],
            KEEP_CONTENT: true,
        })

        // Parse the already-sanitized markup in a detached element to obtain
        // the plain text (tags removed, entities decoded) for inspection.
        const temp = document.createElement('div')
        temp.innerHTML = sanitized
        const text = temp.textContent ?? ''

        // Unicode-aware lowercase test: the `u` flag makes the regex match a
        // whole code point, and \p{Ll} covers lowercase letters beyond a-z.
        const startsMidSentence = /^\p{Ll}/u.test(text.trimStart())

        return startsMidSentence ? '… ' + sanitized : sanitized
    }, [htmlContent])

    // Safe to inject: `processed` is either empty or DOMPurify output that is
    // restricted to <mark> tags without attributes.
    return <div dangerouslySetInnerHTML={{ __html: processed }} />
})

src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/useSearchQuery.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ const SearchResultItem = z.object({
1313
description: z.string(),
1414
score: z.number(),
1515
parents: z.array(SearchResultItemParent),
16+
highlightedBody: z.string().nullish(),
1617
})
1718

1819
export type SearchResultItem = z.infer<typeof SearchResultItem>

src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/SearchOrAskAiModal.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ export const SearchOrAskAiModal = () => {
4848
css={css`
4949
flex-grow: 1;
5050
overflow-y: scroll;
51-
max-height: 80vh;
51+
max-height: 70vh;
5252
${useEuiOverflowScroll('y')}
5353
`}
5454
>

src/Elastic.Documentation.Site/package-lock.json

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/Elastic.Documentation.Site/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
"@tanstack/react-query": "^5.87.4",
9090
"@uidotdev/usehooks": "2.4.1",
9191
"clipboard": "2.0.11",
92+
"dompurify": "3.2.7",
9293
"highlight.js": "11.11.1",
9394
"htmx-ext-head-support": "2.0.4",
9495
"htmx-ext-preload": "2.1.1",

src/Elastic.Documentation/Search/DocumentationDocument.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ public record DocumentationDocument
4545
[JsonPropertyName("body")]
4646
public string? Body { get; set; }
4747

48+
// Stripped body is the body with markdown removed, suitable for search indexing
49+
[JsonPropertyName("stripped_body")]
50+
public string? StrippedBody { get; set; }
51+
4852
[JsonPropertyName("url_segment_count")]
4953
public int? UrlSegmentCount { get; set; }
5054

src/Elastic.Markdown/Exporters/ElasticsearchMarkdownExporter.cs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using Elastic.Ingest.Elasticsearch;
1313
using Elastic.Ingest.Elasticsearch.Catalog;
1414
using Elastic.Ingest.Elasticsearch.Semantic;
15+
using Elastic.Markdown.Helpers;
1516
using Elastic.Markdown.IO;
1617
using Elastic.Transport;
1718
using Elastic.Transport.Products.Elasticsearch;
@@ -90,13 +91,24 @@ protected static string CreateMappingSetting() =>
9091
"synonyms_filter"
9192
]
9293
},
94+
"highlight_analyzer": {
95+
"tokenizer": "standard",
96+
"filter": [
97+
"lowercase",
98+
"english_stop"
99+
]
100+
},
93101
"hierarchy_analyzer": { "tokenizer": "path_tokenizer" }
94102
},
95103
"filter": {
96104
"synonyms_filter": {
97105
"type": "synonym",
98106
"synonyms_set": "docs",
99107
"updateable": true
108+
},
109+
"english_stop": {
110+
"type": "stop",
111+
"stopwords": "_english_"
100112
}
101113
},
102114
"tokenizer": {
@@ -136,6 +148,11 @@ protected static string CreateMapping(string? inferenceId) =>
136148
},
137149
"body": {
138150
"type": "text"
151+
},
152+
"stripped_body": {
153+
"type": "text",
154+
"search_analyzer": "highlight_analyzer",
155+
"term_vector": "with_positions_offsets"
139156
}
140157
{{(!string.IsNullOrWhiteSpace(inferenceId) ? AbstractInferenceMapping(inferenceId) : AbstractMapping())}}
141158
}
@@ -277,11 +294,16 @@ public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext,
277294

278295
IPositionalNavigation navigation = fileContext.DocumentationSet;
279296

280-
//use LLM text if it was already provided (because we run with both llm and elasticsearch output)
281-
var body = fileContext.LLMText ??= LlmMarkdownExporter.ConvertToLlmMarkdown(fileContext.Document, fileContext.BuildContext);
297+
// Remove the first h1 because we already have the title
298+
// and we don't want it to appear in the body
299+
var h1 = fileContext.Document.Descendants<HeadingBlock>().FirstOrDefault(h => h.Level == 1);
300+
if (h1 is not null)
301+
_ = fileContext.Document.Remove(h1);
302+
303+
var body = LlmMarkdownExporter.ConvertToLlmMarkdown(fileContext.Document, fileContext.BuildContext);
282304

283305
var headings = fileContext.Document.Descendants<HeadingBlock>()
284-
.Select(h => h.GetData("header") as string ?? string.Empty)
306+
.Select(h => h.GetData("header") as string ?? string.Empty) // TODO: Confirm that 'header' data is correctly set for all HeadingBlock instances and that this extraction is reliable.
285307
.Where(text => !string.IsNullOrEmpty(text))
286308
.ToArray();
287309

@@ -295,6 +317,7 @@ public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext,
295317
Hash = ShortId.Create(url, body),
296318
Title = file.Title,
297319
Body = body,
320+
StrippedBody = body.StripMarkdown(),
298321
Description = fileContext.SourceFile.YamlFrontMatter?.Description,
299322
Abstract = @abstract,
300323
Applies = fileContext.SourceFile.YamlFrontMatter?.AppliesTo,
@@ -318,4 +341,3 @@ public async ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Canc
318341
return await _channel.RefreshAsync(ctx);
319342
}
320343
}
321-

src/Elastic.Markdown/Exporters/IMarkdownExporter.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ public record MarkdownExportFileContext
1717
public required MarkdownDocument Document { get; init; }
1818
public required MarkdownFile SourceFile { get; init; }
1919
public required IFileInfo DefaultOutputFile { get; init; }
20-
public string? LLMText { get; set; }
2120
public required DocumentationSet DocumentationSet { get; init; }
2221
}
2322

src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,26 @@ public class LlmMarkdownExporter : IMarkdownExporter
1919
{
2020
private const string LlmsTxtTemplate = """
2121
# Elastic Documentation
22-
22+
2323
> Elastic provides an open source search, analytics, and AI platform, and out-of-the-box solutions for observability and security. The Search AI platform combines the power of search and generative AI to provide near real-time search and analysis with relevance to reduce your time to value.
2424
>
2525
>Elastic offers the following solutions or types of projects:
2626
>
27-
>* [Elasticsearch](https://www.elastic.co/docs/solutions/search): Build powerful search and RAG applications using Elasticsearch's vector database, AI toolkit, and advanced retrieval capabilities.
27+
>* [Elasticsearch](https://www.elastic.co/docs/solutions/search): Build powerful search and RAG applications using Elasticsearch's vector database, AI toolkit, and advanced retrieval capabilities.
2828
>* [Elastic Observability](https://www.elastic.co/docs/solutions/observability): Gain comprehensive visibility into applications, infrastructure, and user experience through logs, metrics, traces, and other telemetry data, all in a single interface.
2929
>* [Elastic Security](https://www.elastic.co/docs/solutions/security): Combine SIEM, endpoint security, and cloud security to provide comprehensive tools for threat detection and prevention, investigation, and response.
30-
30+
3131
The documentation is organized to guide you through your journey with Elastic, from learning the basics to deploying and managing complex solutions. Here is a detailed breakdown of the documentation structure:
32-
33-
* [**Elastic fundamentals**](https://www.elastic.co/docs/get-started): Understand the basics about the deployment options, platform, and solutions, and features of the documentation.
34-
* [**Solutions and use cases**](https://www.elastic.co/docs/solutions): Learn use cases, evaluate, and implement Elastic's solutions: Observability, Search, and Security.
35-
* [**Manage data**](https://www.elastic.co/docs/manage-data): Learn about data store primitives, ingestion and enrichment, managing the data lifecycle, and migrating data.
36-
* [**Explore and analyze**](https://www.elastic.co/docs/explore-analyze): Get value from data through querying, visualization, machine learning, and alerting.
37-
* [**Deploy and manage**](https://www.elastic.co/docs/deploy-manage): Deploy and manage production-ready clusters. Covers deployment options and maintenance tasks.
38-
* [**Manage your Cloud account**](https://www.elastic.co/docs/cloud-account): A dedicated section for user-facing cloud account tasks like resetting passwords.
39-
* [**Troubleshoot**](https://www.elastic.co/docs/troubleshoot): Identify and resolve problems.
40-
* [**Extend and contribute**](https://www.elastic.co/docs/extend): How to contribute to or integrate with Elastic, from open source to plugins to integrations.
41-
* [**Release notes**](https://www.elastic.co/docs/release-notes): Contains release notes and changelogs for each new release.
32+
33+
* [**Elastic fundamentals**](https://www.elastic.co/docs/get-started): Understand the basics about the deployment options, platform, and solutions, and features of the documentation.
34+
* [**Solutions and use cases**](https://www.elastic.co/docs/solutions): Learn use cases, evaluate, and implement Elastic's solutions: Observability, Search, and Security.
35+
* [**Manage data**](https://www.elastic.co/docs/manage-data): Learn about data store primitives, ingestion and enrichment, managing the data lifecycle, and migrating data.
36+
* [**Explore and analyze**](https://www.elastic.co/docs/explore-analyze): Get value from data through querying, visualization, machine learning, and alerting.
37+
* [**Deploy and manage**](https://www.elastic.co/docs/deploy-manage): Deploy and manage production-ready clusters. Covers deployment options and maintenance tasks.
38+
* [**Manage your Cloud account**](https://www.elastic.co/docs/cloud-account): A dedicated section for user-facing cloud account tasks like resetting passwords.
39+
* [**Troubleshoot**](https://www.elastic.co/docs/troubleshoot): Identify and resolve problems.
40+
* [**Extend and contribute**](https://www.elastic.co/docs/extend): How to contribute to or integrate with Elastic, from open source to plugins to integrations.
41+
* [**Release notes**](https://www.elastic.co/docs/release-notes): Contains release notes and changelogs for each new release.
4242
* [**Reference**](https://www.elastic.co/docs/reference): Reference material for core tasks and manuals for optional products.
4343
""";
4444

src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,5 @@ public record SearchResultItem
6262
public required string Description { get; init; }
6363
public required SearchResultItemParent[] Parents { get; init; }
6464
public float Score { get; init; }
65+
public string? HighlightedBody { get; init; }
6566
}

0 commit comments

Comments
 (0)