Skip to content

Commit 5539000

Browse files
[Security Assistant] Migrate semantic_text to use highlighter instead of inner_hits (#204962)
## Summary

Switch to using the `semantic` highlighter (elastic/elasticsearch#118064) instead of `inner_hits` when retrieving Knowledge Base index entry docs.

Followed the testing instructions from #198020.

Results:

<img width="1498" alt="Zrzut ekranu 2024-12-19 o 16 32 28" src="https://github.com/user-attachments/assets/a16bf729-ac30-4ea7-9b11-6e9ecca842dc" /> <img width="1495" alt="Zrzut ekranu 2024-12-19 o 16 32 38" src="https://github.com/user-attachments/assets/016c08c3-9865-4461-86a5-638e9559b202" /> <img width="1502" alt="Zrzut ekranu 2024-12-19 o 16 32 43" src="https://github.com/user-attachments/assets/37a14a2d-191d-420c-940d-1de649e082fd" /> <img width="1491" alt="Zrzut ekranu 2024-12-19 o 16 32 47" src="https://github.com/user-attachments/assets/e2be1e95-6fc8-4149-b1ff-2e8b8a9a0a8d" /> <img width="1494" alt="Zrzut ekranu 2024-12-19 o 16 32 50" src="https://github.com/user-attachments/assets/38b17f44-e349-46ab-8069-80d1a3fd42ae" />
1 parent 23c958c commit 5539000

File tree

3 files changed

+19
-45
lines changed

3 files changed

+19
-45
lines changed

x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.test.tsx

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,6 @@ describe('getStructuredToolForIndexEntry', () => {
159159
indexEntry: mockIndexEntry,
160160
esClient: mockEsClient,
161161
logger: mockLogger,
162-
elserId: 'elser123',
163162
});
164163

165164
expect(tool).toBeInstanceOf(DynamicStructuredTool);
@@ -181,15 +180,8 @@ describe('getStructuredToolForIndexEntry', () => {
181180
field1: 'value1',
182181
field2: 2,
183182
},
184-
inner_hits: {
185-
'test.test': {
186-
hits: {
187-
hits: [
188-
{ _source: { text: 'Inner text 1' } },
189-
{ _source: { text: 'Inner text 2' } },
190-
],
191-
},
192-
},
183+
highlight: {
184+
test: ['Inner text 1', 'Inner text 2'],
193185
},
194186
},
195187
],
@@ -202,7 +194,6 @@ describe('getStructuredToolForIndexEntry', () => {
202194
indexEntry: mockIndexEntry,
203195
esClient: mockEsClient,
204196
logger: mockLogger,
205-
elserId: 'elser123',
206197
});
207198

208199
const input = { query: 'testQuery', field1: 'value1', field2: 2 };
@@ -220,7 +211,6 @@ describe('getStructuredToolForIndexEntry', () => {
220211
indexEntry: mockIndexEntry,
221212
esClient: mockEsClient,
222213
logger: mockLogger,
223-
elserId: 'elser123',
224214
});
225215

226216
const input = { query: 'testQuery', field1: 'value1', field2: 2 };

x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
*/
77

88
import { z } from '@kbn/zod';
9-
import { get } from 'lodash';
109
import { DynamicStructuredTool } from '@langchain/core/tools';
1110
import { errors } from '@elastic/elasticsearch';
1211
import { QueryDslQueryContainer, SearchRequest } from '@elastic/elasticsearch/lib/api/types';
@@ -140,12 +139,10 @@ export const getStructuredToolForIndexEntry = ({
140139
indexEntry,
141140
esClient,
142141
logger,
143-
elserId,
144142
}: {
145143
indexEntry: IndexEntry;
146144
esClient: ElasticsearchClient;
147145
logger: Logger;
148-
elserId: string;
149146
}): DynamicStructuredTool => {
150147
const inputSchema = indexEntry.inputSchema?.reduce((prev, input) => {
151148
const fieldType =
@@ -182,28 +179,27 @@ export const getStructuredToolForIndexEntry = ({
182179
const params: SearchRequest = {
183180
index: indexEntry.index,
184181
size: 10,
185-
retriever: {
186-
standard: {
187-
query: {
188-
nested: {
189-
path: `${indexEntry.field}.inference.chunks`,
190-
query: {
191-
sparse_vector: {
192-
inference_id: elserId,
193-
field: `${indexEntry.field}.inference.chunks.embeddings`,
194-
query: input.query,
195-
},
196-
},
197-
inner_hits: {
198-
size: 2,
199-
name: `${indexEntry.name}.${indexEntry.field}`,
200-
_source: [`${indexEntry.field}.inference.chunks.text`],
182+
query: {
183+
bool: {
184+
must: [
185+
{
186+
semantic: {
187+
field: indexEntry.field,
188+
query: input.query,
201189
},
202190
},
203-
},
191+
],
204192
filter,
205193
},
206194
},
195+
highlight: {
196+
fields: {
197+
[indexEntry.field]: {
198+
type: 'semantic',
199+
number_of_fragments: 2,
200+
},
201+
},
202+
},
207203
};
208204

209205
try {
@@ -217,18 +213,8 @@ export const getStructuredToolForIndexEntry = ({
217213
}, {});
218214
}
219215

220-
// We want to send relevant inner hits (chunks) to the LLM as a context
221-
const innerHitPath = `${indexEntry.name}.${indexEntry.field}`;
222-
if (hit.inner_hits?.[innerHitPath]) {
223-
return {
224-
text: hit.inner_hits[innerHitPath].hits.hits
225-
.map((innerHit) => innerHit._source.text)
226-
.join('\n --- \n'),
227-
};
228-
}
229-
230216
return {
231-
text: get(hit._source, `${indexEntry.field}.inference.chunks[0].text`),
217+
text: hit.highlight?.[indexEntry.field].join('\n --- \n'),
232218
};
233219
});
234220

x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,6 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
652652
}
653653

654654
try {
655-
const elserId = ASSISTANT_ELSER_INFERENCE_ID;
656655
const userFilter = getKBUserFilter(user);
657656
const results = await this.findDocuments<EsIndexEntry>({
658657
// Note: This is a magic number to set some upward bound as to not blow the context with too
@@ -682,7 +681,6 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
682681
indexEntry,
683682
esClient,
684683
logger: this.options.logger,
685-
elserId,
686684
});
687685
})
688686
);

0 commit comments

Comments
 (0)