Skip to content

Commit af1464f

Browse files
Feat/aws kendra vector search (#5088)
* Add AWS Kendra vector store integration * Fix import paths in Kendra vector store * Add AWS Kendra dependencies to package.json * Update pnpm-lock.yaml with AWS Kendra dependencies * Fix linter warnings in Kendra vector store * Fix code formatting in Kendra vector store * Update pnpm-lock.yaml --------- Co-authored-by: Henry Heng <[email protected]> Co-authored-by: Henry <[email protected]>
1 parent bf1ddc3 commit af1464f

File tree

4 files changed

+840
-522
lines changed

4 files changed

+840
-522
lines changed
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
import { flatten } from 'lodash'
2+
import { AmazonKendraRetriever } from '@langchain/aws'
3+
import { KendraClient, BatchPutDocumentCommand, BatchDeleteDocumentCommand } from '@aws-sdk/client-kendra'
4+
import { Document } from '@langchain/core/documents'
5+
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
6+
import { FLOWISE_CHATID, getCredentialData, getCredentialParam } from '../../../src/utils'
7+
import { howToUseFileUpload } from '../VectorStoreUtils'
8+
import { MODEL_TYPE, getRegions } from '../../../src/modelLoader'
9+
10+
class Kendra_VectorStores implements INode {
11+
label: string
12+
name: string
13+
version: number
14+
description: string
15+
type: string
16+
icon: string
17+
category: string
18+
badge: string
19+
baseClasses: string[]
20+
inputs: INodeParams[]
21+
credential: INodeParams
22+
outputs: INodeOutputsValue[]
23+
24+
constructor() {
25+
this.label = 'AWS Kendra'
26+
this.name = 'kendra'
27+
this.version = 1.0
28+
this.type = 'Kendra'
29+
this.icon = 'kendra.svg'
30+
this.category = 'Vector Stores'
31+
this.description = `Use AWS Kendra's intelligent search service for document retrieval and semantic search`
32+
this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
33+
this.credential = {
34+
label: 'AWS Credential',
35+
name: 'credential',
36+
type: 'credential',
37+
credentialNames: ['awsApi'],
38+
optional: true
39+
}
40+
this.inputs = [
41+
{
42+
label: 'Document',
43+
name: 'document',
44+
type: 'Document',
45+
list: true,
46+
optional: true
47+
},
48+
{
49+
label: 'Region',
50+
name: 'region',
51+
type: 'asyncOptions',
52+
loadMethod: 'listRegions',
53+
default: 'us-east-1'
54+
},
55+
{
56+
label: 'Kendra Index ID',
57+
name: 'indexId',
58+
type: 'string',
59+
placeholder: 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
60+
description: 'The ID of your AWS Kendra index'
61+
},
62+
{
63+
label: 'File Upload',
64+
name: 'fileUpload',
65+
description: 'Allow file upload on the chat',
66+
hint: {
67+
label: 'How to use',
68+
value: howToUseFileUpload
69+
},
70+
type: 'boolean',
71+
additionalParams: true,
72+
optional: true
73+
},
74+
{
75+
label: 'Top K',
76+
name: 'topK',
77+
description: 'Number of top results to fetch. Default to 10',
78+
placeholder: '10',
79+
type: 'number',
80+
additionalParams: true,
81+
optional: true
82+
},
83+
{
84+
label: 'Attribute Filter',
85+
name: 'attributeFilter',
86+
description: 'Optional filter to apply when retrieving documents',
87+
type: 'json',
88+
optional: true,
89+
additionalParams: true
90+
}
91+
]
92+
// Note: Kendra doesn't support MMR search, but keeping the structure consistent
93+
this.outputs = [
94+
{
95+
label: 'Kendra Retriever',
96+
name: 'retriever',
97+
baseClasses: this.baseClasses
98+
},
99+
{
100+
label: 'Kendra Vector Store',
101+
name: 'vectorStore',
102+
baseClasses: [this.type, 'BaseRetriever']
103+
}
104+
]
105+
}
106+
107+
loadMethods = {
108+
async listRegions(): Promise<INodeOptionsValue[]> {
109+
return await getRegions(MODEL_TYPE.CHAT, 'awsChatBedrock')
110+
}
111+
}
112+
113+
//@ts-ignore
114+
vectorStoreMethods = {
115+
async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
116+
const indexId = nodeData.inputs?.indexId as string
117+
const region = nodeData.inputs?.region as string
118+
const docs = nodeData.inputs?.document as Document[]
119+
const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean
120+
121+
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
122+
let clientConfig: any = { region }
123+
124+
if (credentialData && Object.keys(credentialData).length !== 0) {
125+
const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
126+
const secretAccessKey = getCredentialParam('awsSecret', credentialData, nodeData)
127+
const sessionToken = getCredentialParam('awsSession', credentialData, nodeData)
128+
129+
if (accessKeyId && secretAccessKey) {
130+
clientConfig.credentials = {
131+
accessKeyId,
132+
secretAccessKey,
133+
...(sessionToken && { sessionToken })
134+
}
135+
}
136+
}
137+
138+
const client = new KendraClient(clientConfig)
139+
140+
const flattenDocs = docs && docs.length ? flatten(docs) : []
141+
const finalDocs = []
142+
const kendraDocuments = []
143+
144+
for (let i = 0; i < flattenDocs.length; i += 1) {
145+
if (flattenDocs[i] && flattenDocs[i].pageContent) {
146+
if (isFileUploadEnabled && options.chatId) {
147+
flattenDocs[i].metadata = { ...flattenDocs[i].metadata, [FLOWISE_CHATID]: options.chatId }
148+
}
149+
finalDocs.push(new Document(flattenDocs[i]))
150+
151+
// Prepare document for Kendra
152+
const docId = `doc_${Date.now()}_${i}`
153+
const docTitle = flattenDocs[i].metadata?.title || flattenDocs[i].metadata?.source || `Document ${i + 1}`
154+
155+
kendraDocuments.push({
156+
Id: docId,
157+
Title: docTitle,
158+
Blob: new Uint8Array(Buffer.from(flattenDocs[i].pageContent, 'utf-8')),
159+
ContentType: 'PLAIN_TEXT' as any
160+
})
161+
}
162+
}
163+
164+
try {
165+
if (kendraDocuments.length > 0) {
166+
// Kendra has a limit of 10 documents per batch
167+
const batchSize = 10
168+
for (let i = 0; i < kendraDocuments.length; i += batchSize) {
169+
const batch = kendraDocuments.slice(i, i + batchSize)
170+
const command = new BatchPutDocumentCommand({
171+
IndexId: indexId,
172+
Documents: batch
173+
})
174+
175+
const response = await client.send(command)
176+
177+
if (response.FailedDocuments && response.FailedDocuments.length > 0) {
178+
console.error('Failed documents:', response.FailedDocuments)
179+
throw new Error(`Failed to index some documents: ${JSON.stringify(response.FailedDocuments)}`)
180+
}
181+
}
182+
}
183+
184+
return { numAdded: finalDocs.length, addedDocs: finalDocs }
185+
} catch (error) {
186+
throw new Error(`Failed to index documents to Kendra: ${error}`)
187+
}
188+
},
189+
190+
async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
191+
const indexId = nodeData.inputs?.indexId as string
192+
const region = nodeData.inputs?.region as string
193+
194+
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
195+
let clientConfig: any = { region }
196+
197+
if (credentialData && Object.keys(credentialData).length !== 0) {
198+
const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
199+
const secretAccessKey = getCredentialParam('awsSecret', credentialData, nodeData)
200+
const sessionToken = getCredentialParam('awsSession', credentialData, nodeData)
201+
202+
if (accessKeyId && secretAccessKey) {
203+
clientConfig.credentials = {
204+
accessKeyId,
205+
secretAccessKey,
206+
...(sessionToken && { sessionToken })
207+
}
208+
}
209+
}
210+
211+
const client = new KendraClient(clientConfig)
212+
213+
try {
214+
// Kendra has a limit of 10 documents per batch delete
215+
const batchSize = 10
216+
for (let i = 0; i < ids.length; i += batchSize) {
217+
const batch = ids.slice(i, i + batchSize)
218+
const command = new BatchDeleteDocumentCommand({
219+
IndexId: indexId,
220+
DocumentIdList: batch
221+
})
222+
await client.send(command)
223+
}
224+
} catch (error) {
225+
throw new Error(`Failed to delete documents from Kendra: ${error}`)
226+
}
227+
}
228+
}
229+
230+
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
231+
const indexId = nodeData.inputs?.indexId as string
232+
const region = nodeData.inputs?.region as string
233+
const topK = nodeData.inputs?.topK as string
234+
const attributeFilter = nodeData.inputs?.attributeFilter
235+
const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean
236+
237+
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
238+
let clientOptions: any = {}
239+
240+
if (credentialData && Object.keys(credentialData).length !== 0) {
241+
clientOptions.credentials = {
242+
accessKeyId: getCredentialParam('awsKey', credentialData, nodeData),
243+
secretAccessKey: getCredentialParam('awsSecret', credentialData, nodeData),
244+
sessionToken: getCredentialParam('awsSession', credentialData, nodeData)
245+
}
246+
}
247+
248+
let filter = undefined
249+
if (attributeFilter) {
250+
filter = typeof attributeFilter === 'object' ? attributeFilter : JSON.parse(attributeFilter)
251+
}
252+
253+
// Add chat-specific filtering if file upload is enabled
254+
if (isFileUploadEnabled && options.chatId) {
255+
if (!filter) {
256+
filter = {}
257+
}
258+
filter.OrAllFilters = [
259+
...(filter.OrAllFilters || []),
260+
{
261+
EqualsTo: {
262+
Key: FLOWISE_CHATID,
263+
Value: {
264+
StringValue: options.chatId
265+
}
266+
}
267+
}
268+
]
269+
}
270+
271+
const retriever = new AmazonKendraRetriever({
272+
topK: topK ? parseInt(topK) : 10,
273+
indexId,
274+
region,
275+
attributeFilter: filter,
276+
clientOptions
277+
})
278+
279+
const output = nodeData.outputs?.output as string
280+
281+
if (output === 'retriever') {
282+
return retriever
283+
} else if (output === 'vectorStore') {
284+
// Kendra doesn't have a traditional vector store interface,
285+
// but we can return the retriever with additional properties
286+
;(retriever as any).k = topK ? parseInt(topK) : 10
287+
;(retriever as any).filter = filter
288+
return retriever
289+
}
290+
}
291+
}
292+
293+
module.exports = { nodeClass: Kendra_VectorStores }
Lines changed: 31 additions & 0 deletions
Loading

packages/components/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"@arizeai/openinference-instrumentation-langchain": "^2.0.0",
2626
"@aws-sdk/client-bedrock-runtime": "3.422.0",
2727
"@aws-sdk/client-dynamodb": "^3.360.0",
28+
"@aws-sdk/client-kendra": "^3.750.0",
2829
"@aws-sdk/client-s3": "^3.844.0",
2930
"@aws-sdk/client-secrets-manager": "^3.699.0",
3031
"@aws-sdk/client-sns": "^3.699.0",

0 commit comments

Comments
 (0)