Skip to content

Commit fcf128f

Browse files
authored
improvement(knowledge): remove innerJoin and add id identifiers to results, updated docs (#1170)
* improvement(knowledge): remove innerJoin and add id identifiers to results, updated docs
* cleanup
* add documentName to upload chunk op as well
1 parent 56543da commit fcf128f

File tree

8 files changed: 69 additions and 41 deletions.

apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,8 @@ export async function POST(
187187
success: true,
188188
data: {
189189
...newChunk,
190+
documentId,
191+
documentName: doc.filename,
190192
...(cost
191193
? {
192194
cost: {

apps/sim/app/api/knowledge/search/route.test.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,14 @@ const mockHandleVectorOnlySearch = vi.fn()
6565
const mockHandleTagAndVectorSearch = vi.fn()
6666
const mockGetQueryStrategy = vi.fn()
6767
const mockGenerateSearchEmbedding = vi.fn()
68+
const mockGetDocumentNamesByIds = vi.fn()
6869
vi.mock('./utils', () => ({
6970
handleTagOnlySearch: mockHandleTagOnlySearch,
7071
handleVectorOnlySearch: mockHandleVectorOnlySearch,
7172
handleTagAndVectorSearch: mockHandleTagAndVectorSearch,
7273
getQueryStrategy: mockGetQueryStrategy,
7374
generateSearchEmbedding: mockGenerateSearchEmbedding,
75+
getDocumentNamesByIds: mockGetDocumentNamesByIds,
7476
APIError: class APIError extends Error {
7577
public status: number
7678
constructor(message: string, status: number) {
@@ -146,6 +148,10 @@ describe('Knowledge Search API Route', () => {
146148
singleQueryOptimized: true,
147149
})
148150
mockGenerateSearchEmbedding.mockClear().mockResolvedValue([0.1, 0.2, 0.3, 0.4, 0.5])
151+
mockGetDocumentNamesByIds.mockClear().mockResolvedValue({
152+
doc1: 'Document 1',
153+
doc2: 'Document 2',
154+
})
149155

150156
vi.stubGlobal('crypto', {
151157
randomUUID: vi.fn().mockReturnValue('mock-uuid-1234-5678'),

apps/sim/app/api/knowledge/search/route.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
99
import { calculateCost } from '@/providers/utils'
1010
import {
1111
generateSearchEmbedding,
12+
getDocumentNamesByIds,
1213
getQueryStrategy,
1314
handleTagAndVectorSearch,
1415
handleTagOnlySearch,
@@ -238,6 +239,10 @@ export async function POST(request: NextRequest) {
238239
tagDefinitionsMap[kbId] = map
239240
})
240241

242+
// Fetch document names for the results
243+
const documentIds = results.map((result) => result.documentId)
244+
const documentNameMap = await getDocumentNamesByIds(documentIds)
245+
241246
return NextResponse.json({
242247
success: true,
243248
data: {
@@ -263,12 +268,11 @@ export async function POST(request: NextRequest) {
263268
})
264269

265270
return {
266-
id: result.id,
267-
content: result.content,
268271
documentId: result.documentId,
269-
documentName: (result as any).documentName || undefined,
272+
documentName: documentNameMap[result.documentId] || undefined,
273+
content: result.content,
270274
chunkIndex: result.chunkIndex,
271-
tags, // Clean display name mapped tags
275+
metadata: tags, // Clean display name mapped tags
272276
similarity: hasQuery ? 1 - result.distance : 1, // Perfect similarity for tag-only searches
273277
}
274278
}),

apps/sim/app/api/knowledge/search/utils.ts

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,34 @@ import { document, embedding } from '@/db/schema'
55

66
const logger = createLogger('KnowledgeSearchUtils')
77

8+
export async function getDocumentNamesByIds(
9+
documentIds: string[]
10+
): Promise<Record<string, string>> {
11+
if (documentIds.length === 0) {
12+
return {}
13+
}
14+
15+
const uniqueIds = [...new Set(documentIds)]
16+
const documents = await db
17+
.select({
18+
id: document.id,
19+
filename: document.filename,
20+
})
21+
.from(document)
22+
.where(inArray(document.id, uniqueIds))
23+
24+
const documentNameMap: Record<string, string> = {}
25+
documents.forEach((doc) => {
26+
documentNameMap[doc.id] = doc.filename
27+
})
28+
29+
return documentNameMap
30+
}
31+
832
export interface SearchResult {
933
id: string
1034
content: string
1135
documentId: string
12-
documentName: string | null
1336
chunkIndex: number
1437
tag1: string | null
1538
tag2: string | null
@@ -131,7 +154,6 @@ async function executeVectorSearchOnIds(
131154
id: embedding.id,
132155
content: embedding.content,
133156
documentId: embedding.documentId,
134-
documentName: document.filename,
135157
chunkIndex: embedding.chunkIndex,
136158
tag1: embedding.tag1,
137159
tag2: embedding.tag2,
@@ -144,7 +166,6 @@ async function executeVectorSearchOnIds(
144166
knowledgeBaseId: embedding.knowledgeBaseId,
145167
})
146168
.from(embedding)
147-
.innerJoin(document, eq(embedding.documentId, document.id))
148169
.where(
149170
and(
150171
inArray(embedding.id, embeddingIds),
@@ -176,7 +197,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
176197
id: embedding.id,
177198
content: embedding.content,
178199
documentId: embedding.documentId,
179-
documentName: document.filename,
180200
chunkIndex: embedding.chunkIndex,
181201
tag1: embedding.tag1,
182202
tag2: embedding.tag2,
@@ -189,7 +209,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
189209
knowledgeBaseId: embedding.knowledgeBaseId,
190210
})
191211
.from(embedding)
192-
.innerJoin(document, eq(embedding.documentId, document.id))
193212
.where(
194213
and(
195214
eq(embedding.knowledgeBaseId, kbId),
@@ -209,7 +228,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
209228
id: embedding.id,
210229
content: embedding.content,
211230
documentId: embedding.documentId,
212-
documentName: document.filename,
213231
chunkIndex: embedding.chunkIndex,
214232
tag1: embedding.tag1,
215233
tag2: embedding.tag2,
@@ -222,7 +240,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
222240
knowledgeBaseId: embedding.knowledgeBaseId,
223241
})
224242
.from(embedding)
225-
.innerJoin(document, eq(embedding.documentId, document.id))
226243
.where(
227244
and(
228245
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),
@@ -254,7 +271,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
254271
id: embedding.id,
255272
content: embedding.content,
256273
documentId: embedding.documentId,
257-
documentName: document.filename,
258274
chunkIndex: embedding.chunkIndex,
259275
tag1: embedding.tag1,
260276
tag2: embedding.tag2,
@@ -267,7 +283,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
267283
knowledgeBaseId: embedding.knowledgeBaseId,
268284
})
269285
.from(embedding)
270-
.innerJoin(document, eq(embedding.documentId, document.id))
271286
.where(
272287
and(
273288
eq(embedding.knowledgeBaseId, kbId),
@@ -289,7 +304,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
289304
id: embedding.id,
290305
content: embedding.content,
291306
documentId: embedding.documentId,
292-
documentName: document.filename,
293307
chunkIndex: embedding.chunkIndex,
294308
tag1: embedding.tag1,
295309
tag2: embedding.tag2,
@@ -302,7 +316,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
302316
knowledgeBaseId: embedding.knowledgeBaseId,
303317
})
304318
.from(embedding)
305-
.innerJoin(document, eq(embedding.documentId, document.id))
306319
.where(
307320
and(
308321
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),

apps/sim/tools/knowledge/create_document.ts

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -160,19 +160,19 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
160160
return {
161161
success: true,
162162
output: {
163+
message:
164+
uploadCount > 1
165+
? `Successfully created ${uploadCount} documents in knowledge base`
166+
: `Successfully created document in knowledge base`,
163167
data: {
164-
id: firstDocument?.documentId || firstDocument?.id || '',
165-
name: uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown',
168+
documentId: firstDocument?.documentId || firstDocument?.id || '',
169+
documentName:
170+
uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown',
166171
type: 'document',
172+
enabled: true,
167173
createdAt: new Date().toISOString(),
168174
updatedAt: new Date().toISOString(),
169-
enabled: true,
170175
},
171-
message:
172-
uploadCount > 1
173-
? `Successfully created ${uploadCount} documents in knowledge base`
174-
: `Successfully created document in knowledge base`,
175-
documentId: firstDocument?.documentId || firstDocument?.id || '',
176176
},
177177
}
178178
},
@@ -182,8 +182,8 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
182182
type: 'object',
183183
description: 'Information about the created document',
184184
properties: {
185-
id: { type: 'string', description: 'Document ID' },
186-
name: { type: 'string', description: 'Document name' },
185+
documentId: { type: 'string', description: 'Document ID' },
186+
documentName: { type: 'string', description: 'Document name' },
187187
type: { type: 'string', description: 'Document type' },
188188
enabled: { type: 'boolean', description: 'Whether the document is enabled' },
189189
createdAt: { type: 'string', description: 'Creation timestamp' },

apps/sim/tools/knowledge/search.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,12 @@ export const knowledgeSearchTool: ToolConfig<any, KnowledgeSearchResponse> = {
110110
items: {
111111
type: 'object',
112112
properties: {
113-
id: { type: 'string' },
114-
content: { type: 'string' },
115-
documentId: { type: 'string' },
116-
documentName: { type: 'string' },
117-
chunkIndex: { type: 'number' },
118-
similarity: { type: 'number' },
119-
metadata: { type: 'object' },
113+
documentId: { type: 'string', description: 'Document ID' },
114+
documentName: { type: 'string', description: 'Document name' },
115+
content: { type: 'string', description: 'Content of the result' },
116+
chunkIndex: { type: 'number', description: 'Index of the chunk within the document' },
117+
similarity: { type: 'number', description: 'Similarity score of the result' },
118+
metadata: { type: 'object', description: 'Metadata of the result, including tags' },
120119
},
121120
},
122121
},

apps/sim/tools/knowledge/types.ts

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
export interface KnowledgeSearchResult {
2-
id: string
3-
content: string
42
documentId: string
53
documentName: string
4+
content: string
65
chunkIndex: number
76
metadata: Record<string, any>
87
similarity: number
@@ -41,7 +40,7 @@ export interface KnowledgeSearchParams {
4140
}
4241

4342
export interface KnowledgeUploadChunkResult {
44-
id: string
43+
chunkId: string
4544
chunkIndex: number
4645
content: string
4746
contentLength: number
@@ -57,6 +56,7 @@ export interface KnowledgeUploadChunkResponse {
5756
data: KnowledgeUploadChunkResult
5857
message: string
5958
documentId: string
59+
documentName: string
6060
cost?: {
6161
input: number
6262
output: number
@@ -84,8 +84,8 @@ export interface KnowledgeUploadChunkParams {
8484
}
8585

8686
export interface KnowledgeCreateDocumentResult {
87-
id: string
88-
name: string
87+
documentId: string
88+
documentName: string
8989
type: string
9090
enabled: boolean
9191
createdAt: string
@@ -97,7 +97,6 @@ export interface KnowledgeCreateDocumentResponse {
9797
output: {
9898
data: KnowledgeCreateDocumentResult
9999
message: string
100-
documentId: string
101100
}
102101
error?: string
103102
}

apps/sim/tools/knowledge/upload_chunk.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
5252
return {
5353
success: true,
5454
output: {
55+
message: `Successfully uploaded chunk to document`,
5556
data: {
56-
id: data.id,
57+
chunkId: data.id,
5758
chunkIndex: data.chunkIndex || 0,
5859
content: data.content,
5960
contentLength: data.contentLength || data.content?.length || 0,
@@ -62,8 +63,8 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
6263
createdAt: data.createdAt,
6364
updatedAt: data.updatedAt,
6465
},
65-
message: `Successfully uploaded chunk to document`,
6666
documentId: data.documentId,
67+
documentName: data.documentName,
6768
cost: data.cost,
6869
},
6970
}
@@ -74,7 +75,7 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
7475
type: 'object',
7576
description: 'Information about the uploaded chunk',
7677
properties: {
77-
id: { type: 'string', description: 'Chunk ID' },
78+
chunkId: { type: 'string', description: 'Chunk ID' },
7879
chunkIndex: { type: 'number', description: 'Index of the chunk within the document' },
7980
content: { type: 'string', description: 'Content of the chunk' },
8081
contentLength: { type: 'number', description: 'Length of the content in characters' },
@@ -92,6 +93,10 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
9293
type: 'string',
9394
description: 'ID of the document the chunk was added to',
9495
},
96+
documentName: {
97+
type: 'string',
98+
description: 'Name of the document the chunk was added to',
99+
},
95100
cost: {
96101
type: 'object',
97102
description: 'Cost information for the upload operation',

0 commit comments

Comments
 (0)