Skip to content

Commit 0ec91f9

Browse files
fix(min-chunk): remove minsize for chunk (#911)
* fix(min-chunk): remove minsize for chunk
* fix tests
1 parent db581dc commit 0ec91f9

File tree

12 files changed

21 additions and 18 deletions (+21 / -18 lines changed)

12 files changed

21 additions and 18 deletions (+21 / -18 lines changed)

apps/sim/app/api/knowledge/[id]/documents/route.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -376,7 +376,7 @@ describe('Knowledge Base Documents API Route', () => {
376376
],
377377
processingOptions: {
378378
chunkSize: 50, // Invalid: too small
379-
minCharactersPerChunk: 10, // Invalid: too small
379+
minCharactersPerChunk: 0, // Invalid: too small
380380
recipe: 'default',
381381
lang: 'en',
382382
chunkOverlap: 1000, // Invalid: too large

apps/sim/app/api/knowledge/[id]/documents/route.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,7 @@ const BulkCreateDocumentsSchema = z.object({
295295
documents: z.array(CreateDocumentSchema),
296296
processingOptions: z.object({
297297
chunkSize: z.number().min(100).max(4000),
298-
minCharactersPerChunk: z.number().min(50).max(2000),
298+
minCharactersPerChunk: z.number().min(1).max(2000),
299299
recipe: z.string(),
300300
lang: z.string(),
301301
chunkOverlap: z.number().min(0).max(500),

apps/sim/app/api/knowledge/route.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ describe('Knowledge Base API Route', () => {
168168
expect(data.data.embeddingDimension).toBe(1536)
169169
expect(data.data.chunkingConfig).toEqual({
170170
maxSize: 1024,
171-
minSize: 100,
171+
minSize: 1,
172172
overlap: 200,
173173
})
174174
})

apps/sim/app/api/knowledge/route.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,12 @@ const CreateKnowledgeBaseSchema = z.object({
1818
chunkingConfig: z
1919
.object({
2020
maxSize: z.number().min(100).max(4000).default(1024),
21-
minSize: z.number().min(50).max(2000).default(100),
21+
minSize: z.number().min(1).max(2000).default(1),
2222
overlap: z.number().min(0).max(500).default(200),
2323
})
2424
.default({
2525
maxSize: 1024,
26-
minSize: 100,
26+
minSize: 1,
2727
overlap: 200,
2828
})
2929
.refine((data) => data.minSize < data.maxSize, {

apps/sim/app/api/knowledge/utils.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -531,7 +531,8 @@ export async function processDocumentAsync(
531531
docData.filename,
532532
docData.mimeType,
533533
processingOptions.chunkSize || 1000,
534-
processingOptions.chunkOverlap || 200
534+
processingOptions.chunkOverlap || 200,
535+
processingOptions.minCharactersPerChunk || 1
535536
)
536537

537538
const now = new Date()

apps/sim/app/workspace/[workspaceId]/knowledge/[id]/components/upload-modal/upload-modal.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ export function UploadModal({
142142
try {
143143
await uploadFiles(files, knowledgeBaseId, {
144144
chunkSize: chunkingConfig?.maxSize || 1024,
145-
minCharactersPerChunk: chunkingConfig?.minSize || 100,
145+
minCharactersPerChunk: chunkingConfig?.minSize || 1,
146146
chunkOverlap: chunkingConfig?.overlap || 200,
147147
recipe: 'default',
148148
})

apps/sim/app/workspace/[workspaceId]/knowledge/components/create-modal/create-modal.tsx

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ const FormSchema = z
5050
description: z.string().max(500, 'Description must be less than 500 characters').optional(),
5151
minChunkSize: z
5252
.number()
53-
.min(50, 'Min chunk size must be at least 50')
53+
.min(1, 'Min chunk size must be at least 1')
5454
.max(2000, 'Min chunk size must be less than 2000'),
5555
maxChunkSize: z
5656
.number()
@@ -115,7 +115,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
115115
defaultValues: {
116116
name: '',
117117
description: '',
118-
minChunkSize: 100,
118+
minChunkSize: 1,
119119
maxChunkSize: 1024,
120120
overlapSize: 200,
121121
},
@@ -299,7 +299,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
299299
reset({
300300
name: '',
301301
description: '',
302-
minChunkSize: 100,
302+
minChunkSize: 1,
303303
maxChunkSize: 1024,
304304
overlapSize: 200,
305305
})
@@ -423,7 +423,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
423423
<Input
424424
id='minChunkSize'
425425
type='number'
426-
placeholder='100'
426+
placeholder='1'
427427
{...register('minChunkSize', { valueAsNumber: true })}
428428
className={errors.minChunkSize ? 'border-red-500' : ''}
429429
autoComplete='off'

apps/sim/app/workspace/[workspaceId]/knowledge/hooks/use-knowledge-upload.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
278278
})),
279279
processingOptions: {
280280
chunkSize: processingOptions.chunkSize || 1024,
281-
minCharactersPerChunk: processingOptions.minCharactersPerChunk || 100,
281+
minCharactersPerChunk: processingOptions.minCharactersPerChunk || 1,
282282
chunkOverlap: processingOptions.chunkOverlap || 200,
283283
recipe: processingOptions.recipe || 'default',
284284
lang: 'en',

apps/sim/lib/documents/chunker.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ export class TextChunker {
5757

5858
constructor(options: ChunkerOptions = {}) {
5959
this.chunkSize = options.chunkSize ?? 512
60-
this.minChunkSize = options.minChunkSize ?? 50
60+
this.minChunkSize = options.minChunkSize ?? 1
6161
this.overlap = options.overlap ?? 0
6262
}
6363

apps/sim/lib/documents/docs-chunker.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ export class DocsChunker {
2525
// Use the existing TextChunker for chunking logic
2626
this.textChunker = new TextChunker({
2727
chunkSize: options.chunkSize ?? 300, // Max 300 tokens per chunk
28-
minChunkSize: options.minChunkSize ?? 100,
28+
minChunkSize: options.minChunkSize ?? 1,
2929
overlap: options.overlap ?? 50,
3030
})
3131
// Use localhost docs in development, production docs otherwise

0 commit comments

Comments
 (0)