Skip to content

Commit 3760e03

Browse files
committed
fixed a document/chunk limit
1 parent 74a4bb1 commit 3760e03

File tree

6 files changed

+338
-6
lines changed

6 files changed

+338
-6
lines changed

dist/vectordb/index.d.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,30 @@ export declare class VectorStore {
149149
* Record FTS success (resets circuit breaker)
150150
*/
151151
private recordFtsSuccess;
152+
/**
153+
* Extract unsupported custom metadata field from LanceDB schema mismatch errors.
154+
*
155+
* Returns:
156+
* - specific key (e.g., "character") for metadata.custom.character mismatch
157+
* - CUSTOM_METADATA_ALL_FIELDS when metadata.custom itself is unsupported
158+
* - null when error is unrelated
159+
*/
160+
private extractUnsupportedCustomMetadataField;
161+
/**
162+
* Remove unsupported custom metadata field from chunks for schema compatibility.
163+
*
164+
* @param chunks - Source chunks
165+
* @param field - Unsupported field name, or CUSTOM_METADATA_ALL_FIELDS to drop all custom metadata
166+
* @returns Sanitized chunks and whether any changes were applied
167+
*/
168+
private stripUnsupportedCustomMetadata;
169+
/**
170+
* Add chunks to existing table with automatic fallback for custom metadata schema mismatches.
171+
*
172+
* LanceDB infers struct fields from early inserts, so later custom metadata keys may fail.
173+
* This method retries by stripping only unsupported custom fields when needed.
174+
*/
175+
private addChunksWithSchemaFallback;
152176
/**
153177
* Initialize LanceDB and create table
154178
*/

dist/vectordb/index.d.ts.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/vectordb/index.js

Lines changed: 110 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/vectordb/index.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/vectordb/__tests__/vectordb.test.ts

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,67 @@ describe('VectorStore', () => {
5656
return vector.map((x) => x / norm)
5757
}
5858

59+
describe('Custom metadata schema compatibility', () => {
60+
it('should keep ingesting when later chunks introduce new custom metadata keys', async () => {
61+
const metadataSchemaDbPath = makeTestDbPath('test-vectordb-custom-metadata-schema')
62+
if (fs.existsSync(metadataSchemaDbPath)) {
63+
fs.rmSync(metadataSchemaDbPath, { recursive: true })
64+
}
65+
66+
try {
67+
const store = new VectorStore({
68+
dbPath: metadataSchemaDbPath,
69+
tableName: 'chunks',
70+
})
71+
await store.initialize()
72+
73+
const firstChunk = createTestChunk(
74+
'Timeline chunk',
75+
'/test/timeline.md',
76+
0,
77+
createNormalizedVector(1)
78+
)
79+
firstChunk.metadata.custom = {
80+
domain: 'series-canon',
81+
type: 'timeline',
82+
}
83+
await store.insertChunks([firstChunk])
84+
85+
const secondChunk = createTestChunk(
86+
'Character arc chunk',
87+
'/test/character.md',
88+
0,
89+
createNormalizedVector(2)
90+
)
91+
secondChunk.metadata.custom = {
92+
domain: 'series-canon',
93+
type: 'character-arc',
94+
character: 'ellie',
95+
}
96+
97+
// Regression check:
98+
// Previously this failed with:
99+
// "Found field not in schema: metadata.custom.character at row 0"
100+
await expect(store.insertChunks([secondChunk])).resolves.toBeUndefined()
101+
102+
const files = await store.listFiles()
103+
expect(files.map((f) => f.filePath).sort()).toEqual(
104+
['/test/character.md', '/test/timeline.md'].sort()
105+
)
106+
107+
const results = await store.search(createNormalizedVector(2), 'character arc', 10)
108+
const characterResult = results.find((r) => r.filePath === '/test/character.md')
109+
expect(characterResult).toBeDefined()
110+
expect(characterResult?.metadata.custom?.['domain']).toBe('series-canon')
111+
expect(characterResult?.metadata.custom?.['type']).toBe('character-arc')
112+
} finally {
113+
if (fs.existsSync(metadataSchemaDbPath)) {
114+
fs.rmSync(metadataSchemaDbPath, { recursive: true })
115+
}
116+
}
117+
})
118+
})
119+
59120
describe('FTS Index Creation and Migration', () => {
60121
it('should not write VectorStore operational logs to stdout', async () => {
61122
const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {})

0 commit comments

Comments
 (0)