Skip to content

Commit 9318e2a

Browse files
authored
Remove old embedding disk cache support (#1188)
With the new embeddings model, we should never use the old caches, so there's no need to migrate them over.
1 parent 6271f4e commit 9318e2a

File tree

1 file changed

+5
-102
lines changed

1 file changed

+5
-102
lines changed

src/platform/workspaceChunkSearch/node/workspaceChunkAndEmbeddingCache.ts

Lines changed: 5 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -11,31 +11,14 @@ import { CancellationToken } from '../../../util/vs/base/common/cancellation';
1111
import { ResourceMap } from '../../../util/vs/base/common/map';
1212
import { Schemas } from '../../../util/vs/base/common/network';
1313
import { URI } from '../../../util/vs/base/common/uri';
14-
import { IRange, Range } from '../../../util/vs/editor/common/core/range';
14+
import { Range } from '../../../util/vs/editor/common/core/range';
1515
import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';
1616
import { FileChunkWithEmbedding } from '../../chunking/common/chunk';
17-
import { Embedding, EmbeddingType, EmbeddingVector, getWellKnownEmbeddingTypeInfo } from '../../embeddings/common/embeddingsComputer';
17+
import { Embedding, EmbeddingType, getWellKnownEmbeddingTypeInfo } from '../../embeddings/common/embeddingsComputer';
1818
import { IFileSystemService } from '../../filesystem/common/fileSystemService';
1919
import { ILogService } from '../../log/common/logService';
2020
import { FileRepresentation, IWorkspaceFileIndex } from './workspaceFileIndex';
2121

22-
interface PersistedCache {
23-
readonly version: string;
24-
readonly embeddingModel: string | undefined;
25-
readonly entries: Record<string, PersistedCacheEntry>;
26-
}
27-
28-
interface PersistedCacheEntry {
29-
readonly contentVersionId: string | undefined;
30-
readonly hash: string | undefined;
31-
readonly entries: readonly {
32-
readonly text: string;
33-
readonly range: IRange;
34-
readonly embedding: EmbeddingVector | /* base64*/ string;
35-
readonly chunkHash: string | undefined;
36-
}[];
37-
}
38-
3922
type CacheEntry = {
4023
readonly contentVersionId: string | undefined;
4124
readonly fileHash: string | undefined;
@@ -80,49 +63,8 @@ export async function createWorkspaceChunkAndEmbeddingCache(
8063
}
8164

8265
class OldDiskCache {
83-
private static readonly version = '1.0.0';
8466
private static cacheFileName = 'workspace-chunks.json';
8567

86-
public static decodeEmbedding(base64Str: string): EmbeddingVector {
87-
const decoded = Buffer.from(base64Str, 'base64');
88-
const float32Array = new Float32Array(decoded.buffer, decoded.byteOffset, decoded.byteLength / Float32Array.BYTES_PER_ELEMENT);
89-
return Array.from(float32Array);
90-
}
91-
92-
public static async readDiskCache(accessor: ServicesAccessor, embeddingType: EmbeddingType, cacheRoot: URI, logService: ILogService): Promise<Iterable<[string, PersistedCacheEntry]> | undefined> {
93-
const fileSystem = accessor.get(IFileSystemService);
94-
95-
const cachePath = URI.joinPath(cacheRoot, OldDiskCache.cacheFileName);
96-
try {
97-
let file: Uint8Array | undefined;
98-
try {
99-
file = await fileSystem.readFile(cachePath, true);
100-
} catch (e) {
101-
// Expected, most likely file doesn't exist
102-
return undefined;
103-
}
104-
105-
const data: PersistedCache = JSON.parse(new TextDecoder().decode(file));
106-
if (data.version !== OldDiskCache.version) {
107-
logService.debug(`WorkspaceChunkAndEmbeddingCache: invalidating cache due to version mismatch. Expected ${OldDiskCache.version} but found ${data.version}`);
108-
return undefined;
109-
}
110-
111-
// Check mismatch in embedding models
112-
// Older cached version don't store their embedding model but it's always text3small_512
113-
if (
114-
(data.embeddingModel === undefined && embeddingType !== EmbeddingType.text3small_512)
115-
|| (data.embeddingModel !== undefined && data.embeddingModel !== embeddingType.id)
116-
) {
117-
logService.debug(`WorkspaceChunkAndEmbeddingCache: invalidating cache due to embeddings type mismatch. Expected ${embeddingType} but found ${data.embeddingModel}`);
118-
return undefined;
119-
}
120-
121-
return Object.entries(data.entries);
122-
} catch {
123-
return undefined;
124-
}
125-
}
12668

12769
static async deleteDiskCache(accessor: ServicesAccessor, cacheRoot: URI) {
12870
const fileSystem = accessor.get(IFileSystemService);
@@ -221,48 +163,9 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
221163
db.exec('DELETE FROM CacheMeta;');
222164
db.prepare('INSERT INTO CacheMeta (version, embeddingModel) VALUES (?, ?)').run(this.version, embeddingType.id);
223165

224-
// Load existing disk db if it exists
225-
const diskCache = cacheRoot !== ':memory:' ?
226-
await instantiationService.invokeFunction(accessor => OldDiskCache.readDiskCache(
227-
accessor,
228-
embeddingType,
229-
cacheRoot,
230-
accessor.get(ILogService)
231-
))
232-
: undefined;
233-
if (diskCache) {
234-
try {
235-
const insertFileStatement = db.prepare('INSERT OR REPLACE INTO Files (uri, contentVersionId) VALUES (?, ?)');
236-
const insertChunkStatement = db.prepare(`INSERT INTO FileChunks (fileId, text, range_startLineNumber, range_startColumn, range_endLineNumber, range_endColumn, embedding, chunkHash) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
237-
238-
db.exec('BEGIN TRANSACTION');
239-
for (const [uri, entry] of diskCache) {
240-
const fileIdResult = insertFileStatement
241-
.run(uri.toString(), entry.contentVersionId ?? '');
242-
243-
for (const chunk of entry.entries) {
244-
insertChunkStatement.run(
245-
fileIdResult.lastInsertRowid as number,
246-
chunk.text,
247-
chunk.range.startLineNumber,
248-
chunk.range.startColumn,
249-
chunk.range.endLineNumber,
250-
chunk.range.endColumn,
251-
packEmbedding({
252-
type: embeddingType,
253-
value: typeof chunk.embedding === 'string' ? OldDiskCache.decodeEmbedding(chunk.embedding) : chunk.embedding,
254-
}),
255-
chunk.chunkHash ?? ''
256-
);
257-
}
258-
}
259-
} finally {
260-
db.exec('COMMIT');
261-
}
262-
263-
if (cacheRoot !== ':memory:') {
264-
void instantiationService.invokeFunction(accessor => OldDiskCache.deleteDiskCache(accessor, cacheRoot));
265-
}
166+
// Clean up old disk db if it exists
167+
if (cacheRoot !== ':memory:') {
168+
void instantiationService.invokeFunction(accessor => OldDiskCache.deleteDiskCache(accessor, cacheRoot));
266169
}
267170

268171
// Validate all files in the database against the workspace index and remove any that are no longer present

0 commit comments

Comments
 (0)