Skip to content

Commit 8964d19

Browse files
committed
entirely local embeddings logic
1 parent 25d00b0 commit 8964d19

File tree

10 files changed

+148
-58
lines changed

10 files changed

+148
-58
lines changed

electron-src/data/database.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ export class SQLDatabase extends BaseDatabase<MesssagesDatabase> {
114114
indexMap.set(texts[i], i);
115115
}
116116
results.sort((a, b) => {
117-
const aIndex = indexMap.get(a.text);
118-
const bIndex = indexMap.get(b.text);
117+
const aIndex = indexMap.get(a.text!);
118+
const bIndex = indexMap.get(b.text!);
119119
if (aIndex === undefined || bIndex === undefined) {
120120
return 0;
121121
}

electron-src/data/embeddings-database.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import type { DB as EmbeddingsDb } from "../../_generated/embeddings-db";
22
import logger from "../utils/logger";
33
import { embeddingsDbPath } from "../utils/constants";
44
import BaseDatabase from "./base-database";
5+
import cosineSimilarity from "../semantic-search/vector-comparison";
56

67
export class EmbeddingsDatabase extends BaseDatabase<EmbeddingsDb> {
78
embeddingsCache: { text: string; embedding: Float32Array }[] = [];
@@ -14,7 +15,16 @@ export class EmbeddingsDatabase extends BaseDatabase<EmbeddingsDb> {
1415
return result[0].count as number;
1516
};
1617

17-
getAllEmbeddings = async () => {
18+
/**
 * Ranks every cached embedding by cosine similarity to `embedding` and
 * returns the texts of the closest matches, best match first.
 *
 * @param embedding - Query vector compared against each stored vector.
 * @param limit - Maximum number of texts to return; defaults to 100.
 *   Negative values are treated as 0.
 * @returns Texts ordered from most to least similar.
 */
calculateSimilarity = async (embedding: Float32Array, limit = 100) => {
  const allEmbeddings = await this.getAllEmbeddings();
  // Both `embedding` and `e.text` are already non-nullable, so no `!` is needed.
  const similarities = allEmbeddings.map((e) => ({
    similarity: cosineSimilarity(embedding, e.embedding),
    text: e.text,
  }));
  similarities.sort((a, b) => b.similarity - a.similarity);
  // Clamp so a negative limit cannot make `slice` count from the end.
  return similarities.slice(0, Math.max(0, limit)).map((l) => l.text);
};
27+
loadVectorsIntoMemory = async () => {
1828
if (this.embeddingsCache.length) {
1929
return this.embeddingsCache;
2030
}
@@ -32,6 +42,9 @@ export class EmbeddingsDatabase extends BaseDatabase<EmbeddingsDb> {
3242
),
3343
};
3444
});
45+
};
46+
/**
 * Returns the in-memory embeddings cache, running
 * `loadVectorsIntoMemory` first so the cache has been loaded.
 */
getAllEmbeddings = async () => {
  await this.loadVectorsIntoMemory();
  const { embeddingsCache } = this;
  return embeddingsCache;
};
3750
getEmbeddingByText = async (text: string) => {

electron-src/esbuild.main.config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ const config: BuildOptions = {
66
entryPoints: [
77
path.resolve("electron-src/index.ts"),
88
path.resolve("electron-src/workers/worker.ts"),
9+
path.resolve("electron-src/workers/embeddings-worker.ts"),
910
path.resolve("electron-src/utils/preload.ts"),
1011
],
1112
bundle: true,

electron-src/semantic-search/semantic-search.ts

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ import { handleIpc } from "../ipc/ipc";
55
import logger from "../utils/logger";
66
import { BatchOpenAi, OPENAI_EMBEDDING_MODEL } from "./batch-utils";
77
import pMap from "p-map";
8-
import embeddingsDb from "../data/embeddings-database";
9-
import cosineSimilarity from "./vector-comparison";
108
import { uniqBy } from "lodash-es";
119

1210
export interface SemanticSearchMetadata {
@@ -82,9 +80,9 @@ export const createEmbeddings = async ({ openAiKey }: { openAiKey: string }) =>
8280
});
8381
const openai = new OpenAIApi(configuration);
8482

85-
await embeddingsDb.initialize();
83+
await dbWorker.embeddingsWorker.initialize();
8684

87-
const existingText = await embeddingsDb.getAllText();
85+
const existingText = await dbWorker.embeddingsWorker.getAllText();
8886
const set = new Set(existingText);
8987
numCompleted = existingText.length;
9088
const notParsed = messages.filter((m) => m.text && !set.has(m.text));
@@ -105,7 +103,7 @@ export const createEmbeddings = async ({ openAiKey }: { openAiKey: string }) =>
105103
try {
106104
logger.info(`Inserting ${itemEmbeddings.length} vectors`);
107105
const embeddings = itemEmbeddings.map((l) => ({ embedding: l.values, text: l.metadata.text }));
108-
await embeddingsDb.insertEmbeddings(embeddings);
106+
await dbWorker.embeddingsWorker.insertEmbeddings(embeddings);
109107
logger.info(`Inserted ${itemEmbeddings.length} vectors`);
110108
numCompleted += itemEmbeddings.length;
111109
} catch (e) {
@@ -134,7 +132,7 @@ interface SemanticQueryOpts {
134132
}
135133

136134
export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts) {
137-
const existingEmbedding = await embeddingsDb.getEmbeddingByText(queryText);
135+
const existingEmbedding = await dbWorker.embeddingsWorker.getEmbeddingByText(queryText);
138136
let floatEmbedding = existingEmbedding?.embedding;
139137

140138
if (!existingEmbedding) {
@@ -153,17 +151,11 @@ export async function semanticQuery({ queryText, openAiKey }: SemanticQueryOpts)
153151
return [];
154152
}
155153
// save embedding
156-
await embeddingsDb.insertEmbeddings([{ embedding, text: queryText }]);
154+
await dbWorker.embeddingsWorker.insertEmbeddings([{ embedding, text: queryText }]);
157155
floatEmbedding = new Float32Array(embedding);
158156
}
159157

160-
const allEmbeddings = await embeddingsDb.getAllEmbeddings();
161-
const similarities = allEmbeddings.map((e) => {
162-
const similarity = cosineSimilarity(floatEmbedding!, e.embedding);
163-
return { similarity, text: e.text };
164-
});
165-
similarities.sort((a, b) => b.similarity - a.similarity);
166-
return similarities.slice(0, 100).map((l) => l.text!);
158+
return dbWorker.embeddingsWorker.calculateSimilarity(floatEmbedding!);
167159
}
168160

169161
handleIpc("createEmbeddings", async ({ openAiKey: openAiKey }) => {
@@ -178,7 +170,7 @@ handleIpc("getEmbeddingsCompleted", async () => {
178170

179171
handleIpc("calculateSemanticSearchStatsEnhanced", async () => {
180172
const stats = await dbWorker.worker.calculateSemanticSearchStats();
181-
const localDb = embeddingsDb;
173+
const localDb = dbWorker.embeddingsWorker;
182174
try {
183175
await localDb.initialize();
184176
const count = await localDb.countEmbeddings();

electron-src/semantic-search/vector-comparison.ts

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,27 @@ function dot(x: ArrayLike, y: ArrayLike) {
3535
}
3636
return sum;
3737
}
38-
function cosineSimilarity(x: ArrayLike, y: ArrayLike): number {
39-
const a = dot(x, y);
40-
const b = l2norm(x);
41-
const c = l2norm(y);
42-
return a / (b * c);
43-
}
38+
// function cosineSimilarity(x: ArrayLike, y: ArrayLike): number {
39+
// const a = dot(x, y);
40+
// const b = l2norm(x);
41+
// const c = l2norm(y);
42+
// return a / (b * c);
43+
// }
44+
/**
 * Cosine similarity between two vectors, computed in a single pass.
 *
 * Only the first `min(vectorA.length, vectorB.length)` components are used.
 *
 * @param vectorA - First vector.
 * @param vectorB - Second vector.
 * @returns The similarity, or 0 when the combined magnitude is 0
 *   (for example when either vector is empty or all zeros).
 */
export default function cosineSimilarity(vectorA: Float32Array, vectorB: Float32Array) {
  const sharedLength = Math.min(vectorA.length, vectorB.length);
  let crossSum = 0;
  let squaredSumA = 0;
  let squaredSumB = 0;

  for (let i = 0; i < sharedLength; i += 1) {
    const a = vectorA[i];
    const b = vectorB[i];
    crossSum += a * b;
    squaredSumA += a * a;
    squaredSumB += b * b;
  }

  const magnitude = Math.sqrt(squaredSumA * squaredSumB);
  if (magnitude === 0) {
    // Avoid dividing by zero for empty or all-zero input.
    return 0;
  }
  return crossSum / magnitude;
}

electron-src/workers/database-worker.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,22 @@ import { copyLatestDb, localDbExists } from "../data/db-file-utils";
66
import isDev from "electron-is-dev";
77
import { join } from "path";
88
import logger from "../utils/logger";
9+
import type { EmbeddingsDatabase } from "../data/embeddings-database";
10+
import embeddingsDb from "../data/embeddings-database";
11+
912
type WorkerType<T> = {
1013
[P in keyof T]: T[P] extends (...args: infer A) => infer R ? (...args: A) => Promise<R> : never;
1114
};
1215

1316
class DbWorker {
1417
worker!: WorkerType<SQLDatabase> | SQLDatabase;
18+
embeddingsWorker!: WorkerType<EmbeddingsDatabase> | EmbeddingsDatabase;
1519

1620
startWorker = async () => {
1721
const path = isDev ? "workers/worker.js" : join("..", "..", "..", "app.asar.unpacked", "worker.js");
22+
const embeddingWorkerPath = isDev
23+
? "workers/embeddings-worker.js"
24+
: join("..", "..", "..", "app.asar.unpacked", "embeddings-worker.js");
1825

1926
this.worker = isDev
2027
? db
@@ -23,6 +30,13 @@ class DbWorker {
2330
resourceLimits: { maxOldGenerationSizeMb: 32678, maxYoungGenerationSizeMb: 32678 },
2431
}),
2532
);
33+
this.embeddingsWorker = isDev
34+
? embeddingsDb
35+
: await spawn<WorkerType<EmbeddingsDatabase>>(
36+
new Worker(embeddingWorkerPath, {
37+
resourceLimits: { maxOldGenerationSizeMb: 32678, maxYoungGenerationSizeMb: 32678 },
38+
}),
39+
);
2640
};
2741

2842
setupHandlers() {
@@ -40,6 +54,8 @@ class DbWorker {
4054
handleIpc(prop, dbElement as any);
4155
}
4256
}
57+
58+
handleIpc("loadVectorsIntoMemory", this.embeddingsWorker.loadVectorsIntoMemory);
4359
}
4460
isCopying = false;
4561
doesLocalDbCopyExist = async () => {
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import type { EmbeddingsDatabase } from "../data/embeddings-database";
2+
import { expose } from "threads/worker";
3+
import embeddingsDb from "../data/embeddings-database";
4+
5+
// Gather every function-valued property of the shared embeddings database
// instance and hand the resulting object to threads' `expose`.
const exposed: Partial<Record<keyof EmbeddingsDatabase, any>> = {};
for (const property in embeddingsDb) {
  const key = property as keyof EmbeddingsDatabase;
  const member = embeddingsDb[key];
  if (typeof member !== "function") {
    // Plain data fields are not exposed.
    continue;
  }
  exposed[key] = member;
}

expose(exposed);

package.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"author": "JonLuca DeCaro <mimessage@jonlu.ca>",
44
"main": "build/electron-src/index.js",
55
"name": "MiMessage",
6-
"version": "1.0.10",
6+
"version": "1.0.11",
77
"productName": "Mimessage",
88
"description": "Apple Messages UI alternative, with export, search, and more.",
99
"scripts": {
@@ -155,11 +155,16 @@
155155
{
156156
"from": "build/electron-src/workers/worker.js",
157157
"to": "app.asar.unpacked/worker.js"
158+
},
159+
{
160+
"from": "build/electron-src/workers/embeddings-worker.js",
161+
"to": "app.asar.unpacked/embeddings-worker.js"
158162
}
159163
],
160164
"mac": {
161165
"binaries": [
162-
"build/electron-src/workers/worker.js"
166+
"build/electron-src/workers/worker.js",
167+
"build/electron-src/workers/embeddings-worker.js"
163168
],
164169
"target": {
165170
"target": "default",

src/components/global-search/GlobalSearch.tsx

Lines changed: 58 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ import {
88
useGlobalSearch,
99
useGroupChatList,
1010
useHandleMap,
11+
useLoadSemanticResultsIntoMemory,
1112
} from "../../hooks/dataHooks";
12-
import { LinearProgress } from "@mui/material";
13+
import { CircularProgress, LinearProgress } from "@mui/material";
1314
import { Button, Checkbox, FormControlLabel, FormGroup } from "@mui/material";
1415

1516
import { Virtuoso } from "react-virtuoso";
@@ -29,6 +30,7 @@ import { DayPicker } from "react-day-picker";
2930
import Popover from "@mui/material/Popover";
3031
import { shallow } from "zustand/shallow";
3132
import { SemanticSearchInfo } from "../chat/OpenAiKey";
33+
import Backdrop from "@mui/material/Backdrop";
3234

3335
const GloablSearchInput = () => {
3436
const globalSearch = useMimessage((state) => state.globalSearch);
@@ -190,28 +192,69 @@ const GroupChatFilter = () => {
190192
);
191193
};
192194

195+
/**
 * Checkbox that turns semantic search on or off.
 *
 * Enabling it also triggers the "load vectors into memory" mutation; while
 * that mutation is in flight a backdrop with a spinner is shown. The checkbox
 * is disabled when no OpenAI key is set.
 */
const ToggleSemanticSearch = () => {
  // `mutate` is used instead of `mutateAsync`: it returns no promise, so a
  // failed load cannot turn into an unhandled rejection in the change handler.
  const { mutate: loadVectors, isLoading } = useLoadSemanticResultsIntoMemory();
  const { openAiKey, setUseSemanticSearch, useSemanticSearch } = useMimessage(
    (state) => ({
      useSemanticSearch: state.useSemanticSearch,
      setUseSemanticSearch: state.setUseSemanticSearch,
      openAiKey: state.openAiKey,
    }),
    shallow,
  );

  const handleToggle = () => {
    const enabled = !useSemanticSearch;
    setUseSemanticSearch(enabled);
    // Vectors are only needed while semantic search is on, so skip the load
    // (and its blocking backdrop) when the user is turning the feature off.
    if (enabled) {
      loadVectors();
    }
  };

  return (
    <>
      {isLoading && (
        <Backdrop open>
          <Box
            onClick={(e) => e.stopPropagation()}
            sx={{ background: "#2c2c2c", maxWidth: 600, p: 2, m: 2 }}
            display={"flex"}
            flexDirection={"column"}
          >
            <Typography variant="h1" sx={{ color: "white" }}>
              Loading Vectors into Memory
            </Typography>
            <Typography variant="h6" sx={{ color: "white" }}>
              This takes ~2s per 100k messages
            </Typography>
            <CircularProgress />
          </Box>
        </Backdrop>
      )}
      <FormGroup>
        <FormControlLabel
          control={
            <Checkbox
              style={{
                color: "white",
              }}
              checked={useSemanticSearch}
              onChange={handleToggle}
              disabled={!openAiKey}
              title={openAiKey ? "" : "OpenAI Key Required"}
            />
          }
          label="Use Semantic Search"
        />
      </FormGroup>
    </>
  );
};
247+
193248
const GlobalSearchFilter = () => {
194249
const { data: results } = useGlobalSearch();
195250
const count = results?.length || 0;
196-
const {
197-
openAiKey,
198-
setUseSemanticSearch,
199-
useSemanticSearch,
200-
startDate,
201-
setStartDate,
202-
setEndDate,
203-
endDate,
204-
globalSearch,
205-
} = useMimessage(
251+
const { startDate, setStartDate, setEndDate, endDate, globalSearch } = useMimessage(
206252
(state) => ({
207253
startDate: state.startDate,
208254
endDate: state.endDate,
209255
setStartDate: state.setStartDate,
210256
setEndDate: state.setEndDate,
211257
globalSearch: state.globalSearch,
212-
useSemanticSearch: state.useSemanticSearch,
213-
setUseSemanticSearch: state.setUseSemanticSearch,
214-
openAiKey: state.openAiKey,
215258
}),
216259
shallow,
217260
);
@@ -237,22 +280,7 @@ const GlobalSearchFilter = () => {
237280
<GroupChatFilter />
238281
<DateFilter selection={startDate} setSelection={setStartDate} text={"Start Date"} />
239282
<DateFilter selection={endDate} setSelection={setEndDate} text={"End Date"} />
240-
<FormGroup>
241-
<FormControlLabel
242-
control={
243-
<Checkbox
244-
style={{
245-
color: "white",
246-
}}
247-
checked={useSemanticSearch}
248-
onChange={() => setUseSemanticSearch(!useSemanticSearch)}
249-
disabled={!openAiKey}
250-
title={openAiKey ? "" : "OpenAI Key Required"}
251-
/>
252-
}
253-
label="Use Semantic Search"
254-
/>
255-
</FormGroup>
283+
<ToggleSemanticSearch />
256284
</Box>
257285
);
258286
};

src/hooks/dataHooks.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,6 @@ export const useGlobalSearch = () => {
396396
}),
397397
shallow,
398398
);
399-
400399
const handleMap = useContactToHandleMap();
401400
return useQuery<GlobalSearchResponse>(
402401
[
@@ -612,6 +611,12 @@ export const useOpenFileAtLocation = () => {
612611
});
613612
};
614613

614+
/**
 * Mutation hook that invokes the "loadVectorsIntoMemory" IPC channel.
 * Whatever the channel resolves with is discarded; callers only observe
 * the mutation's status.
 */
export const useLoadSemanticResultsIntoMemory = () =>
  useMutation(["loadVectorsIntoMemory"], async () => {
    await ipcRenderer.invoke("loadVectorsIntoMemory");
  });
619+
615620
export const useAccessibilityPermissionsCheck = () => {
616621
return useQuery<boolean | null>(
617622
["accessibility-permissions"],

0 commit comments

Comments
 (0)