diff --git a/configs/config.yaml-template b/configs/config.yaml-template index e4394c0..1ac1c9d 100644 --- a/configs/config.yaml-template +++ b/configs/config.yaml-template @@ -51,6 +51,13 @@ plugins: file-system: python: long-term-memory: + base_url: https://api.openai.com/v1 + api_key: sk-proj-your-openai-api-key + vector_model: text-embedding-3-small + dimensions: 100 + max_query_results: 3 + persist_db: true + db_file: configs/long-term-memory.db discord: bot_token: your-discord-bot-token allowed_channel_ids: diff --git a/package.json b/package.json index 03a9d37..fa1b497 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ "openai": "^4.78.1", "playwright": "^1.51.1", "python-shell": "^5.0.0", + "sqlite-vec": "0.1.7-alpha.2", "winston": "^3.17.0", "winston-transport": "^4.9.0", "ws": "^8.18.0", @@ -53,7 +54,7 @@ "@types/html-to-text": "^9.0.4", "@types/jsdom": "^21.1.7", "@types/mime-types": "^2.1.4", - "@types/node": "^22.10.5", + "@types/node": "^22.14.1", "@types/node-telegram-bot-api": "^0.64.7", "@types/ws": "^8.5.14", "tsx": "^4.19.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c3a2c58..dc83f2c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -50,6 +50,9 @@ importers: python-shell: specifier: ^5.0.0 version: 5.0.0 + sqlite-vec: + specifier: 0.1.7-alpha.2 + version: 0.1.7-alpha.2 winston: specifier: ^3.17.0 version: 3.17.0 @@ -82,8 +85,8 @@ importers: specifier: ^2.1.4 version: 2.1.4 '@types/node': - specifier: ^22.10.5 - version: 22.10.5 + specifier: ^22.14.1 + version: 22.14.1 '@types/node-telegram-bot-api': specifier: ^0.64.7 version: 0.64.7 @@ -591,8 +594,8 @@ packages: '@types/node@18.19.70': resolution: {integrity: sha512-RE+K0+KZoEpDUbGGctnGdkrLFwi1eYKTlIHNl2Um98mUkGsm1u2Ff6Ltd0e8DktTtC98uy7rSj+hO8t/QuLoVQ==} - '@types/node@22.10.5': - resolution: {integrity: sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==} + '@types/node@22.14.1': + resolution: {integrity: sha512-u0HuPQwe/dHrItgHHpmw3N2fYCR6x4ivMNbPHRkBVP4CvN+kiRrKHWk3i8tXiO/joPwXLMYvF9TTF0eqgHIuOw==} '@types/phoenix@1.6.6': resolution: {integrity: sha512-PIzZZlEppgrpoT2QgbnDU+MMzuR6BbCjllj0bM70lWoejMeNJAxCchxnv7J3XFkI8MpygtRpzXrIlmWUBclP5A==} @@ -1798,6 +1801,34 @@ packages: resolution: {integrity: sha512-l3BikUxvPOcn5E74dZiq5BGsTb5yEwhaTSzccU6t4sDOH8NWJCstKO5QT2CvtFoK6F0saL7p9xHAqHOlCPJygA==} engines: {node: '>= 8'} + sqlite-vec-darwin-arm64@0.1.7-alpha.2: + resolution: {integrity: sha512-raIATOqFYkeCHhb/t3r7W7Cf2lVYdf4J3ogJ6GFc8PQEgHCPEsi+bYnm2JT84MzLfTlSTIdxr4/NKv+zF7oLPw==} + cpu: [arm64] + os: [darwin] + + sqlite-vec-darwin-x64@0.1.7-alpha.2: + resolution: {integrity: sha512-jeZEELsQjjRsVojsvU5iKxOvkaVuE+JYC8Y4Ma8U45aAERrDYmqZoHvgSG7cg1PXL3bMlumFTAmHynf1y4pOzA==} + cpu: [x64] + os: [darwin] + + sqlite-vec-linux-arm64@0.1.7-alpha.2: + resolution: {integrity: sha512-6Spj4Nfi7tG13jsUG+W7jnT0bCTWbyPImu2M8nWp20fNrd1SZ4g3CSlDAK8GBdavX7wRlbBHCZ+BDa++rbDewA==} + cpu: [arm64] + os: [linux] + + sqlite-vec-linux-x64@0.1.7-alpha.2: + resolution: {integrity: sha512-IcgrbHaDccTVhXDf8Orwdc2+hgDLAFORl6OBUhcvlmwswwBP1hqBTSEhovClG4NItwTOBNgpwOoQ7Qp3VDPWLg==} + cpu: [x64] + os: [linux] + + sqlite-vec-windows-x64@0.1.7-alpha.2: + resolution: {integrity: sha512-TRP6hTjAcwvQ6xpCZvjP00pdlda8J38ArFy1lMYhtQWXiIBmWnhMaMbq4kaeCYwvTTddfidatRS+TJrwIKB/oQ==} + cpu: [x64] + os: [win32] + + sqlite-vec@0.1.7-alpha.2: + resolution: {integrity: sha512-rNgRCv+4V4Ed3yc33Qr+nNmjhtrMnnHzXfLVPeGb28Dx5mmDL3Ngw/Wk8vhCGjj76+oC6gnkmMG8y73BZWGBwQ==} + sshpk@1.18.0: resolution: {integrity: sha512-2p2KJZTSqQ/I3+HX42EpYOa2l3f8Erv8MWKsy2I9uf4wA7yFIkXRffYdsx86y6z4vHtV8u7g+pPlr8/4ouAxsQ==} engines: {node: '>=0.10.0'} @@ -1947,8 +1978,8 @@ packages: undici-types@5.26.5: resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} - undici-types@6.20.0: - resolution: {integrity: sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==} + undici-types@6.21.0: + resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} undici@6.19.8: resolution: {integrity: sha512-U8uCCl2x9TK3WANvmBavymRzxbfFYG+tAu+fgx3zxQy3qdagQqBLwJVrdyO1TBfUXvfKveMKJZhpvUYoOjM+4g==} @@ -2457,7 +2488,7 @@ snapshots: '@types/follow-redirects@1.14.4': dependencies: - '@types/node': 22.10.5 + '@types/node': 22.14.1 '@types/html-to-text@9.0.4': {} @@ -2465,7 +2496,7 @@ snapshots: '@types/jsdom@21.1.7': dependencies: - '@types/node': 22.10.5 + '@types/node': 22.14.1 '@types/tough-cookie': 4.0.5 parse5: 7.2.1 @@ -2473,28 +2504,28 @@ snapshots: '@types/node-fetch@2.6.12': dependencies: - '@types/node': 22.10.5 + '@types/node': 22.14.1 form-data: 4.0.1 '@types/node-telegram-bot-api@0.64.7': dependencies: - '@types/node': 22.10.5 + '@types/node': 22.14.1 '@types/request': 2.48.12 '@types/node@18.19.70': dependencies: undici-types: 5.26.5 - '@types/node@22.10.5': + '@types/node@22.14.1': dependencies: - undici-types: 6.20.0 + undici-types: 6.21.0 '@types/phoenix@1.6.6': {} '@types/request@2.48.12': dependencies: '@types/caseless': 0.12.5 - '@types/node': 22.10.5 + '@types/node': 22.14.1 '@types/tough-cookie': 4.0.5 form-data: 2.5.2 @@ -2504,7 +2535,7 @@ snapshots: '@types/ws@8.5.14': dependencies: - '@types/node': 22.10.5 + '@types/node': 22.14.1 '@vladfrangu/async_event_emitter@2.4.6': {} @@ -3897,6 +3928,29 @@ snapshots: source-map@0.7.4: {} + sqlite-vec-darwin-arm64@0.1.7-alpha.2: + optional: true + + sqlite-vec-darwin-x64@0.1.7-alpha.2: + optional: true + + sqlite-vec-linux-arm64@0.1.7-alpha.2: + optional: true + + sqlite-vec-linux-x64@0.1.7-alpha.2: + optional: true + + sqlite-vec-windows-x64@0.1.7-alpha.2: + optional: true + + sqlite-vec@0.1.7-alpha.2: + optionalDependencies: + sqlite-vec-darwin-arm64: 0.1.7-alpha.2 + sqlite-vec-darwin-x64: 0.1.7-alpha.2 + sqlite-vec-linux-arm64: 0.1.7-alpha.2 + sqlite-vec-linux-x64: 0.1.7-alpha.2 + sqlite-vec-windows-x64: 0.1.7-alpha.2 + sshpk@1.18.0: dependencies: asn1: 0.2.6 @@ -4083,7 +4137,7 @@ snapshots: undici-types@5.26.5: {} - undici-types@6.20.0: {} + undici-types@6.21.0: {} undici@6.19.8: {} diff --git a/src/plugins/long-term-memory/init.ts b/src/plugins/long-term-memory/init.ts index b15ae4d..0d85196 100644 --- a/src/plugins/long-term-memory/init.ts +++ b/src/plugins/long-term-memory/init.ts @@ -1,6 +1,9 @@ import { Athena, Dict } from "../../core/athena.js"; import { PluginBase } from "../plugin-base.js"; - +import { load } from "sqlite-vec"; +import { DatabaseSync } from "node:sqlite"; +import OpenAI from "openai"; +import { openaiDefaultHeaders } from "../../utils/constants.js"; interface ILongTermMemoryItem { desc: string; data: Dict; @@ -9,21 +12,42 @@ interface ILongTermMemoryItem { export default class LongTermMemory extends PluginBase { store: Dict = {}; - + openai!: OpenAI; + db!: DatabaseSync; desc() { return "You have a long-term memory. You must put whatever you think a human would remember long-term in here. This could be knowledge, experiences, or anything else you think is important. It's a key-value store. The key is a string, and the value is a JSON object. You will override the value if you store the same key again. If you want to recall something, you should list and/or retrieve it."; } async load(athena: Athena) { + this.db = new DatabaseSync(this.config.persist_db ? this.config.db_file : ':memory:', { + allowExtension: true + }); + load(this.db); + + // TODO: Support migration for varying dimensions + this.db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS vec_items USING + vec0( + embedding float[${this.config.dimensions}], + desc text, + data text + ) + `); + + const insertStmt = this.db.prepare( + 'INSERT INTO vec_items(embedding, desc, data) VALUES (?, ?, ?)' + ); + + this.openai = new OpenAI({ + baseURL: this.config.base_url, + apiKey: this.config.api_key, + defaultHeaders: openaiDefaultHeaders, + }); + athena.registerTool({ name: "ltm/store", desc: "Store some data to your long-term memory.", args: { - key: { - type: "string", - desc: "The key to store the data.", - required: true, - }, desc: { type: "string", desc: "A description of the data.", @@ -43,36 +67,19 @@ export default class LongTermMemory extends PluginBase { }, }, fn: async (args: Dict) => { - this.store[args.key] = { - desc: args.desc, - data: args.data, - created_at: new Date().toString(), - }; - return { status: "success" }; - }, - }); - athena.registerTool({ - name: "ltm/remove", - desc: "Remove data from your long-term memory.", - args: { - key: { - type: "string", - desc: "The key to remove the data.", - required: true, - }, - }, - retvals: { - status: { - type: "string", - desc: "The status of the operation.", - required: true, - }, - }, - fn: async (args: Dict) => { - delete this.store[args.key]; + const embedding = await this.openai.embeddings.create({ + model: this.config.vector_model, + dimensions: this.config.dimensions, + input: args.desc, + encoding_format: "float", + }); + insertStmt.run(Float32Array.from(embedding.data[0].embedding), + args.desc, + JSON.stringify(args.data)); return { status: "success" }; }, }); + // TODO: Implement remove athena.registerTool({ name: "ltm/list", desc: "List your long-term memory.", @@ -87,33 +94,17 @@ export default class LongTermMemory extends PluginBase { desc: "The metadata of the long-term memory.", required: false, of: { - key: { - type: "string", - desc: "The key of the data.", - required: true, - }, desc: { type: "string", desc: "The description of the data.", required: true, }, - created_at: { - type: "string", - desc: "The creation date of the data.", - required: true, - }, }, }, }, }, fn: async (args: Dict) => { - const list = Object.keys(this.store).map((key) => { - return { - key: key, - desc: this.store[key].desc, - created_at: this.store[key].created_at, - }; - }); + const list = this.db.prepare("SELECT desc, data FROM vec_items").all(); return { list: list }; }, }); @@ -121,46 +112,71 @@ export default class LongTermMemory extends PluginBase { name: "ltm/retrieve", desc: "Retrieve data from your long-term memory.", args: { - key: { + query: { type: "string", - desc: "The key to retrieve the data.", + desc: "The query to retrieve the data.", required: true, }, }, retvals: { - desc: { - type: "string", - desc: "The description of the data.", - required: true, - }, - data: { - type: "object", - desc: "The data.", - required: true, - }, - created_at: { - type: "string", - desc: "The creation date of the data.", + list: { + type: "array", + desc: "Query results list of metadata of the long-term memory.", required: true, + of: { + type: "object", + desc: "The desc and data of the long-term memory.", + required: false, + of: { + desc: { + type: "string", + desc: "The description of the data.", + required: true, + }, + data: { + type: "object", + desc: "The data.", + required: true, + }, + }, + }, }, }, fn: async (args: Dict) => { - const item = this.store[args.key]; - if (!item) { - return { error: "The key does not exist." }; + const embedding = await this.openai.embeddings.create({ + model: this.config.vector_model, + dimensions: this.config.dimensions, + input: args.query, + encoding_format: "float", + }); + const results = this.db.prepare( + `SELECT + distance, + desc, + data + FROM vec_items + WHERE embedding MATCH ? + ORDER BY distance + LIMIT ${this.config.max_query_results}` + ).all(Float32Array.from(embedding.data[0].embedding)); + if (!results || results.length === 0) { + throw new Error("No results found"); } - return { - desc: item.desc, - data: item.data, - created_at: item.created_at, - }; + return results.map(result => { + if (!result || typeof result !== 'object') { + throw new Error("Invalid result format"); + } + return { + desc: String(result.desc), + data: JSON.parse(String(result.data)), + }; + }); }, }); } async unload(athena: Athena) { athena.deregisterTool("ltm/store"); - athena.deregisterTool("ltm/remove"); athena.deregisterTool("ltm/list"); athena.deregisterTool("ltm/retrieve"); }