|
| 1 | +import { Database } from "../drivers/database"; |
| 2 | + |
/** Describes one column of the table created by `SQLiteCloudVectorClient.init`. */
interface Column {
  /** Column name, written verbatim into the column definition. */
  name: string;
  /**
   * Column type. It is lower-cased before use; the value `embedding`
   * (any casing) is replaced by a vector type built from the index dimensions.
   */
  type: string;
  /** When truthy, `PARTITION KEY` is appended to the column definition. */
  partitionKey?: boolean;
  /** When truthy, `PRIMARY KEY` is appended to the column definition. */
  primaryKey?: boolean;
}
| 9 | + |
/** Options accepted by `init` to describe the vector table to set up. */
interface IndexOptions {
  /** Name of the virtual table; interpolated as-is into the SQL text. */
  tableName: string;
  /** Vector length, placed between the brackets of the embedding column type. */
  dimensions: number;
  /** Column definitions for the table. */
  columns: Column[];
  /** When truthy the embedding column type is `BIT[n]`, otherwise `FLOAT[n]`. */
  binaryQuantization?: boolean;
  /** When set, a `USE DATABASE <dbName>;` command is issued during `init`. */
  dbName?: string;
}
| 17 | + |
/**
 * Payload accepted by `upsert`: an array whose elements are single-element
 * tuples, each holding one record that must carry an `id`.
 *
 * NOTE(review): the one-element tuple around each record looks unintentional;
 * a plain array of records may have been meant. Confirm before relying on it.
 */
type UpsertData = [Record<string, any> & { id: string | number }][]
| 19 | + |
/** Options accepted by `query`. */
interface QueryOptions {
  /** Value written into the `k = …` condition of the search statement. */
  topK: number,
  /** Raw SQL condition fragments; they are joined with ` and ` and appended to the WHERE clause. */
  where?: string[]
}
| 24 | + |
/** Contract implemented by `SQLiteCloudVectorClient`. */
interface SQLiteCloudVector {
  /** Sets up the table described by `options`; resolves to the store. */
  init(options: IndexOptions): Promise<SQLiteCloudVector>
  /** Writes the given records into the table; declared to resolve to the store. */
  upsert(data: UpsertData): Promise<SQLiteCloudVector>
  /** Searches the table with `queryEmbedding`, constrained by `options`. */
  query(queryEmbedding: number[], options: QueryOptions): Promise<any>
}
| 30 | + |
/** Default name of the vector column referenced in the `match` condition built by `query`. */
const DEFAULT_EMBEDDING_COLUMN_NAME = 'embedding'
| 32 | + |
/**
 * Produces the column type string for an embedding column.
 *
 * @param dimensions - Number written between the square brackets.
 * @param binaryQuantization - Selects the `BIT` element type when true, `FLOAT` otherwise.
 * @returns A string such as `FLOAT[384]` or `BIT[384]`.
 */
const buildEmbeddingType = (dimensions: number, binaryQuantization: boolean) => {
  let elementType = 'FLOAT'
  if (binaryQuantization) {
    elementType = 'BIT'
  }
  return elementType + '[' + dimensions + ']'
}
| 36 | + |
/**
 * Builds the comma-separated column definitions placed inside `vec0(...)`.
 *
 * Each column becomes `<name> <type>[ PRIMARY KEY][ PARTITION KEY]`. The type is
 * lower-cased, and a type of `embedding` is replaced by the vector type derived
 * from `dimensions` and `binaryQuantization`.
 *
 * @param opts - Index options; `columns`, `dimensions` and `binaryQuantization` are read.
 * @returns The definitions joined by `, `, with no leading separator and no
 *          trailing whitespace; an empty string when there are no columns.
 */
const formatInitColumns = (opts: IndexOptions): string => {
  const { columns, dimensions, binaryQuantization } = opts
  return columns
    .map(({ name, type, primaryKey, partitionKey }) => {
      const normalizedType = type.toLowerCase()
      const sqlType = normalizedType === 'embedding'
        ? buildEmbeddingType(dimensions, !!binaryQuantization)
        : normalizedType
      // Collect the pieces and join once so every piece is separated by exactly
      // one space, including when both key flags are set.
      const parts = [name, sqlType]
      if (primaryKey) parts.push('PRIMARY KEY')
      if (partitionKey) parts.push('PARTITION KEY')
      return parts.join(' ')
    })
    // join() avoids the leading ", " that a reduce seeded with '' would produce.
    .join(', ')
}
| 49 | + |
/**
 * Placeholder for the helper that turns upsert data into the column list and
 * value list of an INSERT statement. It is not implemented yet.
 *
 * @param data - Records to format (currently unused).
 * @returns Never returns normally.
 * @throws Error Always, with the message "Function not implemented.".
 */
function formatUpsertCommand(data: UpsertData): [any, any] {
  const notImplemented = new Error("Function not implemented.");
  throw notImplemented;
}
| 53 | + |
| 54 | + |
/**
 * Client that creates, fills and searches a `vec0` virtual table through a
 * `Database` connection.
 *
 * Table and column names are interpolated directly into SQL text, so they must
 * come from trusted configuration rather than end-user input.
 */
export class SQLiteCloudVectorClient implements SQLiteCloudVector {

  private _db: Database
  private _tableName: string
  private _columns: Column[]
  private _formattedColumns: string

  /**
   * @param _db - Connection used to run every statement issued by this client.
   */
  constructor(_db: Database) {
    this._db = _db
    this._tableName = ''
    this._columns = []
    this._formattedColumns = ''
  }

  /**
   * Records the table layout and creates the virtual table if it does not exist.
   *
   * @param options - Table name, columns, vector dimensions and optional database name.
   * @returns This client, to allow chaining.
   */
  async init(options: IndexOptions): Promise<SQLiteCloudVectorClient> {
    const formattedColumns = formatInitColumns(options)
    this._tableName = options.tableName
    this._columns = options.columns ?? []
    this._formattedColumns = formattedColumns

    if (options.dbName) {
      // Sent as plain text: a tagged template would bind the database name as a
      // parameter, which is not valid for this command.
      await this._db.sql(`USE DATABASE ${options.dbName};`)
    }

    // IF NOT EXISTS replaces the earlier "SELECT 1 ... LIMIT 1" probe, which
    // failed on a missing table and reported an empty existing table as absent.
    const query = `CREATE VIRTUAL TABLE IF NOT EXISTS ${options.tableName} USING vec0(${formattedColumns})`
    await this._db.sql(query)
    return this
  }

  /**
   * Inserts the given records into the table.
   *
   * NOTE(review): this issues a plain INSERT, so despite the name existing rows
   * are not replaced.
   *
   * @param data - Records to write.
   * @returns This client, matching the `SQLiteCloudVector` contract.
   * @throws Error While `formatUpsertCommand` remains unimplemented.
   */
  async upsert(data: UpsertData): Promise<SQLiteCloudVectorClient> {
    const [formattedColumns, formattedValues] = formatUpsertCommand(data)
    const query = `INSERT INTO ${this._tableName}(${formattedColumns}) VALUES (${formattedValues})`
    await this._db.sql(query)
    return this
  }

  /**
   * Runs a nearest-neighbour search against the table.
   *
   * @param queryEmbedding - Vector to match against; every entry must be a finite number.
   * @param options - `topK` (positive integer) and optional extra SQL conditions.
   * @returns An object with the rows in `data` and `error` set to null.
   * @throws TypeError When `queryEmbedding` contains a non-finite value.
   * @throws RangeError When `topK` is not a positive integer.
   */
  async query(queryEmbedding: number[], options: QueryOptions) {
    const { topK, where } = options
    // Both values are interpolated into SQL text, so reject anything that is
    // not strictly numeric before building the statement.
    if (!queryEmbedding.every((value) => Number.isFinite(value))) {
      throw new TypeError('queryEmbedding must contain only finite numbers')
    }
    if (!Number.isInteger(topK) || topK <= 0) {
      throw new RangeError('topK must be a positive integer')
    }

    // Match against the column declared with type "embedding" in init(); fall
    // back to the default name when none was declared.
    const embeddingColumn =
      this._columns.find((column) => column.type.toLowerCase() === 'embedding')?.name ??
      DEFAULT_EMBEDDING_COLUMN_NAME

    // The vector is passed as a quoted JSON string literal. Joining a list of
    // conditions avoids a dangling "and" when no extra filters are supplied.
    const conditions = [
      `${embeddingColumn} match '${JSON.stringify(queryEmbedding)}'`,
      `k = ${topK}`,
      ...(where ?? []).filter((clause) => clause.trim() !== ''),
    ]
    const query = `SELECT * FROM ${this._tableName} WHERE ${conditions.join(' and ')}`
    const result = await this._db.sql(query)
    return { data: result, error: null }
  }

}
0 commit comments