diff --git a/src/lib/server/db/schema.ts b/src/lib/server/db/schema.ts index 5360814..776205e 100644 --- a/src/lib/server/db/schema.ts +++ b/src/lib/server/db/schema.ts @@ -12,9 +12,80 @@ import { float_32_array } from './utils'; * to the generated migration file */ -// this is just an example of a vector table...we can change this with the docs table later -export const vector_table = sqliteTable('vector_table', { +export const distillations = sqliteTable('distillations', { id: integer('id').primaryKey(), - text: text('text'), - vector: float_32_array('vector', { dimensions: 3 }), + preset_name: text('preset_name').notNull(), + version: text('version').notNull(), + content: text('content').notNull(), + size_kb: integer('size_kb').notNull(), + document_count: integer('document_count').notNull(), + distillation_job_id: integer('distillation_job_id').references(() => distillation_jobs.id), + created_at: integer('created_at', { mode: 'timestamp' }) + .notNull() + .$defaultFn(() => new Date()), +}); + +export const distillation_jobs = sqliteTable('distillation_jobs', { + id: integer('id').primaryKey(), + preset_name: text('preset_name').notNull(), + batch_id: text('batch_id'), + status: text('status', { enum: ['pending', 'processing', 'completed', 'failed'] }).notNull(), + model_used: text('model_used').notNull(), + total_files: integer('total_files').notNull(), + processed_files: integer('processed_files').notNull().default(0), + successful_files: integer('successful_files').notNull().default(0), + minimize_applied: integer('minimize_applied', { mode: 'boolean' }).notNull().default(false), + total_input_tokens: integer('total_input_tokens').notNull().default(0), + total_output_tokens: integer('total_output_tokens').notNull().default(0), + started_at: integer('started_at', { mode: 'timestamp' }), + completed_at: integer('completed_at', { mode: 'timestamp' }), + error_message: text('error_message'), + metadata: text('metadata', { mode: 'json' }) + .$type>() + .notNull() + .default({}), + created_at: integer('created_at', { mode: 'timestamp' }) + .notNull() + .$defaultFn(() => new Date()), + updated_at: integer('updated_at', { mode: 'timestamp' }) + .notNull() + .$defaultFn(() => new Date()), +}); + +export const content = sqliteTable('content', { + id: integer('id').primaryKey(), + path: text('path').notNull(), + filename: text('filename').notNull(), + content: text('content').notNull(), + size_bytes: integer('size_bytes').notNull(), + embeddings: float_32_array('embeddings', { dimensions: 1024 }), + metadata: text('metadata', { mode: 'json' }) + .$type>() + .notNull() + .default({}), + created_at: integer('created_at', { mode: 'timestamp' }) + .notNull() + .$defaultFn(() => new Date()), + updated_at: integer('updated_at', { mode: 'timestamp' }) + .notNull() + .$defaultFn(() => new Date()), +}); + +export const content_distilled = sqliteTable('content_distilled', { + id: integer('id').primaryKey(), + path: text('path').notNull(), + filename: text('filename').notNull(), + content: text('content').notNull(), + size_bytes: integer('size_bytes').notNull(), + embeddings: float_32_array('embeddings', { dimensions: 1024 }), + metadata: text('metadata', { mode: 'json' }) + .$type>() + .notNull() + .default({}), + created_at: integer('created_at', { mode: 'timestamp' }) + .notNull() + .$defaultFn(() => new Date()), + updated_at: integer('updated_at', { mode: 'timestamp' }) + .notNull() + .$defaultFn(() => new Date()), }); diff --git a/src/lib/server/db/utils.ts b/src/lib/server/db/utils.ts index dc14f1d..c0b219e 100644 --- a/src/lib/server/db/utils.ts +++ b/src/lib/server/db/utils.ts @@ -26,7 +26,9 @@ export function vector(arr: number[]) { * .execute(); */ export function distance(column: Column, arr: number[], as = 'distance') { - return sql`vector_distance_cos(${column}, vector32(${JSON.stringify(arr)}))`.as(as); + return sql`CASE ${column} ISNULL WHEN 1 THEN 1 ELSE vector_distance_cos(${column}, vector32(${JSON.stringify(arr)})) END`.as( + as, + ); } /** diff --git a/src/routes/+page.server.ts b/src/routes/+page.server.ts deleted file mode 100644 index 82f3eaf..0000000 --- a/src/routes/+page.server.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { VOYAGE_API_KEY } from '$env/static/private'; -import { db } from '$lib/server/db/index.js'; -import { vector_table } from '$lib/server/db/schema.js'; -import { distance, vector } from '$lib/server/db/utils.js'; -import { sql } from 'drizzle-orm'; - -async function get_embeddings(text: string) { - const result = await fetch('https://api.voyageai.com/v1/embeddings', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${VOYAGE_API_KEY}`, - }, - body: JSON.stringify({ - input: [text], - model: 'voyage-3.5', - }), - }).then((res) => res.json()); - - return result.data[0].embedding as number[]; -} - -export async function load({ url: { searchParams } }) { - const sentence = searchParams.get('sentence'); - if (!sentence) return { top: [], sentence: '' }; - const top = await db - .select({ - id: vector_table.id, - text: vector_table.text, - distance: distance(vector_table.vector, await get_embeddings(sentence)), - }) - .from(vector_table) - .orderBy(sql`distance`) - .execute(); - return { top, sentence }; -} - -export const actions = { - async default({ request }) { - const data = await request.formData(); - const text = data.get('text')?.toString(); - const embeddings = await get_embeddings(text ?? ''); - if (text && embeddings) { - await db - .insert(vector_table) - .values({ text, vector: vector(embeddings) }) - .execute(); - } - }, -}; diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte index 5a78957..39f691e 100644 --- a/src/routes/+page.svelte +++ b/src/routes/+page.svelte @@ -1,21 +1 @@ - -

Official Svelte MCP

- -
- -
- -
- -Comparing with -
{data.sentence}
- -{#each data.top as item (item.id)} -

{item.text} - {item.distance}

-{/each}