diff --git a/scripts/cleanup-ai-hub-archive.cjs b/scripts/cleanup-ai-hub-archive.cjs new file mode 100644 index 0000000000..3ad1868f21 --- /dev/null +++ b/scripts/cleanup-ai-hub-archive.cjs @@ -0,0 +1,241 @@ +#!/usr/bin/env node +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); +const sqlite3 = require('sqlite3'); + +const home = os.homedir(); +const archiveRoot = path.join( + home, + 'Library', + 'Application Support', + 'FerdiumDev', + 'ai-hub', +); +const dbPath = process.env.ARCHIVE_DB || path.join(archiveRoot, 'archive.db'); + +function queryAll(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.all(sql, params, (error, rows) => { + if (error) { + reject(error); + return; + } + + resolve(rows || []); + }); + }); +} + +function run(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.run(sql, params, error => { + if (error) { + reject(error); + return; + } + + resolve(); + }); + }); +} + +function get(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.get(sql, params, (error, row) => { + if (error) { + reject(error); + return; + } + + resolve(row); + }); + }); +} + +function extractConversationId(value) { + if (!value) { + return null; + } + + const match = String(value).match(/\/c\/([^#/?]+)/); + if (match?.[1]) { + return match[1]; + } + + return null; +} + +function getCanonicalConversationKey(conversation) { + return ( + extractConversationId(conversation.vendor_conversation_id) || + extractConversationId(conversation.source_url) || + conversation.source_url || + conversation.vendor_conversation_id || + conversation.id + ); +} + +function isPlaceholderTitle(title) { + return !title || title === 'Conversation'; +} + +function compareConversations(left, right) { + if (left.message_count !== right.message_count) { + return right.message_count - left.message_count; + } + + if (isPlaceholderTitle(left.title) !== isPlaceholderTitle(right.title)) { + 
return Number(isPlaceholderTitle(left.title)) - Number(isPlaceholderTitle(right.title)); + } + + return Date.parse(right.updated_at || 0) - Date.parse(left.updated_at || 0); +} + +function mergeMessages(rows) { + const merged = new Map(); + + for (const row of rows) { + const key = `${row.seq}|${row.role}|${row.content_text}`; + const existing = merged.get(key); + + if (!existing) { + merged.set(key, row); + continue; + } + + const existingMd = existing.content_md || ''; + const nextMd = row.content_md || ''; + if (nextMd.length > existingMd.length) { + merged.set(key, row); + } + } + + return [...merged.values()].sort((a, b) => a.seq - b.seq); +} + +async function main() { + if (!fs.existsSync(dbPath)) { + throw new Error(`Archive DB not found: ${dbPath}`); + } + + const backupPath = `${dbPath}.${new Date().toISOString().replaceAll(':', '-')}.bak`; + fs.copyFileSync(dbPath, backupPath); + + const db = new sqlite3.Database(dbPath); + + try { + const conversations = await queryAll( + db, + `SELECT c.id, c.account_id, c.vendor_conversation_id, c.title, c.source_url, c.created_at, c.updated_at, + (SELECT count(*) FROM messages m WHERE m.conversation_id = c.id) AS message_count + FROM conversations c`, + ); + + const grouped = new Map(); + for (const conversation of conversations) { + const key = `${conversation.account_id}|${getCanonicalConversationKey(conversation)}`; + if (!grouped.has(key)) { + grouped.set(key, []); + } + grouped.get(key).push(conversation); + } + + const duplicateGroups = [...grouped.values()].filter(group => group.length > 1); + + await run(db, 'BEGIN TRANSACTION'); + + let removedConversations = 0; + let movedMessages = 0; + + for (const group of duplicateGroups) { + const sorted = [...group].sort(compareConversations); + const winner = sorted[0]; + const losers = sorted.slice(1); + + const allMessages = []; + for (const conversation of sorted) { + const rows = await queryAll( + db, + `SELECT role, sender_label, content_text, content_md, seq, 
created_at, hash + FROM messages + WHERE conversation_id = ? + ORDER BY seq ASC, created_at ASC`, + [conversation.id], + ); + allMessages.push(...rows); + } + + const mergedMessages = mergeMessages(allMessages); + movedMessages += mergedMessages.length; + + const preferredTitle = sorted.find(row => !isPlaceholderTitle(row.title))?.title || winner.title; + const preferredVendorConversationId = sorted.find(row => extractConversationId(row.vendor_conversation_id))?.vendor_conversation_id || winner.vendor_conversation_id; + const preferredSourceUrl = sorted.find(row => extractConversationId(row.source_url))?.source_url || sorted.find(row => row.source_url)?.source_url || winner.source_url; + const latestUpdatedAt = sorted.reduce( + (latest, row) => (Date.parse(row.updated_at || 0) > Date.parse(latest || 0) ? row.updated_at : latest), + winner.updated_at, + ); + + await run(db, 'DELETE FROM messages WHERE conversation_id = ?', [winner.id]); + + for (const [index, message] of mergedMessages.entries()) { + await run( + db, + `INSERT INTO messages (id, conversation_id, role, sender_label, content_text, content_md, seq, created_at, hash) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [ + `cleanup-${winner.id}-${index + 1}`, + winner.id, + message.role, + message.sender_label, + message.content_text, + message.content_md, + message.seq, + message.created_at, + message.hash, + ], + ); + } + + await run( + db, + `UPDATE conversations + SET title = ?, vendor_conversation_id = ?, source_url = ?, updated_at = ? 
+ WHERE id = ?`, + [ + preferredTitle, + preferredVendorConversationId, + preferredSourceUrl, + latestUpdatedAt, + winner.id, + ], + ); + + for (const loser of losers) { + await run(db, 'DELETE FROM messages WHERE conversation_id = ?', [loser.id]); + await run(db, 'DELETE FROM conversations WHERE id = ?', [loser.id]); + removedConversations += 1; + } + } + + await run(db, 'COMMIT'); + + const remaining = await get(db, 'SELECT COUNT(*) AS count FROM conversations'); + console.log(`Backup: ${backupPath}`); + console.log(`Duplicate groups cleaned: ${duplicateGroups.length}`); + console.log(`Conversations removed: ${removedConversations}`); + console.log(`Conversation rows remaining: ${remaining.count}`); + console.log(`Merged message rows written: ${movedMessages}`); + } catch (error) { + await run(db, 'ROLLBACK'); + throw error; + } finally { + db.close(); + } +} + +main().catch(error => { + console.error(error); + process.exitCode = 1; +}); diff --git a/scripts/export-ai-hub-archive.cjs b/scripts/export-ai-hub-archive.cjs new file mode 100755 index 0000000000..e5fc5be9b3 --- /dev/null +++ b/scripts/export-ai-hub-archive.cjs @@ -0,0 +1,222 @@ +#!/usr/bin/env node +const path = require('node:path'); +const os = require('node:os'); +const fs = require('node:fs'); +const sqlite3 = require('sqlite3'); +const sanitizeFilename = require('sanitize-filename'); + +const home = os.homedir(); +const defaultRoot = path.join( + home, + 'Library', + 'Application Support', + 'FerdiumDev', + 'ai-hub', +); +const archiveRoot = process.env.ARCHIVE_ROOT || defaultRoot; +const dbPath = process.env.ARCHIVE_DB || path.join(archiveRoot, 'archive.db'); +const conversationsDir = + process.env.ARCHIVE_CONVERSATIONS_DIR || path.join(archiveRoot, 'conversations'); +const latestMarkdownPath = + process.env.ARCHIVE_LATEST_MD || path.join(archiveRoot, 'conversation.md'); + +function ensureDir(dir) { + fs.mkdirSync(dir, { recursive: true }); +} + +function slugifyTitle(title) { + const safe = 
sanitizeFilename(title || 'Conversation') + .replace(/\s+/g, '-') + .replace(/-+/g, '-') + .replace(/^-|-$/g, ''); + + return safe || 'Conversation'; +} + +function queryAll(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.all(sql, params, (error, rows) => { + if (error) { + reject(error); + return; + } + + resolve(rows || []); + }); + }); +} + +function buildMarkdown(conversation, messages) { + const lines = [`# ${conversation.title || 'Conversation'}`, '']; + + if (conversation.source_url) { + lines.push(`Source: ${conversation.source_url}`, ''); + } + + for (const message of messages) { + const speaker = + message.sender_label || (message.role === 'user' ? 'You' : message.role); + lines.push( + `## ${message.seq}. ${speaker}`, + '', + message.content_md || message.content_text || '', + '', + ); + } + + return `${lines.join('\n').trim()}\n`; +} + +function clearConversationExports(dir) { + if (!fs.existsSync(dir)) { + return; + } + + for (const filename of fs.readdirSync(dir)) { + if (filename.endsWith('.md')) { + fs.rmSync(path.join(dir, filename), { force: true }); + } + } +} + +function extractConversationId(value) { + if (!value) { + return null; + } + + const match = String(value).match(/\/c\/([^#/?]+)/); + if (match?.[1]) { + return match[1]; + } + + if (/^[0-9a-f]{8}-[0-9a-f-]{27}$/i.test(String(value))) { + return String(value); + } + + return null; +} + +function getCanonicalConversationKey(conversation) { + return ( + extractConversationId(conversation.vendor_conversation_id) || + extractConversationId(conversation.source_url) || + conversation.source_url || + conversation.vendor_conversation_id || + conversation.id + ); +} + +function shouldPreferConversation(nextConversation, currentConversation) { + const nextUpdatedAt = Date.parse(nextConversation.updated_at || 0); + const currentUpdatedAt = Date.parse(currentConversation.updated_at || 0); + + if (nextConversation.message_count !== currentConversation.message_count) { + 
return nextConversation.message_count > currentConversation.message_count; + } + + if (nextUpdatedAt !== currentUpdatedAt) { + return nextUpdatedAt > currentUpdatedAt; + } + + const nextTitle = nextConversation.title || ''; + const currentTitle = currentConversation.title || ''; + + if (currentTitle === 'Conversation' && nextTitle !== 'Conversation') { + return true; + } + + return nextTitle.length > currentTitle.length; +} + + +function isOpaqueConversationKey(value) { + return !extractConversationId(value); +} + +function getConversationFilename(conversation) { + const baseKey = isOpaqueConversationKey(conversation.canonical_key) + ? `${conversation.account_id}-${conversation.canonical_key}` + : conversation.canonical_key; + const safeKey = sanitizeFilename(String(baseKey).replace(/:\/\//g, '-').replace(/\//g, '-')); + + return `${safeKey || 'conversation'}-${slugifyTitle(conversation.title)}.md`; +} + +function dedupeConversations(conversations) { + const deduped = new Map(); + + for (const conversation of conversations) { + const key = `${conversation.account_id}|${getCanonicalConversationKey(conversation)}`; + const existing = deduped.get(key); + + if (!existing || shouldPreferConversation(conversation, existing)) { + deduped.set(key, { + ...conversation, + canonical_key: getCanonicalConversationKey(conversation), + }); + } + } + + return [...deduped.values()].sort( + (left, right) => + Date.parse(right.updated_at || 0) - Date.parse(left.updated_at || 0), + ); +} + +async function main() { + if (!fs.existsSync(dbPath)) { + throw new Error(`Archive DB not found: ${dbPath}`); + } + + ensureDir(conversationsDir); + clearConversationExports(conversationsDir); + + const db = new sqlite3.Database(dbPath); + try { + const conversations = await queryAll( + db, + `SELECT c.id, c.account_id, c.vendor_conversation_id, c.title, c.source_url, c.updated_at, + (SELECT count(*) FROM messages m WHERE m.conversation_id = c.id) AS message_count + FROM conversations c + ORDER BY 
c.updated_at DESC`, + ); + const dedupedConversations = dedupeConversations(conversations); + + let latestMarkdown = null; + let latestCount = 0; + + for (const conversation of dedupedConversations) { + const messages = await queryAll( + db, + 'SELECT seq, role, sender_label, content_md, content_text FROM messages WHERE conversation_id = ? ORDER BY seq ASC', + [conversation.id], + ); + + const markdown = buildMarkdown(conversation, messages); + const filename = getConversationFilename(conversation); + fs.writeFileSync(path.join(conversationsDir, filename), markdown, 'utf8'); + + if (!latestMarkdown) { + latestMarkdown = markdown; + } + + latestCount += 1; + } + + if (latestMarkdown) { + fs.writeFileSync(latestMarkdownPath, latestMarkdown, 'utf8'); + } + + console.log(`Exported ${latestCount} conversation(s)`); + console.log(`DB: ${dbPath}`); + console.log(`Dir: ${conversationsDir}`); + console.log(`Latest: ${latestMarkdownPath}`); + } finally { + db.close(); + } +} + +main().catch(error => { + console.error(error); + process.exitCode = 1; +}); diff --git a/scripts/export-ai-hub-index.cjs b/scripts/export-ai-hub-index.cjs new file mode 100644 index 0000000000..228e6f5982 --- /dev/null +++ b/scripts/export-ai-hub-index.cjs @@ -0,0 +1,132 @@ +#!/usr/bin/env node +const os = require('node:os'); +const path = require('node:path'); +const fs = require('node:fs'); +const sqlite3 = require('sqlite3'); +const sanitizeFilename = require('sanitize-filename'); + +const home = os.homedir(); +const archiveRoot = + process.env.ARCHIVE_ROOT || + path.join(home, 'Library', 'Application Support', 'FerdiumDev', 'ai-hub'); +const dbPath = process.env.ARCHIVE_DB || path.join(archiveRoot, 'archive.db'); +const conversationsDir = path.join(archiveRoot, 'conversations'); +const indexMarkdownPath = path.join(archiveRoot, 'index.md'); + +function all(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.all(sql, params, (error, rows) => { + if (error) { + reject(error); + 
return; + } + + resolve(rows || []); + }); + }); +} + +function slugifyTitle(title) { + const safe = sanitizeFilename(title || 'Conversation') + .replaceAll(/\s+/g, '-') + .replaceAll(/-+/g, '-') + .replaceAll(/^-|-$/g, ''); + + return safe || 'Conversation'; +} + +function getConversationMarkdownRelativePath(conversationKey, title) { + const filename = `${conversationKey}-${slugifyTitle(title)}.md`; + const absolutePath = path.join(conversationsDir, filename); + return `./${path.relative(archiveRoot, absolutePath).replaceAll(path.sep, '/')}`; +} + +function toMarkdown(items) { + const lines = [ + '# AI Hub Archive Index', + '', + `Updated at: ${new Date().toISOString()}`, + '', + ]; + const grouped = new Map(); + + for (const item of items) { + const key = `${item.vendor} / ${item.accountLabel}`; + const existing = grouped.get(key) || []; + existing.push(item); + grouped.set(key, existing); + } + + for (const [group, groupItems] of grouped.entries()) { + lines.push(`## ${group}`); + + for (const item of groupItems) { + lines.push( + `- ${item.title || '(untitled)'}`, + ` - conversation: ${item.conversationKey || item.conversationId}`, + ` - updated: ${item.updatedAt}`, + ` - messages: ${item.messageCount}`, + ` - markdown: ${item.markdownPath || '(missing)'}`, + ); + } + + lines.push(''); + } + + return `${lines.join('\n').trim()}\n`; +} + +async function main() { + const db = new sqlite3.Database(dbPath); + + try { + const rows = await all( + db, + `SELECT + c.id AS conversationId, + COALESCE(c.vendor_conversation_id, c.id) AS conversationKey, + c.title AS title, + c.updated_at AS updatedAt, + a.id AS accountId, + a.vendor AS vendor, + a.account_label AS accountLabel, + COUNT(m.id) AS messageCount + FROM conversations c + JOIN accounts a ON a.id = c.account_id + LEFT JOIN messages m ON m.conversation_id = c.id + GROUP BY c.id, c.vendor_conversation_id, c.title, c.updated_at, a.id, a.vendor, a.account_label + ORDER BY a.vendor ASC, a.account_label ASC, 
c.updated_at DESC`, + ); + + const items = rows.map(row => ({ + ...row, + markdownPath: getConversationMarkdownRelativePath( + row.conversationKey || row.conversationId, + row.title || 'Conversation', + ), + })); + + fs.mkdirSync(archiveRoot, { recursive: true }); + fs.writeFileSync(indexMarkdownPath, toMarkdown(items), 'utf8'); + + process.stdout.write( + `${JSON.stringify( + { + tag: '[AI-HUB] exported archive index', + itemCount: items.length, + indexMarkdownPath, + }, + null, + 2, + )}\n`, + ); + } finally { + db.close(); + } +} + +main().catch(error => { + process.stderr.write('[AI-HUB] failed to export archive index\n'); + process.stderr.write(`${error?.stack || error}\n`); + process.exitCode = 1; +}); diff --git a/scripts/rebuild-ai-hub-fts.cjs b/scripts/rebuild-ai-hub-fts.cjs new file mode 100644 index 0000000000..a561c08c67 --- /dev/null +++ b/scripts/rebuild-ai-hub-fts.cjs @@ -0,0 +1,101 @@ +#!/usr/bin/env node +const os = require('node:os'); +const path = require('node:path'); +const sqlite3 = require('sqlite3'); + +const home = os.homedir(); +const archiveRoot = + process.env.ARCHIVE_ROOT || + path.join(home, 'Library', 'Application Support', 'FerdiumDev', 'ai-hub'); +const dbPath = process.env.ARCHIVE_DB || path.join(archiveRoot, 'archive.db'); + +function run(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.run(sql, params, error => { + if (error) { + reject(error); + return; + } + + resolve(); + }); + }); +} + +function get(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.get(sql, params, (error, row) => { + if (error) { + reject(error); + return; + } + + resolve(row); + }); + }); +} + +async function main() { + const db = new sqlite3.Database(dbPath); + + try { + await run( + db, + `CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5( + message_id UNINDEXED, + conversation_id UNINDEXED, + account_id UNINDEXED, + vendor UNINDEXED, + conversation_title, + content_text, + content_md + )`, + ); + 
await run(db, 'DELETE FROM messages_fts'); + await run( + db, + `INSERT INTO messages_fts ( + message_id, + conversation_id, + account_id, + vendor, + conversation_title, + content_text, + content_md + ) + SELECT + m.id, + m.conversation_id, + c.account_id, + a.vendor, + COALESCE(c.title, ''), + m.content_text, + m.content_md + FROM messages m + JOIN conversations c ON c.id = m.conversation_id + JOIN accounts a ON a.id = c.account_id`, + ); + + const row = await get(db, 'SELECT COUNT(*) AS count FROM messages_fts'); + + process.stdout.write( + `${JSON.stringify( + { + tag: '[AI-HUB] rebuilt FTS index', + dbPath, + indexedMessages: row?.count || 0, + }, + null, + 2, + )}\n`, + ); + } finally { + db.close(); + } +} + +main().catch(error => { + process.stderr.write('[AI-HUB] failed to rebuild FTS index\n'); + process.stderr.write(`${error?.stack || error}\n`); + process.exitCode = 1; +}); diff --git a/src/archive/archiveService.ts b/src/archive/archiveService.ts new file mode 100644 index 0000000000..2970e08f3e --- /dev/null +++ b/src/archive/archiveService.ts @@ -0,0 +1,147 @@ +import type Service from '../models/Service'; +import { resolveConversationIdentity } from './conversationKeyResolver'; +import { upsertAccount, upsertConversation } from './conversationUpsertService'; +import { archivePaths, getArchiveDb, run } from './db'; +import { exportArchiveIndex } from './index/archiveIndexExporter'; +import { exportConversationMarkdown } from './markdownExporter'; +import { + replaceConversationMessagesForFullScan, + updateLastAssistantMessage, + upsertIncrementalMessages, +} from './messageUpsertService'; +import type { ArchiveIncrementalPayload, ArchiveScanPayload } from './types'; + +function getNormalizedMessages(payload: { messages?: any[] }) { + return (payload.messages || []).filter(message => message?.text?.trim()); +} + +async function exportArchiveArtifacts( + db: any, + identity: ReturnType<typeof resolveConversationIdentity>, +) { + const exportResult = await exportConversationMarkdown(db, 
identity); + const indexResult = await exportArchiveIndex(db); + + return { + exportResult, + indexResult, + }; +} + +export async function archiveConversationScan({ + service, + payload, +}: { + service: Service; + payload: ArchiveScanPayload; +}) { + const db = await getArchiveDb(); + const now = new Date().toISOString(); + const identity = resolveConversationIdentity(service, payload); + const messages = getNormalizedMessages(payload); + + await run(db, 'BEGIN TRANSACTION'); + + try { + await upsertAccount(db, identity, now); + await upsertConversation(db, identity, now); + await replaceConversationMessagesForFullScan( + db, + identity.conversationId, + messages, + now, + ); + await run(db, 'COMMIT'); + } catch (error) { + await run(db, 'ROLLBACK'); + throw error; + } + + const { exportResult, indexResult } = await exportArchiveArtifacts( + db, + identity, + ); + + return { + accountId: identity.accountId, + conversationId: identity.conversationId, + messageCount: messages.length, + dbPath: archivePaths.dbPath, + markdownPath: archivePaths.latestMarkdownPath, + conversationMarkdownPath: exportResult.conversationMarkdownPath, + indexMarkdownPath: indexResult.indexMarkdownPath, + }; +} + +export async function archiveIncrementalMessages({ + service, + payload, +}: { + service: Service; + payload: ArchiveIncrementalPayload; +}) { + if (payload.mode === 'rescan') { + return archiveConversationScan({ + service, + payload: { + platform: payload.vendor, + title: payload.title, + model: payload.model, + currentUrl: payload.currentUrl || payload.sourceUrl, + messageCount: payload.messages?.length, + messages: payload.messages, + }, + }); + } + + const db = await getArchiveDb(); + const now = payload.scannedAt || new Date().toISOString(); + const identity = resolveConversationIdentity(service, payload); + const messages = getNormalizedMessages(payload); + + await run(db, 'BEGIN TRANSACTION'); + + try { + await upsertAccount(db, identity, now); + await 
upsertConversation(db, identity, now); + + if (payload.mode === 'bootstrap' || payload.mode === 'append') { + await upsertIncrementalMessages( + db, + identity.conversationId, + messages, + now, + ); + } + + if (payload.mode === 'update-tail' && payload.updatedTail?.text?.trim()) { + await updateLastAssistantMessage( + db, + identity.conversationId, + payload.updatedTail, + now, + ); + } + + await run(db, 'COMMIT'); + } catch (error) { + await run(db, 'ROLLBACK'); + throw error; + } + + const { exportResult, indexResult } = await exportArchiveArtifacts( + db, + identity, + ); + + return { + accountId: identity.accountId, + conversationId: identity.conversationId, + mode: payload.mode, + messageCount: payload.mode === 'update-tail' ? 1 : messages.length, + dbPath: archivePaths.dbPath, + markdownPath: archivePaths.latestMarkdownPath, + conversationMarkdownPath: exportResult.conversationMarkdownPath, + indexMarkdownPath: indexResult.indexMarkdownPath, + }; +} diff --git a/src/archive/conversationKeyResolver.ts b/src/archive/conversationKeyResolver.ts new file mode 100644 index 0000000000..28a5c923a1 --- /dev/null +++ b/src/archive/conversationKeyResolver.ts @@ -0,0 +1,116 @@ +import sanitizeFilename from 'sanitize-filename'; +import type Service from '../models/Service'; +import { stableHash } from './db'; +import type { ArchiveConversationInput, ConversationIdentity } from './types'; + +function getAccountId(service: Service): string { + return stableHash(`account|${service.partition || service.id}`); +} + +function getAccountLabel(service: Service): string { + return service.name || service.recipe?.name || 'unknown'; +} + +function getPayloadSourceUrl(payload: ArchiveConversationInput): string | null { + return ( + payload.currentUrl || + ('sourceUrl' in payload ? payload.sourceUrl : null) || + null + ); +} + +function getPayloadConversationKey( + payload: ArchiveConversationInput, +): string | null { + return ( + ('conversationKey' in payload ? 
payload.conversationKey : null) || null + ); +} + +function getPayloadVendor( + payload: ArchiveConversationInput, + service: Service, +): string { + if ('platform' in payload && payload.platform) { + return payload.platform; + } + + if ('vendor' in payload && payload.vendor) { + return payload.vendor; + } + + return service.recipe?.id || 'unknown'; +} + +function extractConversationId(value?: string | null): string | null { + if (!value) { + return null; + } + + const match = value.match(/\/c\/([^#/?]+)/); + + if (match?.[1]) { + return match[1]; + } + + if (/^[\da-f]{8}-[\da-f-]{27}$/i.test(value)) { + return value; + } + + return null; +} + +function slugifyTitle(title: string): string { + const safe = sanitizeFilename(title || 'Conversation') + .replaceAll(/\s+/g, '-') + .replaceAll(/-+/g, '-') + .replaceAll(/^-|-$/g, ''); + + return safe || 'Conversation'; +} + +export function resolveConversationIdentity( + service: Service, + payload: ArchiveConversationInput, +): ConversationIdentity { + const accountId = getAccountId(service); + const sourceUrl = getPayloadSourceUrl(payload); + const conversationKey = getPayloadConversationKey(payload); + const vendorConversationId = + extractConversationId(sourceUrl) || + extractConversationId(conversationKey) || + null; + const conversationDiscriminator = + vendorConversationId || conversationKey || sourceUrl || 'unknown-url'; + const conversationId = stableHash( + `conversation|${accountId}|${conversationDiscriminator}`, + ); + + return { + accountId, + accountLabel: getAccountLabel(service), + vendor: getPayloadVendor(payload, service), + sourceUrl, + vendorConversationId, + conversationId, + conversationFileKey: vendorConversationId || conversationId, + title: payload.title || service.name || 'Conversation', + service, + }; +} + +export function getConversationFilenameForKey( + conversationFileKey: string, + title: string, +): string { + return `${conversationFileKey}-${slugifyTitle(title)}.md`; +} + +export function 
getConversationFilename( + identity: ConversationIdentity, +): string { + return getConversationFilenameForKey( + identity.conversationFileKey, + identity.title, + ); +} diff --git a/src/archive/conversationUpsertService.ts b/src/archive/conversationUpsertService.ts new file mode 100644 index 0000000000..83ae95d7e1 --- /dev/null +++ b/src/archive/conversationUpsertService.ts @@ -0,0 +1,44 @@ +import { run } from './db'; +import type { ConversationIdentity } from './types'; + +export async function upsertAccount( + db: any, + identity: ConversationIdentity, + now: string, +) { + await run( + db, + `INSERT INTO accounts (id, vendor, account_label, partition_name, created_at) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT(id) DO UPDATE SET vendor = excluded.vendor, account_label = excluded.account_label, partition_name = excluded.partition_name`, + [ + identity.accountId, + identity.vendor, + identity.accountLabel, + identity.service.partition || identity.service.id, + now, + ], + ); +} + +export async function upsertConversation( + db: any, + identity: ConversationIdentity, + now: string, +) { + await run( + db, + `INSERT INTO conversations (id, account_id, vendor_conversation_id, title, source_url, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(id) DO UPDATE SET title = excluded.title, source_url = excluded.source_url, updated_at = excluded.updated_at`, + [ + identity.conversationId, + identity.accountId, + identity.vendorConversationId, + identity.title, + identity.sourceUrl, + now, + now, + ], + ); +} diff --git a/src/archive/db.ts b/src/archive/db.ts new file mode 100644 index 0000000000..cda13cf6a7 --- /dev/null +++ b/src/archive/db.ts @@ -0,0 +1,227 @@ +import { createHash } from 'node:crypto'; +import { dirname, join } from 'node:path'; +import { ensureDirSync, writeFileSync } from 'fs-extra'; +import { userDataPath } from '../environment-remote'; + +const sqlite3 = require('sqlite3'); + +const schemaSql = ` +CREATE TABLE IF NOT EXISTS accounts ( + id TEXT PRIMARY KEY, + vendor TEXT NOT NULL, + account_label TEXT NOT NULL, + partition_name TEXT NOT NULL, + created_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS conversations ( + id TEXT PRIMARY KEY, + account_id TEXT NOT NULL, + vendor_conversation_id TEXT, + title TEXT, + source_url TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS messages ( + id TEXT PRIMARY KEY, + conversation_id TEXT NOT NULL, + role TEXT NOT NULL, + sender_label TEXT, + content_text TEXT NOT NULL, + content_md TEXT NOT NULL, + seq INTEGER NOT NULL, + message_key TEXT, + created_at TEXT NOT NULL, + updated_at TEXT, + hash TEXT NOT NULL +); + +CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5( + message_id UNINDEXED, + conversation_id UNINDEXED, + account_id UNINDEXED, + vendor UNINDEXED, + conversation_title, + content_text, + content_md +); +`; + +let databasePromise: Promise | null = null; + +export const archivePaths = { + rootDir: userDataPath('ai-hub'), + dbPath: userDataPath('ai-hub', 'archive.db'), + latestMarkdownPath: userDataPath('ai-hub', 'conversation.md'), + indexMarkdownPath: userDataPath('ai-hub', 'index.md'), + conversationsDir: userDataPath('ai-hub', 'conversations'), +}; + +export function 
stableHash(value: string): string { + return createHash('sha256').update(value).digest('hex'); +} + +export function run(db: any, sql: string, params: any[] = []): Promise<void> { + return new Promise((resolve, reject) => { + db.run(sql, params, (error: Error | null) => { + if (error) { + reject(error); + return; + } + + resolve(); + }); + }); +} + +export function get<T>( + db: any, + sql: string, + params: any[] = [], +): Promise<T | undefined> { + return new Promise((resolve, reject) => { + db.get(sql, params, (error: Error | null, row: T) => { + if (error) { + reject(error); + return; + } + + resolve(row); + }); + }); +} + +export function all<T>(db: any, sql: string, params: any[] = []): Promise<T[]> { + return new Promise((resolve, reject) => { + db.all(sql, params, (error: Error | null, rows: T[]) => { + if (error) { + reject(error); + return; + } + + resolve(rows || []); + }); + }); +} + +async function ensureMessageColumns(db: any) { + const columns = await all<{ name: string }>( + db, + 'PRAGMA table_info(messages)', + ); + const columnNames = new Set(columns.map(column => column.name)); + + if (!columnNames.has('message_key')) { + await run(db, 'ALTER TABLE messages ADD COLUMN message_key TEXT'); + } + + if (!columnNames.has('updated_at')) { + await run(db, 'ALTER TABLE messages ADD COLUMN updated_at TEXT'); + } +} + +export async function ensureFtsSchema(db: any) { + await run( + db, + `CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5( + message_id UNINDEXED, + conversation_id UNINDEXED, + account_id UNINDEXED, + vendor UNINDEXED, + conversation_title, + content_text, + content_md + )`, + ); +} + +export async function rebuildFtsIndex(db: any): Promise<number> { + await ensureFtsSchema(db); + await run(db, 'DELETE FROM messages_fts'); + await run( + db, + `INSERT INTO messages_fts ( + message_id, + conversation_id, + account_id, + vendor, + conversation_title, + content_text, + content_md + ) + SELECT + m.id, + m.conversation_id, + c.account_id, + a.vendor, + COALESCE(c.title, ''), 
+ m.content_text, + m.content_md + FROM messages m + JOIN conversations c ON c.id = m.conversation_id + JOIN accounts a ON a.id = c.account_id`, + ); + + const row = await get<{ count: number }>( + db, + 'SELECT COUNT(*) AS count FROM messages_fts', + ); + + return row?.count || 0; +} + +function openDatabase(): Promise<any> { + ensureDirSync(dirname(archivePaths.dbPath)); + + return new Promise((resolve, reject) => { + const db = new sqlite3.Database( + archivePaths.dbPath, + (error: Error | null) => { + if (error) { + reject(error); + return; + } + + db.exec(schemaSql, async (execError: Error | null) => { + if (execError) { + reject(execError); + return; + } + + try { + await ensureMessageColumns(db); + await ensureFtsSchema(db); + resolve(db); + } catch (migrationError) { + reject(migrationError); + } + }); + }, + ); + }); +} + +export function getArchiveDb(): Promise<any> { + if (!databasePromise) { + databasePromise = openDatabase(); + } + + return databasePromise; +} + +export function writeLatestMarkdown(content: string) { + ensureDirSync(dirname(archivePaths.latestMarkdownPath)); + writeFileSync(archivePaths.latestMarkdownPath, content, 'utf8'); +} + +export function writeIndexMarkdown(content: string) { + ensureDirSync(dirname(archivePaths.indexMarkdownPath)); + writeFileSync(archivePaths.indexMarkdownPath, content, 'utf8'); +} + +export function writeConversationMarkdown(filename: string, content: string) { + ensureDirSync(archivePaths.conversationsDir); + writeFileSync(join(archivePaths.conversationsDir, filename), content, 'utf8'); +} diff --git a/src/archive/index/archiveIndexExporter.ts b/src/archive/index/archiveIndexExporter.ts new file mode 100644 index 0000000000..74efb9951c --- /dev/null +++ b/src/archive/index/archiveIndexExporter.ts @@ -0,0 +1,98 @@ +import { all, archivePaths, writeIndexMarkdown } from '../db'; +import { getConversationMarkdownRelativePath } from '../markdownExporter'; +import type { ArchiveConversationIndexItem } from 
'../search/searchTypes'; + +type ArchiveIndexRow = { + conversationId: string; + conversationKey: string | null; + title: string | null; + updatedAt: string; + accountId: string; + vendor: string; + accountLabel: string; + messageCount: number; +}; + +function groupKey(item: ArchiveConversationIndexItem) { + return `${item.vendor} / ${item.accountLabel}`; +} + +function toMarkdown(items: ArchiveConversationIndexItem[]) { + const lines = [ + '# AI Hub Archive Index', + '', + `Updated at: ${new Date().toISOString()}`, + '', + ]; + const grouped = new Map(); + + for (const item of items) { + const key = groupKey(item); + const existing = grouped.get(key) || []; + existing.push(item); + grouped.set(key, existing); + } + + for (const [group, groupItems] of grouped.entries()) { + lines.push(`## ${group}`); + + for (const item of groupItems) { + const title = item.title || '(untitled)'; + const conversationKey = item.conversationKey || item.conversationId; + const markdownPath = item.markdownPath || '(missing)'; + + lines.push( + `- ${title}`, + ` - conversation: ${conversationKey}`, + ` - updated: ${item.updatedAt}`, + ` - messages: ${item.messageCount}`, + ` - markdown: ${markdownPath}`, + ); + } + + lines.push(''); + } + + return `${lines.join('\n').trim()}\n`; +} + +export async function listArchiveIndexItems( + db: any, +): Promise { + const rows = await all( + db, + `SELECT + c.id AS conversationId, + COALESCE(c.vendor_conversation_id, c.id) AS conversationKey, + c.title AS title, + c.updated_at AS updatedAt, + a.id AS accountId, + a.vendor AS vendor, + a.account_label AS accountLabel, + COUNT(m.id) AS messageCount + FROM conversations c + JOIN accounts a ON a.id = c.account_id + LEFT JOIN messages m ON m.conversation_id = c.id + GROUP BY c.id, c.vendor_conversation_id, c.title, c.updated_at, a.id, a.vendor, a.account_label + ORDER BY a.vendor ASC, a.account_label ASC, c.updated_at DESC`, + ); + + return rows.map(row => ({ + ...row, + markdownPath: 
getConversationMarkdownRelativePath({ + conversationFileKey: row.conversationKey || row.conversationId, + title: row.title || 'Conversation', + }), + })); +} + +export async function exportArchiveIndex(db: any) { + const items = await listArchiveIndexItems(db); + const markdown = toMarkdown(items); + writeIndexMarkdown(markdown); + + return { + itemCount: items.length, + indexMarkdownPath: archivePaths.indexMarkdownPath, + }; +} diff --git a/src/archive/markdownExporter.ts b/src/archive/markdownExporter.ts new file mode 100644 index 0000000000..0efbbb31a7 --- /dev/null +++ b/src/archive/markdownExporter.ts @@ -0,0 +1,124 @@ +import { join, relative } from 'node:path'; +import { pathExistsSync, readdirSync, removeSync } from 'fs-extra'; +import { getConversationFilenameForKey } from './conversationKeyResolver'; +import { + all, + archivePaths, + get, + writeConversationMarkdown, + writeLatestMarkdown, +} from './db'; +import type { ConversationIdentity, PersistedMessageRow } from './types'; + +function toMarkdown( + messages: PersistedMessageRow[], + title: string, + sourceUrl?: string | null, +) { + const lines = [`# ${title || 'Conversation'}`, '']; + + if (sourceUrl) { + lines.push(`Source: ${sourceUrl}`, ''); + } + + for (const message of messages) { + lines.push( + `## ${message.seq}. 
${message.sender_label || message.role}`, + '', + message.content_md || message.content_text || '', + '', + ); + } + + return `${lines.join('\n').trim()}\n`; +} + +function removeStaleConversationMarkdowns( + fileKey: string, + nextFilename: string, +) { + if (!pathExistsSync(archivePaths.conversationsDir)) { + return; + } + + const filenames = readdirSync(archivePaths.conversationsDir); + + filenames + .filter( + filename => + filename.startsWith(`${fileKey}-`) && filename !== nextFilename, + ) + .forEach(filename => { + removeSync(join(archivePaths.conversationsDir, filename)); + }); +} + +export function getConversationMarkdownFilename(input: { + conversationFileKey: string; + title: string; +}) { + return getConversationFilenameForKey(input.conversationFileKey, input.title); +} + +export function getConversationMarkdownAbsolutePath(input: { + conversationFileKey: string; + title: string; +}) { + return join( + archivePaths.conversationsDir, + getConversationMarkdownFilename(input), + ); +} + +export function getConversationMarkdownRelativePath(input: { + conversationFileKey: string; + title: string; +}) { + const relativePath = relative( + archivePaths.rootDir, + getConversationMarkdownAbsolutePath(input), + ); + + return `./${relativePath.replaceAll('\\', '/')}`; +} + +export async function exportConversationMarkdown( + db: any, + identity: ConversationIdentity, +) { + const conversation = await get<{ + title: string | null; + source_url: string | null; + }>(db, 'SELECT title, source_url FROM conversations WHERE id = ? LIMIT 1', [ + identity.conversationId, + ]); + const messages = await all( + db, + `SELECT id, seq, role, sender_label, content_md, content_text, message_key, created_at, updated_at + FROM messages + WHERE conversation_id = ? 
+ ORDER BY seq ASC`, + [identity.conversationId], + ); + const title = conversation?.title || identity.title; + const sourceUrl = conversation?.source_url || identity.sourceUrl; + const markdown = toMarkdown(messages, title, sourceUrl); + + writeLatestMarkdown(markdown); + + const nextFilename = getConversationMarkdownFilename({ + conversationFileKey: identity.conversationFileKey, + title, + }); + + removeStaleConversationMarkdowns(identity.conversationFileKey, nextFilename); + writeConversationMarkdown(nextFilename, markdown); + + return { + markdown, + conversationMarkdownPath: getConversationMarkdownAbsolutePath({ + conversationFileKey: identity.conversationFileKey, + title, + }), + }; +} diff --git a/src/archive/messageUpsertService.ts b/src/archive/messageUpsertService.ts new file mode 100644 index 0000000000..7ae2347d28 --- /dev/null +++ b/src/archive/messageUpsertService.ts @@ -0,0 +1,196 @@ +import { get, run, stableHash } from './db'; +import { + replaceConversationDocuments, + syncMessageDocuments, +} from './search/ftsService'; +import type { ArchiveScanMessage } from './types'; + +function toSenderLabel(message: ArchiveScanMessage): string { + if (message.role === 'user') { + return 'You'; + } + + if (message.role === 'assistant') { + return 'Assistant'; + } + + return 'System'; +} + +function getMessageKey( + message: ArchiveScanMessage, + conversationId: string, +): string { + return ( + message.messageKey || + stableHash( + `message-key|${conversationId}|${message.seq}|${message.role}|${message.text}`, + ) + ); +} + +function getMessageId(messageKey: string, conversationId: string): string { + return stableHash(`message|${conversationId}|${messageKey}`); +} + +export async function replaceConversationMessagesForFullScan( + db: any, + conversationId: string, + messages: ArchiveScanMessage[], + now: string, +) { + await run(db, 'DELETE FROM messages WHERE conversation_id = ?', [ + conversationId, + ]); + + const messageIds = await Promise.all( + 
messages.map(async message => { + const messageKey = getMessageKey(message, conversationId); + const contentText = message.text; + const contentMd = message.markdown || contentText; + const messageId = getMessageId(messageKey, conversationId); + + await run( + db, + `INSERT INTO messages (id, conversation_id, role, sender_label, content_text, content_md, seq, message_key, created_at, updated_at, hash) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [ + messageId, + conversationId, + message.role, + toSenderLabel(message), + contentText, + contentMd, + message.seq, + messageKey, + now, + now, + stableHash(`${message.role}|${contentText}`), + ], + ); + + return messageId; + }), + ); + + await replaceConversationDocuments(db, conversationId); + + return messageIds; +} + +export async function upsertIncrementalMessages( + db: any, + conversationId: string, + messages: ArchiveScanMessage[], + now: string, +) { + const messageIds = await Promise.all( + messages.map(async message => { + const messageKey = getMessageKey(message, conversationId); + const messageId = getMessageId(messageKey, conversationId); + const existingMessage = await get<{ id: string }>( + db, + 'SELECT id FROM messages WHERE conversation_id = ? AND message_key = ? LIMIT 1', + [conversationId, messageKey], + ); + const contentText = message.text; + const contentMd = message.markdown || contentText; + + const operation = existingMessage?.id + ? run( + db, + `UPDATE messages + SET role = ?, sender_label = ?, content_text = ?, content_md = ?, seq = ?, updated_at = ?, hash = ? 
+ WHERE id = ?`, + [ + message.role, + toSenderLabel(message), + contentText, + contentMd, + message.seq, + now, + stableHash(`${message.role}|${contentText}`), + existingMessage.id, + ], + ) + : run( + db, + `INSERT INTO messages (id, conversation_id, role, sender_label, content_text, content_md, seq, message_key, created_at, updated_at, hash) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [ + messageId, + conversationId, + message.role, + toSenderLabel(message), + contentText, + contentMd, + message.seq, + messageKey, + now, + now, + stableHash(`${message.role}|${contentText}`), + ], + ); + + await operation; + return existingMessage?.id || messageId; + }), + ); + + await syncMessageDocuments(db, messageIds); + + return messageIds; +} + +export async function updateLastAssistantMessage( + db: any, + conversationId: string, + message: ArchiveScanMessage, + now: string, +) { + const lastAssistantMessage = await get<{ id: string }>( + db, + `SELECT id + FROM messages + WHERE conversation_id = ? AND role = 'assistant' + ORDER BY seq DESC, COALESCE(updated_at, created_at) DESC + LIMIT 1`, + [conversationId], + ); + + if (!lastAssistantMessage?.id) { + const insertedIds = await upsertIncrementalMessages( + db, + conversationId, + [message], + now, + ); + return insertedIds[0] || null; + } + + const contentText = message.text; + const contentMd = message.markdown || contentText; + const messageKey = getMessageKey(message, conversationId); + + await run( + db, + `UPDATE messages + SET role = ?, sender_label = ?, content_text = ?, content_md = ?, seq = ?, message_key = ?, updated_at = ?, hash = ? 
+ WHERE id = ?`, + [ + message.role, + toSenderLabel(message), + contentText, + contentMd, + message.seq, + messageKey, + now, + stableHash(`${message.role}|${contentText}`), + lastAssistantMessage.id, + ], + ); + + await syncMessageDocuments(db, [lastAssistantMessage.id]); + + return lastAssistantMessage.id; +} diff --git a/src/archive/schema.sql b/src/archive/schema.sql new file mode 100644 index 0000000000..b5b6c83e25 --- /dev/null +++ b/src/archive/schema.sql @@ -0,0 +1,41 @@ +CREATE TABLE IF NOT EXISTS accounts ( + id TEXT PRIMARY KEY, + vendor TEXT NOT NULL, + account_label TEXT NOT NULL, + partition_name TEXT NOT NULL, + created_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS conversations ( + id TEXT PRIMARY KEY, + account_id TEXT NOT NULL, + vendor_conversation_id TEXT, + title TEXT, + source_url TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS messages ( + id TEXT PRIMARY KEY, + conversation_id TEXT NOT NULL, + role TEXT NOT NULL, + sender_label TEXT, + content_text TEXT NOT NULL, + content_md TEXT NOT NULL, + seq INTEGER NOT NULL, + message_key TEXT, + created_at TEXT NOT NULL, + updated_at TEXT, + hash TEXT NOT NULL +); + +CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5( + message_id UNINDEXED, + conversation_id UNINDEXED, + account_id UNINDEXED, + vendor UNINDEXED, + conversation_title, + content_text, + content_md +); diff --git a/src/archive/search/ftsService.ts b/src/archive/search/ftsService.ts new file mode 100644 index 0000000000..609a3bc0fd --- /dev/null +++ b/src/archive/search/ftsService.ts @@ -0,0 +1,166 @@ +import { + all, + ensureFtsSchema as ensureFtsSchemaInDb, + run, + stableHash, +} from '../db'; + +export type MessageFtsDocument = { + messageId: string; + conversationId: string; + accountId: string; + vendor: string; + conversationTitle: string | null; + contentText: string; + contentMd: string; +}; + +type MessageFtsRow = { + messageId: string; + conversationId: string; + 
accountId: string; + vendor: string; + conversationTitle: string | null; + contentText: string; + contentMd: string; +}; + +export async function ensureFtsSchema(db: any) { + await ensureFtsSchemaInDb(db); +} + +export async function deleteMessageDocument(db: any, messageId: string) { + await ensureFtsSchema(db); + await run(db, 'DELETE FROM messages_fts WHERE message_id = ?', [messageId]); +} + +export async function upsertMessageDocument( + db: any, + input: MessageFtsDocument, +) { + await ensureFtsSchema(db); + await deleteMessageDocument(db, input.messageId); + await run( + db, + `INSERT INTO messages_fts ( + message_id, + conversation_id, + account_id, + vendor, + conversation_title, + content_text, + content_md + ) VALUES (?, ?, ?, ?, ?, ?, ?)`, + [ + input.messageId, + input.conversationId, + input.accountId, + input.vendor, + input.conversationTitle || '', + input.contentText, + input.contentMd, + ], + ); +} + +export async function rebuildFtsIndex(db: any) { + await ensureFtsSchemaInDb(db); + await run(db, 'DELETE FROM messages_fts'); + await run( + db, + `INSERT INTO messages_fts ( + message_id, + conversation_id, + account_id, + vendor, + conversation_title, + content_text, + content_md + ) + SELECT + m.id, + m.conversation_id, + c.account_id, + a.vendor, + COALESCE(c.title, ''), + m.content_text, + m.content_md + FROM messages m + JOIN conversations c ON c.id = m.conversation_id + JOIN accounts a ON a.id = c.account_id`, + ); +} + +export async function replaceConversationDocuments( + db: any, + conversationId: string, +) { + await ensureFtsSchema(db); + await run(db, 'DELETE FROM messages_fts WHERE conversation_id = ?', [ + conversationId, + ]); + + const rows = await all( + db, + `SELECT + m.id AS messageId, + m.conversation_id AS conversationId, + c.account_id AS accountId, + a.vendor AS vendor, + c.title AS conversationTitle, + m.content_text AS contentText, + m.content_md AS contentMd + FROM messages m + JOIN conversations c ON c.id = 
m.conversation_id + JOIN accounts a ON a.id = c.account_id + WHERE m.conversation_id = ? + ORDER BY m.seq ASC`, + [conversationId], + ); + + await Promise.all(rows.map(row => upsertMessageDocument(db, row))); +} + +export async function syncMessageDocuments(db: any, messageIds: string[]) { + const uniqueMessageIds = [...new Set(messageIds.filter(Boolean))]; + + if (uniqueMessageIds.length === 0) { + return; + } + + await ensureFtsSchema(db); + + const placeholders = uniqueMessageIds.map(() => '?').join(', '); + const rows = await all( + db, + `SELECT + m.id AS messageId, + m.conversation_id AS conversationId, + c.account_id AS accountId, + a.vendor AS vendor, + c.title AS conversationTitle, + m.content_text AS contentText, + m.content_md AS contentMd + FROM messages m + JOIN conversations c ON c.id = m.conversation_id + JOIN accounts a ON a.id = c.account_id + WHERE m.id IN (${placeholders})`, + uniqueMessageIds, + ); + + await Promise.all( + uniqueMessageIds.map(messageId => deleteMessageDocument(db, messageId)), + ); + await Promise.all(rows.map(row => upsertMessageDocument(db, row))); +} + +export function createFallbackMessageKey(input: { + conversationId: string; + seq: number; + role: string; + text: string; +}) { + return stableHash( + `message-key|${input.conversationId}|${input.seq}|${input.role}|${input.text}`, + ); +} diff --git a/src/archive/search/searchService.ts b/src/archive/search/searchService.ts new file mode 100644 index 0000000000..d8c20718c9 --- /dev/null +++ b/src/archive/search/searchService.ts @@ -0,0 +1,129 @@ +import { all } from '../db'; +import { getConversationMarkdownRelativePath } from '../markdownExporter'; +import type { + ArchiveConversationIndexItem, + ArchiveSearchFilters, + ArchiveSearchMessageResult, +} from './searchTypes'; + +type ConversationTitleRow = { + conversationId: string; + conversationKey: string | null; + title: string | null; + vendor: string; + accountId: string; + accountLabel: string; + updatedAt: string; + 
messageCount: number; +}; + +export async function searchMessages( + db: any, + query: string, + filters: ArchiveSearchFilters = {}, +): Promise { + const normalizedQuery = query.trim(); + + if (!normalizedQuery) { + return []; + } + + const clauses = ['messages_fts MATCH ?']; + const params: (string | number)[] = [normalizedQuery]; + + if (filters.vendor) { + clauses.push('vendor = ?'); + params.push(filters.vendor); + } + + if (filters.accountId) { + clauses.push('account_id = ?'); + params.push(filters.accountId); + } + + if (filters.conversationId) { + clauses.push('conversation_id = ?'); + params.push(filters.conversationId); + } + + params.push(filters.limit || 20); + + return all( + db, + `SELECT + message_id AS messageId, + conversation_id AS conversationId, + account_id AS accountId, + vendor, + NULLIF(conversation_title, '') AS conversationTitle, + content_text AS contentText, + snippet(messages_fts, 5, '[', ']', ' … ', 12) AS snippet, + bm25(messages_fts) AS rank + FROM messages_fts + WHERE ${clauses.join(' AND ')} + ORDER BY rank + LIMIT ?`, + params, + ); +} + +export async function searchConversationsByTitle( + db: any, + query: string, + filters: ArchiveSearchFilters = {}, +): Promise { + const normalizedQuery = query.trim(); + + if (!normalizedQuery) { + return []; + } + + const clauses = ['c.title LIKE ?']; + const params: (string | number)[] = [`%${normalizedQuery}%`]; + + if (filters.vendor) { + clauses.push('a.vendor = ?'); + params.push(filters.vendor); + } + + if (filters.accountId) { + clauses.push('a.id = ?'); + params.push(filters.accountId); + } + + if (filters.conversationId) { + clauses.push('c.id = ?'); + params.push(filters.conversationId); + } + + params.push(filters.limit || 20); + + const rows = await all( + db, + `SELECT + c.id AS conversationId, + COALESCE(c.vendor_conversation_id, c.id) AS conversationKey, + c.title AS title, + a.vendor AS vendor, + a.id AS accountId, + a.account_label AS accountLabel, + c.updated_at AS 
updatedAt, + COUNT(m.id) AS messageCount + FROM conversations c + JOIN accounts a ON a.id = c.account_id + LEFT JOIN messages m ON m.conversation_id = c.id + WHERE ${clauses.join(' AND ')} + GROUP BY c.id, c.vendor_conversation_id, c.title, a.vendor, a.id, a.account_label, c.updated_at + ORDER BY c.updated_at DESC + LIMIT ?`, + params, + ); + + return rows.map(row => ({ + ...row, + markdownPath: getConversationMarkdownRelativePath({ + conversationFileKey: row.conversationKey || row.conversationId, + title: row.title || 'Conversation', + }), + })); +} diff --git a/src/archive/search/searchTypes.ts b/src/archive/search/searchTypes.ts new file mode 100644 index 0000000000..a1078657df --- /dev/null +++ b/src/archive/search/searchTypes.ts @@ -0,0 +1,29 @@ +export type ArchiveSearchFilters = { + vendor?: string; + accountId?: string; + conversationId?: string; + limit?: number; +}; + +export type ArchiveSearchMessageResult = { + messageId: string; + conversationId: string; + accountId: string; + vendor: string; + conversationTitle: string | null; + snippet: string; + contentText: string; + rank: number; +}; + +export type ArchiveConversationIndexItem = { + conversationId: string; + conversationKey: string | null; + title: string | null; + vendor: string; + accountId: string; + accountLabel: string; + updatedAt: string; + messageCount: number; + markdownPath: string | null; +}; diff --git a/src/archive/types.ts b/src/archive/types.ts new file mode 100644 index 0000000000..d345a9c74e --- /dev/null +++ b/src/archive/types.ts @@ -0,0 +1,69 @@ +import type Service from '../models/Service'; + +export type ArchiveMessageRole = 'user' | 'assistant' | 'system'; + +export type ArchiveScanMessage = { + seq: number; + role: ArchiveMessageRole; + text: string; + markdown?: string; + html?: string; + id?: string; + messageKey?: string; +}; + +export type ArchiveScanPayload = { + platform?: string; + title?: string; + model?: string; + currentUrl?: string; + messageCount?: number; + 
messages?: ArchiveScanMessage[]; +}; + +export type ArchiveIncrementalMode = + | 'bootstrap' + | 'append' + | 'update-tail' + | 'rescan'; + +export type ArchiveIncrementalPayload = { + vendor?: string; + conversationKey?: string; + sourceUrl?: string; + currentUrl?: string; + title?: string; + model?: string; + mode: ArchiveIncrementalMode; + messages?: ArchiveScanMessage[]; + updatedTail?: ArchiveScanMessage | null; + scannedAt?: string; +}; + +export type ArchiveConversationInput = + | ArchiveScanPayload + | ArchiveIncrementalPayload; + +export type ConversationIdentity = { + accountId: string; + accountLabel: string; + vendor: string; + sourceUrl: string | null; + vendorConversationId: string | null; + conversationId: string; + conversationFileKey: string; + title: string; + service: Service; +}; + +export type PersistedMessageRow = { + id: string; + seq: number; + role: string; + sender_label: string | null; + content_md: string; + content_text: string; + message_key: string | null; + created_at: string; + updated_at: string | null; +}; diff --git a/src/containers/settings/RecipesScreen.tsx b/src/containers/settings/RecipesScreen.tsx index 5540b602c0..91a5d054cb 100644 --- a/src/containers/settings/RecipesScreen.tsx +++ b/src/containers/settings/RecipesScreen.tsx @@ -1,4 +1,4 @@ -import { readJsonSync } from 'fs-extra'; +import { pathExistsSync, readJsonSync } from 'fs-extra'; import { type IReactionDisposer, autorun } from 'mobx'; import { inject, observer } from 'mobx-react'; import { Component, type ReactElement } from 'react'; @@ -10,7 +10,6 @@ import ErrorBoundary from '../../components/util/ErrorBoundary'; import withParams from '../../components/util/WithParams'; import { CUSTOM_WEBSITE_RECIPE_ID, FERDIUM_DEV_DOCS } from '../../config'; import { userDataRecipesPath } from '../../environment-remote'; -import { communityRecipesStore } from '../../features/communityRecipes'; import { asarRecipesPath } from '../../helpers/asar-helpers'; import { openPath } 
from '../../helpers/url-helpers'; import type Recipe from '../../models/Recipe'; @@ -35,7 +34,10 @@ class RecipesScreen extends Component { constructor(props: IProps) { super(props); - this.customRecipes = readJsonSync(asarRecipesPath('all.json')); + const allJsonPath = asarRecipesPath('all.json'); + this.customRecipes = pathExistsSync(allJsonPath) + ? readJsonSync(allJsonPath) + : []; this.state = { needle: null, currentFilter: 'featured', @@ -101,7 +103,14 @@ class RecipesScreen extends Component { // Create an array of RecipePreviews from an array of recipe objects createPreviews(recipes: Recipe[]) { - return recipes.map((recipe: any) => new RecipePreview(recipe)); + return recipes.map( + (recipe: any) => + new RecipePreview({ + ...recipe, + icon: recipe.icon || recipe.icons?.svg || '', + isDevRecipe: recipe.local, + }), + ); } resetSearch(): void { @@ -112,6 +121,8 @@ class RecipesScreen extends Component { const { recipePreviews, recipes, services } = this.props.stores!; const { app: appActions, service: serviceActions } = this.props.actions!; const filter = this.state.currentFilter; + const devRecipes = recipes.all.filter(recipe => recipe.local); + const devRecipePreviews = this.createPreviews(devRecipes); let recipeFilter; @@ -121,7 +132,7 @@ class RecipesScreen extends Component { ...this.createPreviews(this.customRecipes), ]); } else if (filter === 'dev') { - recipeFilter = communityRecipesStore.communityRecipes; + recipeFilter = this.prepareRecipes(devRecipePreviews); } else { recipeFilter = recipePreviews.featured; } @@ -132,11 +143,11 @@ class RecipesScreen extends Component { needle === null ? recipeFilter : this.prepareRecipes([ - // All search recipes from server - ...recipePreviews.searchResults, + // All search recipes from current source + ...(filter === 'dev' ? 
[] : recipePreviews.searchResults), // All search recipes from local recipes ...this.createPreviews( - this.customRecipes.filter( + [...this.customRecipes, ...devRecipes].filter( (recipe: Recipe) => recipe.name.toLowerCase().includes(needle.toLowerCase()) || (recipe.aliases || []).some(alias => diff --git a/src/i18n/locales/en-US.json b/src/i18n/locales/en-US.json index 3ef0884c53..1a108eff0d 100644 --- a/src/i18n/locales/en-US.json +++ b/src/i18n/locales/en-US.json @@ -181,6 +181,7 @@ "menu.view.reloadFerdium": "Reload Ferdium", "menu.view.reloadService": "Reload Service", "menu.view.reloadTodos": "Reload ToDos", + "menu.view.rescanCurrentConversation": "Rescan Current Conversation", "menu.view.resetZoom": "Actual Size", "menu.view.splitModeToggle": "Toggle Split Mode", "menu.view.toggleDarkMode": "Toggle Dark Mode", diff --git a/src/lib/Menu.ts b/src/lib/Menu.ts index 188cc676ef..cf7616a5fa 100644 --- a/src/lib/Menu.ts +++ b/src/lib/Menu.ts @@ -96,6 +96,10 @@ export const menuItems = defineMessages({ id: 'menu.edit.findInPage', defaultMessage: 'Find in Page', }, + rescanCurrentConversation: { + id: 'menu.view.rescanCurrentConversation', + defaultMessage: 'Rescan Current Conversation', + }, speech: { id: 'menu.edit.speech', defaultMessage: 'Speech', @@ -463,6 +467,22 @@ function titleBarTemplateFactory( }); }, }, + { + label: intl.formatMessage(menuItems.rescanCurrentConversation), + accelerator: `${cmdOrCtrlShortcutKey()}+${shiftKey()}+${altKey()}+S`, + click() { + const activeService = getActiveService(); + if (!activeService) { + return; + } + activeService.webview.focus(); + window['ferdium'].actions.service.sendIPCMessage({ + serviceId: activeService.id, + channel: 'aihub-rescan-conversation', + args: {}, + }); + }, + }, { type: 'separator', }, diff --git a/src/stores/AppStore.ts b/src/stores/AppStore.ts index 9f6ed13199..768466e4d3 100644 --- a/src/stores/AppStore.ts +++ b/src/stores/AppStore.ts @@ -9,7 +9,13 @@ import { } from '@electron/remote'; import 
AutoLaunch from 'auto-launch'; import { ipcRenderer } from 'electron'; -import { readJsonSync, readdirSync, writeJsonSync } from 'fs-extra'; +import { + ensureDirSync, + pathExistsSync, + readJsonSync, + readdirSync, + writeJsonSync, +} from 'fs-extra'; import { action, computed, makeObservable, observable } from 'mobx'; import moment from 'moment'; import ms from 'ms'; @@ -373,6 +379,10 @@ export default class AppStore extends TypedStore { _initializeSandboxes() { this._readSandboxes(); + if (!pathExistsSync(userDataPath('Partitions'))) { + return; + } + // Check partitions of the sandboxes that no longer exist const dir = readdirSync(userDataPath('Partitions')); dir @@ -410,9 +420,14 @@ export default class AppStore extends TypedStore { } _readSandboxes() { - this.sandboxServices = readJsonSync( - userDataPath('config', 'sandboxes.json'), - ); + const sandboxesPath = userDataPath('config', 'sandboxes.json'); + + if (!pathExistsSync(sandboxesPath)) { + this.sandboxServices = []; + return; + } + + this.sandboxServices = readJsonSync(sandboxesPath); } _writeSandboxes() { @@ -424,10 +439,10 @@ export default class AppStore extends TypedStore { ), })); - writeJsonSync( - userDataPath('config', 'sandboxes.json'), - this.sandboxServices, - ); + const sandboxesPath = userDataPath('config', 'sandboxes.json'); + ensureDirSync(userDataPath('config')); + + writeJsonSync(sandboxesPath, this.sandboxServices); } @computed get cacheSize() { diff --git a/src/stores/RecipesStore.ts b/src/stores/RecipesStore.ts index 3cd81807ae..5e4713be7c 100644 --- a/src/stores/RecipesStore.ts +++ b/src/stores/RecipesStore.ts @@ -1,4 +1,4 @@ -import { readJSONSync } from 'fs-extra'; +import { pathExistsSync, readJSONSync } from 'fs-extra'; import { action, computed, makeObservable, observable } from 'mobx'; import semver from 'semver'; @@ -103,7 +103,9 @@ export default class RecipesStore extends TypedStore { // Check for local updates const allJsonFile = asarRecipesPath('all.json'); - const 
allJson = readJSONSync(allJsonFile); + const allJson = pathExistsSync(allJsonFile) + ? readJSONSync(allJsonFile) + : []; const localUpdates: string[] = []; for (const recipe of Object.keys(recipes)) { diff --git a/src/stores/ServicesStore.ts b/src/stores/ServicesStore.ts index 58c6b2a87c..15c40966ca 100644 --- a/src/stores/ServicesStore.ts +++ b/src/stores/ServicesStore.ts @@ -21,6 +21,8 @@ import { SPELLCHECKER_LOCALES } from '../i18n/languages'; import { cleanseJSObject } from '../jsUtils'; import type { UnreadServices } from '../lib/dbus/Ferdium'; import type Service from '../models/Service'; +import { handleAiHubIncrementalMessages } from '../webview/handlers/handleAiHubIncrementalMessages'; +import { handleAiHubScanMessages } from '../webview/handlers/handleAiHubScanMessages'; import CachedRequest from './lib/CachedRequest'; import Request from './lib/Request'; import TypedStore from './lib/TypedStore'; @@ -840,6 +842,32 @@ export default class ServicesStore extends TypedStore { break; } + case 'aihub-scan-messages': { + const payload = args[0] || {}; + + handleAiHubScanMessages({ + service, + serviceId, + payload, + debug, + }); + + break; + } + + case 'aihub-incremental-messages': { + const payload = args[0] || {}; + + handleAiHubIncrementalMessages({ + service, + serviceId, + payload, + debug, + }); + + break; + } + case 'load-available-displays': { debug('Received request for capture devices from', serviceId); ipcRenderer.send('load-available-displays', { diff --git a/src/webview/handlers/handleAiHubIncrementalMessages.ts b/src/webview/handlers/handleAiHubIncrementalMessages.ts new file mode 100644 index 0000000000..9c2b9c3d0c --- /dev/null +++ b/src/webview/handlers/handleAiHubIncrementalMessages.ts @@ -0,0 +1,39 @@ +import { archiveIncrementalMessages } from '../../archive/archiveService'; +import type { ArchiveIncrementalPayload } from '../../archive/types'; +import type Service from '../../models/Service'; + +export function 
handleAiHubIncrementalMessages({ + service, + serviceId, + payload, + debug, +}: { + service: Service; + serviceId: string; + payload: ArchiveIncrementalPayload; + debug: (...args: any[]) => void; +}) { + debug('[AI-HUB] received incremental conversation payload', { + serviceId, + serviceName: service?.name, + recipeId: service?.recipe?.id, + mode: payload.mode, + title: payload.title, + currentUrl: payload.currentUrl, + sourceUrl: payload.sourceUrl, + conversationKey: payload.conversationKey, + messageCount: payload.messages?.length || 0, + updatedTail: payload.updatedTail, + }); + + archiveIncrementalMessages({ service, payload }) + .then(result => { + debug('[AI-HUB] archived incremental conversation payload', result); + }) + .catch(error => { + debug('[AI-HUB] failed to archive incremental conversation payload', { + serviceId, + error, + }); + }); +} diff --git a/src/webview/handlers/handleAiHubScanMessages.ts b/src/webview/handlers/handleAiHubScanMessages.ts new file mode 100644 index 0000000000..e43a9229a0 --- /dev/null +++ b/src/webview/handlers/handleAiHubScanMessages.ts @@ -0,0 +1,37 @@ +import { archiveConversationScan } from '../../archive/archiveService'; +import type { ArchiveScanPayload } from '../../archive/types'; +import type Service from '../../models/Service'; + +export function handleAiHubScanMessages({ + service, + serviceId, + payload, + debug, +}: { + service: Service; + serviceId: string; + payload: ArchiveScanPayload; + debug: (...args: any[]) => void; +}) { + debug('[AI-HUB] received conversation scan', { + serviceId, + serviceName: service?.name, + recipeId: service?.recipe?.id, + title: payload.title, + model: payload.model, + currentUrl: payload.currentUrl, + messageCount: payload.messageCount, + messages: payload.messages, + }); + + archiveConversationScan({ service, payload }) + .then(result => { + debug('[AI-HUB] archived conversation scan', result); + }) + .catch(error => { + debug('[AI-HUB] failed to archive conversation scan', { + 
serviceId, + error, + }); + }); +} diff --git a/src/webview/lib/RecipeWebview.ts b/src/webview/lib/RecipeWebview.ts index 2e967a7914..ff4a86cba0 100644 --- a/src/webview/lib/RecipeWebview.ts +++ b/src/webview/lib/RecipeWebview.ts @@ -42,6 +42,8 @@ class RecipeWebview { toggleToTalkFunc = () => null; + rescanConversationFunc = () => null; + darkModeHandler: ((darkMode: boolean, config: any) => void) | null = null; // TODO Remove this once we implement a proper wrapper. @@ -204,6 +206,16 @@ class RecipeWebview { toggleToTalk(fn) { this.toggleToTalkFunc = fn; } + + setRescanConversationHandler(fn) { + if (typeof fn === 'function') { + this.rescanConversationFunc = fn; + } + } + + rescanConversation() { + this.rescanConversationFunc(); + } } export default RecipeWebview; diff --git a/src/webview/recipe.ts b/src/webview/recipe.ts index ad2215ffdf..c3cd099f6f 100644 --- a/src/webview/recipe.ts +++ b/src/webview/recipe.ts @@ -53,6 +53,10 @@ window.chrome.runtime.sendMessage = noop; const debug = require('../preload-safe-debug')('Ferdium:Plugin'); +const aiHubBridge = { + rescanConversation: noop, +}; + const badgeHandler = new BadgeHandler(); const dialogTitleHandler = new DialogTitleHandler(); @@ -129,6 +133,7 @@ contextBridge.exposeInMainWorld('ferdium', { ); }, getDisplayMediaSelector, + aiHubRescanConversation: () => aiHubBridge.rescanConversation(), }); ipcRenderer.sendToHost( @@ -161,6 +166,7 @@ class RecipeController { 'get-service-id': 'serviceIdEcho', 'find-in-page': 'openFindInPage', 'toggle-to-talk': 'toggleToTalk', + 'aihub-rescan-conversation': 'rescanConversation', }; universalDarkModeInjected = false; @@ -245,6 +251,7 @@ class RecipeController { notificationsHandler, sessionHandler, ); + aiHubBridge.rescanConversation = () => this.recipe?.rescanConversation(); if (existsSync(modulePath)) { require(modulePath)(this.recipe, { ...config, recipe }); debug('Initialize Recipe', config, recipe); @@ -297,6 +304,10 @@ class RecipeController { 
this.findInPage?.openFindWindow(); } + rescanConversation() { + this.recipe?.rescanConversation(); + } + update() { debug('enableSpellchecking', this.settings.app.enableSpellchecking); debug('isDarkModeEnabled', this.settings.service.isDarkModeEnabled);