diff --git a/.env.template b/.env.template
index feaa012..889d1e3 100644
--- a/.env.template
+++ b/.env.template
@@ -8,6 +8,11 @@ LOG_LEVEL="info" # The log level. Options: 'trace', 'deb
 OPEN_OBSERVE_USER="user@bookhive.buzz" # The email of the user that will be used to observe the open books
 OPEN_OBSERVE_PASSWORD="password" # The password of the user that will be used to observe the open books
 OPEN_OBSERVE_URL="http://localhost:5080" # The URL of the OpenObserve instance
+# Export (optional)
+# If set, enables GET /admin/export (Authorization: Bearer <token>)
+EXPORT_SHARED_SECRET=""
+# Optional directory for temporary export files (defaults to dirname(DB_PATH))
+DB_EXPORT_DIR=""
 # Secrets
 # Must set this in production. May be generated with `openssl rand -base64 33`
 # COOKIE_SECRET=""
diff --git a/.github/workflows/database-export.yml b/.github/workflows/database-export.yml
new file mode 100644
index 0000000..30e0c58
--- /dev/null
+++ b/.github/workflows/database-export.yml
@@ -0,0 +1,43 @@
+name: Publish database export artifact
+
+on:
+  schedule:
+    # Weekly (Sunday 02:15 UTC)
+    - cron: "15 2 * * 0"
+  workflow_dispatch: {}
+
+permissions:
+  contents: read
+  actions: write
+
+concurrency:
+  group: database-export
+  cancel-in-progress: false
+
+jobs:
+  export:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set export filename date
+        run: echo "EXPORT_DATE=$(date -u +%Y-%m-%d)" >> "$GITHUB_ENV"
+
+      - name: Download export from BookHive instance
+        env:
+          EXPORT_URL: ${{ secrets.BOOKHIVE_EXPORT_URL }}
+          EXPORT_SECRET: ${{ secrets.BOOKHIVE_EXPORT_SHARED_SECRET }}
+        run: |
+          test -n "$EXPORT_URL"
+          test -n "$EXPORT_SECRET"
+          curl -fL --retry 3 --retry-delay 5 \
+            -H "Authorization: Bearer $EXPORT_SECRET" \
+            "$EXPORT_URL" \
+            -o "bookhive-export.tgz"
+          ls -lh "bookhive-export.tgz"
+
+      - name: Upload export as GitHub Actions artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: bookhive-export-${{ env.EXPORT_DATE }}
+          path: bookhive-export.tgz
+          retention-days: 30
+
diff --git a/README.md b/README.md
index b932e1d..a5957c9 100644
--- a/README.md
+++ b/README.md
@@ -79,3 +79,16 @@ pnpm test:ui
 - **Backend**: [Hono](https://hono.dev) with AT Proto for OAuth
 - **Frontend**: Mostly static HTML, with some Hono JSX for dynamic content (Fast as possible)
 - **Database**: SQLite, with Kysely as the ORM
+
+## 🗄️ Weekly database export (GitHub Actions artifact)
+
+This repo includes a workflow that can fetch a **sanitized SQLite export** from your running BookHive instance and upload it as a GitHub Actions artifact (weekly cron + manual trigger).
+
+- **Server endpoint**: `GET /admin/export`
+  - Requires `EXPORT_SHARED_SECRET` to be set
+  - Request header: `Authorization: Bearer <token>`
+  - Returns a `.tgz` containing `db.sqlite`, `kv.sqlite` (with auth tables excluded), and `manifest.json`
+- **Workflow**: `.github/workflows/database-export.yml`
+  - Configure GitHub repo secrets:
+    - `BOOKHIVE_EXPORT_URL` (e.g. `https://bookhive.example.com/admin/export`)
+    - `BOOKHIVE_EXPORT_SHARED_SECRET`
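
A consumer-side note on the artifact: `manifest.json` carries an MD5 and byte size for `db.sqlite` and `kv.sqlite` (the manifest is serialized before its own entry is added, so it cannot checksum itself). A minimal verification sketch in TypeScript; this is a hypothetical consumer script, not part of the PR, and it assumes the archive has already been extracted into `./export`:

```ts
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";

// Shape mirrors the manifest written by createSanitizedExportArchive
type Manifest = {
  files: Array<{ name: string; md5: string; size: number }>;
};

const dir = "./export"; // placeholder: wherever bookhive-export.tgz was extracted
const manifest: Manifest = JSON.parse(
  fs.readFileSync(path.join(dir, "manifest.json"), "utf8"),
);

for (const file of manifest.files) {
  const buf = fs.readFileSync(path.join(dir, file.name));
  const md5 = crypto.createHash("md5").update(buf).digest("hex");
  if (md5 !== file.md5 || buf.length !== file.size) {
    throw new Error(`Checksum mismatch for ${file.name}`);
  }
}
console.log("All export checksums verified");
```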
diff --git a/eslint.config.js b/eslint.config.js
new file mode 100644
index 0000000..05cb6e9
--- /dev/null
+++ b/eslint.config.js
@@ -0,0 +1,28 @@
+import js from "@eslint/js";
+import tseslintPlugin from "@typescript-eslint/eslint-plugin";
+import tsParser from "@typescript-eslint/parser";
+
+export default [
+  {
+    ignores: ["dist/**", "src/bsky/lexicon/**", ".eslintrc.cjs"],
+  },
+  js.configs.recommended,
+  {
+    files: ["**/*.ts", "**/*.tsx"],
+    languageOptions: {
+      parser: tsParser,
+      parserOptions: {
+        ecmaVersion: "latest",
+        sourceType: "module",
+        ecmaFeatures: { jsx: true },
+      },
+    },
+    plugins: {
+      "@typescript-eslint": tseslintPlugin,
+    },
+    rules: {
+      ...tseslintPlugin.configs.recommended.rules,
+    },
+  },
+];
+
diff --git a/src/env.ts b/src/env.ts
index d01207f..31a420a 100644
--- a/src/env.ts
+++ b/src/env.ts
@@ -15,6 +15,16 @@ export const env = cleanEnv(process.env, {
     devDefault: ":memory:",
     desc: "Path to the KV SQLite database",
   }),
+  EXPORT_SHARED_SECRET: str({
+    default: "",
+    desc:
+      "Shared secret for triggering DB exports via /admin/export (Bearer token). Leave empty to disable.",
+  }),
+  DB_EXPORT_DIR: str({
+    default: "",
+    desc:
+      "Directory to write temporary export artifacts. Defaults to the directory containing DB_PATH.",
+  }),
   LOG_LEVEL: str({ default: "info", desc: "Log level for the app" }),
   COOKIE_SECRET: str({ devDefault: "00000000000000000000000000000000" }),
   OPEN_OBSERVE_URL: str({ devDefault: "" }),
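
One note on `EXPORT_SHARED_SECRET`: there is no format requirement beyond being unguessable. A minimal sketch for generating a value with Node's built-in crypto, matching the entropy of the `openssl rand -base64 33` command the template suggests for `COOKIE_SECRET` (hypothetical helper, not part of the PR):

```ts
import crypto from "node:crypto";

// 33 random bytes, base64-encoded: the same entropy as `openssl rand -base64 33`
const secret = crypto.randomBytes(33).toString("base64");
console.log(`EXPORT_SHARED_SECRET="${secret}"`);
```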
diff --git a/src/index.ts b/src/index.ts
index dac6f1b..32189e3 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -41,6 +41,12 @@ import { createRouter, searchBooks } from "./routes.tsx";
 import sqliteKv from "./sqlite-kv.ts";
 import type { HiveId } from "./types.ts";
 import { createBatchTransform } from "./utils/batchTransform.ts";
+import {
+  cleanupExportPaths,
+  createExportReadStream,
+  createSanitizedExportArchive,
+  isAuthorizedExportRequest,
+} from "./utils/dbExport.ts";
 import {
   getGoodreadsCsvParser,
   getStorygraphCsvParser,
@@ -55,6 +61,8 @@
 import { lazy } from "./utils/lazy.ts";
 import { readThroughCache } from "./utils/readThroughCache.ts";
 
+import fs from "node:fs";
+import path from "node:path";
 
 // Application state passed to the router and elsewhere
 export type AppContext = {
@@ -255,6 +263,133 @@
     app.use("*", registerMetrics);
     app.get("/metrics", printMetrics);
 
+    // Download a sanitized SQLite export bundle (db + kv without auth tables)
+    app.get("/admin/export", async (c) => {
+      const ctx = c.get("ctx");
+      const clientIp =
+        c.req.header("x-forwarded-for")?.split(",")[0].trim() ||
+        c.req.header("x-real-ip") ||
+        "unknown";
+
+      try {
+        // Hide endpoint if not configured
+        if (!env.EXPORT_SHARED_SECRET) {
+          ctx.logger.warn(
+            { ip: clientIp, reason: "endpoint_not_configured" },
+            "export endpoint access attempt - endpoint disabled",
+          );
+          return c.json({ message: "Not Found" }, 404);
+        }
+
+        // Check authorization
+        const authorization = c.req.header("authorization");
+        if (
+          !isAuthorizedExportRequest({
+            authorizationHeader: authorization,
+            sharedSecret: env.EXPORT_SHARED_SECRET,
+          })
+        ) {
+          ctx.logger.warn(
+            { ip: clientIp, reason: "invalid_authorization" },
+            "export endpoint unauthorized access attempt",
+          );
+          return c.json({ message: "Not Found" }, 404);
+        }
+
+        // Validate database path
+        if (!env.DB_PATH || env.DB_PATH === ":memory:") {
+          ctx.logger.error(
+            { ip: clientIp, dbPath: env.DB_PATH },
+            "export endpoint called but DB_PATH is not a file path",
+          );
+          return c.json(
+            { message: "DB exports require DB_PATH to be a file path" },
+            400,
+          );
+        }
+
+        const exportDir =
+          env.DB_EXPORT_DIR?.trim() ||
+          path.join(path.dirname(env.DB_PATH), "exports");
+
+        ctx.logger.info(
+          { ip: clientIp, exportDir },
+          "starting database export",
+        );
+
+        const startTime = Date.now();
+        let result;
+
+        try {
+          await fs.promises.mkdir(exportDir, { recursive: true });
+
+          const includeKv =
+            Boolean(env.KV_DB_PATH) &&
+            env.KV_DB_PATH !== ":memory:" &&
+            fs.existsSync(env.KV_DB_PATH);
+
+          result = await createSanitizedExportArchive({
+            dbPath: env.DB_PATH,
+            kvPath: includeKv ? env.KV_DB_PATH : undefined,
+            exportDir,
+            includeKv,
+          });
+        } catch (err) {
+          const duration = Date.now() - startTime;
+          ctx.logger.error(
+            {
+              ip: clientIp,
+              duration,
+              error: err instanceof Error ? err.message : String(err),
+              stack: err instanceof Error ? err.stack : undefined,
+            },
+            "database export failed",
+          );
+          return c.json({ message: "Failed to create export archive" }, 500);
+        }
+
+        const duration = Date.now() - startTime;
+        const stream = createExportReadStream(result.archivePath, {
+          onClose: () => {
+            ctx.logger.info(
+              { ip: clientIp, filename: result.filename, duration },
+              "database export completed successfully",
+            );
+            cleanupExportPaths({
+              archivePath: result.archivePath,
+              tmpDir: result.tmpDir,
+            });
+          },
+          onError: (err) => {
+            ctx.logger.error(
+              { ip: clientIp, filename: result.filename, error: err.message },
+              "error streaming export file",
+            );
+            cleanupExportPaths({
+              archivePath: result.archivePath,
+              tmpDir: result.tmpDir,
+            });
+          },
+        });
+
+        // The body is a gzip *file* (a .tgz download), so only Content-Type is
+        // set; a Content-Encoding: gzip header here would invite proxies and
+        // browsers to transparently decompress and corrupt the archive
+        return c.body(stream, 200, {
+          "Content-Type": "application/gzip",
+          "Content-Disposition": `attachment; filename="${result.filename}"`,
+          "Cache-Control": "no-store",
+        });
+      } catch (err) {
+        ctx.logger.error(
+          {
+            ip: clientIp,
+            error: err instanceof Error ? err.message : String(err),
+          },
+          "unexpected error in export endpoint",
+        );
+        return c.json({ message: "Internal server error" }, 500);
+      }
+    });
+
     // This is to import a Goodreads CSV export
     // It is here because we don't want it behind the etag middleware
     app.post(
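
For ad-hoc pulls outside the workflow, the same request the workflow's curl step makes can be issued from TypeScript. A minimal sketch (hypothetical script, not part of the PR; the URL and env var names are placeholders) that buffers the archive in memory to keep the sketch short:

```ts
import fs from "node:fs/promises";

// Placeholders: point these at your instance and secret
const url =
  process.env.BOOKHIVE_EXPORT_URL ?? "https://bookhive.example.com/admin/export";
const secret = process.env.BOOKHIVE_EXPORT_SHARED_SECRET ?? "";

const res = await fetch(url, {
  headers: { Authorization: `Bearer ${secret}` },
});
if (!res.ok) {
  // The endpoint answers 404 both when disabled and when the token is wrong
  throw new Error(`Export request failed: ${res.status}`);
}

// Buffers the whole archive in memory; fine for modest database sizes
await fs.writeFile(
  "bookhive-export.tgz",
  Buffer.from(await res.arrayBuffer()),
);
console.log("Saved bookhive-export.tgz");
```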
diff --git a/src/utils/dbExport.ts b/src/utils/dbExport.ts
new file mode 100644
index 0000000..f17f9bc
--- /dev/null
+++ b/src/utils/dbExport.ts
@@ -0,0 +1,375 @@
+import { spawn } from "node:child_process";
+import crypto from "node:crypto";
+import fs from "node:fs";
+import fsp from "node:fs/promises";
+import path from "node:path";
+import { Readable } from "node:stream";
+
+import Database from "better-sqlite3";
+
+type ExportResult = { archivePath: string; filename: string; tmpDir: string };
+
+type ExportManifest = {
+  createdAt: string;
+  version: string;
+  files: Array<{ name: string; md5: string; size: number }>;
+  excludedKvTables: string[];
+  schema?: { tables: string[]; views: string[] };
+};
+
+function toError(err: unknown): Error {
+  return err instanceof Error ? err : new Error(String(err));
+}
+
+function timingSafeEqualString(a: string, b: string): boolean {
+  const aBuf = Buffer.from(a);
+  const bBuf = Buffer.from(b);
+  if (aBuf.length !== bBuf.length) {
+    // Compare a buffer against itself so the length mismatch costs roughly
+    // the same time as a real comparison before returning false
+    crypto.timingSafeEqual(aBuf, aBuf);
+    return false;
+  }
+  return crypto.timingSafeEqual(aBuf, bBuf);
+}
+
+export function isAuthorizedExportRequest(opts: {
+  authorizationHeader?: string;
+  sharedSecret: string;
+}) {
+  const { authorizationHeader, sharedSecret } = opts;
+  if (!sharedSecret) return false;
+  if (!authorizationHeader) return false;
+  const match = authorizationHeader.match(/^Bearer\s+(.+)$/i);
+  if (!match) return false;
+  return timingSafeEqualString(match[1], sharedSecret);
+}
+
+async function sqliteBackup({
+  sourcePath,
+  destPath,
+}: {
+  sourcePath: string;
+  destPath: string;
+}) {
+  const db = new Database(sourcePath, { fileMustExist: true });
+  try {
+    db.pragma("busy_timeout = 5000");
+    await db.backup(destPath);
+  } finally {
+    db.close();
+  }
+}
+
+function shouldExcludeTable(name: string): boolean {
+  return (
+    name === "auth_sessions" ||
+    name === "auth_state" ||
+    name.startsWith("auth_")
+  );
+}
+
+async function createSanitizedKvCopy({
+  sourcePath,
+  destPath,
+}: {
+  sourcePath: string;
+  destPath: string;
+}) {
+  const src = new Database(sourcePath, {
+    fileMustExist: true,
+    readonly: true,
+  });
+  const dest = new Database(destPath, { fileMustExist: false });
+
+  try {
+    src.pragma("busy_timeout = 10000");
+    dest.pragma("busy_timeout = 5000");
+
+    const objects = src
+      .prepare(
+        `
+        SELECT type, name, tbl_name as tblName, sql
+        FROM sqlite_master
+        WHERE sql IS NOT NULL
+        ORDER BY
+          CASE type WHEN 'table' THEN 0 WHEN 'index' THEN 1 WHEN 'trigger' THEN 2 WHEN 'view' THEN 3 ELSE 4 END,
+          name
+        `,
+      )
+      .all() as Array<{
+      type: string;
+      name: string;
+      tblName: string;
+      sql: string;
+    }>;
+
+    const tablesToCopy: Array<{
+      name: string;
+      colList: string;
+      colNames: string[];
+      quotedTable: string;
+    }> = [];
+    const tableSql: string[] = [];
+    const otherSql: string[] = [];
+
+    for (const obj of objects) {
+      const name = obj.name;
+      const tbl = obj.tblName;
+      if (obj.type === "table") {
+        if (name.startsWith("sqlite_")) continue;
+        if (shouldExcludeTable(name)) continue;
+        tableSql.push(obj.sql);
+        const quotedTable = `"${name.replace(/"/g, '""')}"`;
+        const cols = src
+          .prepare(`PRAGMA table_info(${quotedTable})`)
+          .all() as Array<{ name: string }>;
+        if (cols.length === 0) {
+          throw new Error(`Failed to retrieve column info for table: ${name}`);
+        }
+        tablesToCopy.push({
+          name,
+          colList: cols.map((c) => `"${c.name}"`).join(", "),
+          colNames: cols.map((c) => c.name),
+          quotedTable,
+        });
+        continue;
+      }
+      // Drop indexes/triggers/views that hang off an excluded table
+      if (tbl && shouldExcludeTable(tbl)) continue;
+      otherSql.push(obj.sql);
+    }
+
+    dest.exec("BEGIN IMMEDIATE");
+    try {
+      for (const sql of tableSql) {
+        dest.exec(sql);
+      }
+
+      // Copy data by reading from src and inserting into dest (no ATTACH —
+      // the app may have the source DB open, which would lock it)
+      for (const { colList, colNames, quotedTable } of tablesToCopy) {
+        const rows = src
+          .prepare(`SELECT ${colList} FROM ${quotedTable}`)
+          .all() as Record<string, unknown>[];
+        if (rows.length === 0) continue;
+        const placeholders = colNames.map(() => "?").join(", ");
+        const insert = dest.prepare(
+          `INSERT INTO main.${quotedTable} (${colList}) VALUES (${placeholders})`,
+        );
+        for (const row of rows) {
+          insert.run(...colNames.map((col) => row[col]));
+        }
+      }
+
+      for (const sql of otherSql) {
+        dest.exec(sql);
+      }
+
+      dest.exec("COMMIT");
+    } catch (e) {
+      dest.exec("ROLLBACK");
+      throw new Error(
+        `Failed to create sanitized KV copy: ${toError(e).message}`,
+      );
+    }
+
+    dest.exec("VACUUM");
+  } finally {
+    src.close();
+    dest.close();
+  }
+}
+
+function createTgz(
+  cwd: string,
+  outputFile: string,
+  files: string[],
+): Promise<void> {
+  return new Promise((resolve, reject) => {
+    const proc = spawn("tar", ["-czf", outputFile, "-C", cwd, ...files], {
+      stdio: ["ignore", "ignore", "pipe"],
+    });
+    const stderr: string[] = [];
+    proc.stderr.on("data", (d) => stderr.push(d.toString()));
+    proc.on("error", reject);
+    proc.on("close", (code) =>
+      code === 0
+        ? resolve()
+        : reject(
+            new Error(
+              `tar exited ${code}: ${stderr.join("").trim() || "no stderr"}`,
+            ),
+          ),
+    );
+  });
+}
+
+function computeFileMd5(filePath: string): string {
+  const content = fs.readFileSync(filePath);
+  return crypto.createHash("md5").update(content).digest("hex");
+}
+
+async function getFileStats(
+  filePath: string,
+): Promise<{ md5: string; size: number }> {
+  const { size } = await fsp.stat(filePath);
+  return { md5: computeFileMd5(filePath), size };
+}
+
+export async function createSanitizedExportArchive(opts: {
+  dbPath: string;
+  kvPath?: string;
+  exportDir: string;
+  includeKv: boolean;
+}): Promise<ExportResult> {
+  const { dbPath, kvPath, exportDir, includeKv } = opts;
+
+  const now = new Date();
+  const stamp = now.toISOString().replace(/[:.]/g, "-");
+  const runId = crypto.randomUUID();
+  const tmpDir = path.join(exportDir, `bookhive-export-${stamp}-${runId}`);
+
+  try {
+    await fsp.mkdir(tmpDir, { recursive: true });
+  } catch (err) {
+    throw new Error(
+      `Failed to create export temporary directory at ${tmpDir}: ${toError(err).message}`,
+    );
+  }
+
+  const filename = `bookhive-export-${stamp}-${runId.slice(0, 8)}.tgz`;
+  const archivePath = path.join(exportDir, filename);
+
+  try {
+    // Backup main database
+    const dbOut = path.join(tmpDir, "db.sqlite");
+    try {
+      await sqliteBackup({ sourcePath: dbPath, destPath: dbOut });
+    } catch (err) {
+      throw new Error(
+        `Failed to backup main database from ${dbPath}: ${toError(err).message}`,
+      );
+    }
+
+    const includedFiles: ExportManifest["files"] = [];
+    const tables: string[] = [];
+    const views: string[] = [];
+
+    // Add db.sqlite file info
+    const dbStats = await getFileStats(dbOut);
+    includedFiles.push({
+      name: "db.sqlite",
+      md5: dbStats.md5,
+      size: dbStats.size,
+    });
+
+    // Extract schema info from main database
+    try {
+      const db = new Database(dbOut, { fileMustExist: true, readonly: true });
+      try {
+        const schemaObjects = db
+          .prepare(
+            `SELECT type, name FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite_%' ORDER BY type, name`,
+          )
+          .all() as Array<{ type: string; name: string }>;
+
+        for (const obj of schemaObjects) {
+          (obj.type === "table" ? tables : views).push(obj.name);
+        }
+      } finally {
+        db.close();
+      }
+    } catch (err) {
+      throw new Error(
+        `Failed to extract schema information from main database: ${toError(err).message}`,
+      );
+    }
+
+    // Handle KV database
+    if (includeKv && kvPath) {
+      const kvOut = path.join(tmpDir, "kv.sqlite");
+      try {
+        await createSanitizedKvCopy({ sourcePath: kvPath, destPath: kvOut });
+      } catch (err) {
+        throw new Error(
+          `Failed to create sanitized KV copy from ${kvPath}: ${toError(err).message}`,
+        );
+      }
+
+      const kvStats = await getFileStats(kvOut);
+      includedFiles.push({
+        name: "kv.sqlite",
+        md5: kvStats.md5,
+        size: kvStats.size,
+      });
+    }
+
+    // Create and add manifest. Note: exclusion itself is prefix-based
+    // (auth_*); these are the known table names at time of writing
+    const manifest: ExportManifest = {
+      createdAt: now.toISOString(),
+      version: "1.0",
+      files: includedFiles,
+      excludedKvTables: ["auth_sessions", "auth_state"],
+      schema: {
+        tables,
+        views,
+      },
+    };
+
+    const manifestPath = path.join(tmpDir, "manifest.json");
+    try {
+      await fsp.writeFile(
+        manifestPath,
+        JSON.stringify(manifest, null, 2) + "\n",
+        "utf8",
+      );
+    } catch (err) {
+      throw new Error(`Failed to write manifest file: ${toError(err).message}`);
+    }
+
+    // Pushed after serialization: the manifest lists db/kv files but not
+    // itself, while its name still lands in the tar file list below
+    const manifestStats = await getFileStats(manifestPath);
+    includedFiles.push({
+      name: "manifest.json",
+      md5: manifestStats.md5,
+      size: manifestStats.size,
+    });
+
+    // Create archive
+    try {
+      await createTgz(
+        tmpDir,
+        archivePath,
+        includedFiles.map((f) => f.name),
+      );
+    } catch (err) {
+      throw new Error(`Failed to create tar archive: ${toError(err).message}`);
+    }
+
+    return { archivePath, filename, tmpDir };
+  } catch (err) {
+    // Clean up on error
+    await cleanupExportPaths({ archivePath, tmpDir });
+    throw err;
+  }
+}
+
+export async function cleanupExportPaths(paths: {
+  archivePath?: string;
+  tmpDir?: string;
+}): Promise<void> {
+  const promises: Promise<void>[] = [];
+  if (paths.archivePath)
+    promises.push(fsp.rm(paths.archivePath, { force: true }));
+  if (paths.tmpDir)
+    promises.push(fsp.rm(paths.tmpDir, { recursive: true, force: true }));
+  await Promise.allSettled(promises);
+}
+
+export function createExportReadStream(
+  filePath: string,
+  callbacks?: { onClose?: () => void; onError?: (err: Error) => void },
+): ReadableStream {
+  const stream = fs.createReadStream(filePath, { highWaterMark: 64 * 1024 });
+  stream.on("close", () => callbacks?.onClose?.());
+  stream.on("error", (err) => callbacks?.onError?.(err));
+  return Readable.toWeb(stream) as ReadableStream;
+}
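
Since `isAuthorizedExportRequest` is exported, the bearer parsing and timing-safe comparison are straightforward to pin down with unit tests. A minimal sketch, assuming the repo's test runner is vitest (`pnpm test:ui` in the README suggests so; the file location is hypothetical):

```ts
// src/utils/dbExport.test.ts (hypothetical location)
import { describe, expect, it } from "vitest";
import { isAuthorizedExportRequest } from "./dbExport.ts";

describe("isAuthorizedExportRequest", () => {
  it("accepts a matching bearer token, case-insensitive scheme", () => {
    expect(
      isAuthorizedExportRequest({
        authorizationHeader: "bearer s3cret",
        sharedSecret: "s3cret",
      }),
    ).toBe(true);
  });

  it("rejects missing headers, wrong tokens, and empty secrets", () => {
    expect(isAuthorizedExportRequest({ sharedSecret: "s3cret" })).toBe(false);
    expect(
      isAuthorizedExportRequest({
        authorizationHeader: "Bearer wrong",
        sharedSecret: "s3cret",
      }),
    ).toBe(false);
    expect(
      isAuthorizedExportRequest({
        authorizationHeader: "Bearer s3cret",
        sharedSecret: "",
      }),
    ).toBe(false);
  });
});
```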
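
Consumers can unpack the bundle with plain `tar -xzf`; for completeness, a TypeScript sketch of the inverse of `createTgz`, mirroring the module's spawn-based approach (paths are placeholders):

```ts
import { spawn } from "node:child_process";
import fsp from "node:fs/promises";

// Inverse of createTgz: extract the archive's contents into destDir
function extractTgz(archive: string, destDir: string): Promise<void> {
  return new Promise((resolve, reject) => {
    const proc = spawn("tar", ["-xzf", archive, "-C", destDir], {
      stdio: ["ignore", "ignore", "pipe"],
    });
    const stderr: string[] = [];
    proc.stderr.on("data", (d) => stderr.push(d.toString()));
    proc.on("error", reject);
    proc.on("close", (code) =>
      code === 0
        ? resolve()
        : reject(new Error(`tar exited ${code}: ${stderr.join("").trim()}`)),
    );
  });
}

// Placeholder paths
await fsp.mkdir("./export", { recursive: true });
await extractTgz("bookhive-export.tgz", "./export");
```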
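
Finally, the sanitization can be spot-checked after extraction by confirming that no `auth_`-prefixed table survived in `kv.sqlite`. A minimal sketch using better-sqlite3, the same driver the module uses (the extraction path is a placeholder):

```ts
import Database from "better-sqlite3";

// Placeholder path: wherever the archive was extracted
const db = new Database("./export/kv.sqlite", {
  readonly: true,
  fileMustExist: true,
});

const tables = db
  .prepare(
    `SELECT name FROM sqlite_master WHERE type = 'table' AND name NOT LIKE 'sqlite_%'`,
  )
  .all() as Array<{ name: string }>;

const leaked = tables.filter((t) => t.name.startsWith("auth_"));
if (leaked.length > 0) {
  throw new Error(
    `Sanitization failed: ${leaked.map((t) => t.name).join(", ")}`,
  );
}
console.log("kv.sqlite tables:", tables.map((t) => t.name).join(", "));
db.close();
```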