Skip to content

Commit d83276e

Browse files
feat: export full database weekly
Co-authored-by: computers <computers@nickthesick.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com>
1 parent 7d55de8 commit d83276e

File tree

7 files changed

+609
-0
lines changed

7 files changed

+609
-0
lines changed

.env.template

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ LOG_LEVEL="info" # The log level. Options: 'trace', 'deb
88
OPEN_OBSERVE_USER="user@bookhive.buzz" # The email of the user that will be used to observe the open books
99
OPEN_OBSERVE_PASSWORD="password" # The password of the user that will be used to observe the open books
1010
OPEN_OBSERVE_URL="http://localhost:5080" # The URL of the OpenObserve instance that will receive logs
11+
# Export (optional)
12+
# If set, enables GET /admin/export (Authorization: Bearer <secret>)
13+
EXPORT_SHARED_SECRET=""
14+
# Optional directory for temporary export files (defaults to dirname(DB_PATH))
15+
DB_EXPORT_DIR=""
1116
# Secrets
1217
# Must set this in production. May be generated with `openssl rand -base64 33`
1318
# COOKIE_SECRET=""
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
# Fetches a sanitized SQLite export from a running BookHive instance and
# stores it as a GitHub Actions artifact. Requires two repo secrets:
#   BOOKHIVE_EXPORT_URL            - e.g. https://bookhive.example.com/admin/export
#   BOOKHIVE_EXPORT_SHARED_SECRET  - must match EXPORT_SHARED_SECRET on the server
name: Publish database export artifact

on:
  schedule:
    # Weekly (Sunday 02:15 UTC)
    - cron: "15 2 * * 0"
  workflow_dispatch: {}

permissions:
  contents: read
  # "actions: write" is needed to upload workflow artifacts.
  actions: write

concurrency:
  # Never run two exports at once; let an in-flight export finish rather
  # than cancelling it (a half-downloaded archive is useless).
  group: database-export
  cancel-in-progress: false

jobs:
  export:
    runs-on: ubuntu-latest
    steps:
      - name: Set export filename date
        # Date-stamp the artifact name so weekly runs do not collide.
        run: echo "EXPORT_DATE=$(date -u +%Y-%m-%d)" >> "$GITHUB_ENV"

      - name: Download export from BookHive instance
        env:
          EXPORT_URL: ${{ secrets.BOOKHIVE_EXPORT_URL }}
          EXPORT_SECRET: ${{ secrets.BOOKHIVE_EXPORT_SHARED_SECRET }}
        # `test -n` fails fast with a clear step error when a secret is unset,
        # instead of curl producing a confusing request to an empty URL.
        run: |
          test -n "$EXPORT_URL"
          test -n "$EXPORT_SECRET"
          curl -fL --retry 3 --retry-delay 5 \
            -H "Authorization: Bearer $EXPORT_SECRET" \
            "$EXPORT_URL" \
            -o "bookhive-export.tgz"
          ls -lh "bookhive-export.tgz"

      - name: Upload export as GitHub Actions artifact
        uses: actions/upload-artifact@v4
        with:
          name: bookhive-export-${{ env.EXPORT_DATE }}
          path: bookhive-export.tgz
          retention-days: 30

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,16 @@ pnpm test:ui
7979
- **Backend**: [Hono](https://hono.dev) with AT Proto for OAuth
8080
- **Frontend**: Mostly static HTML, with some Hono JSX for dynamic content (Fast as possible)
8181
- **Database**: SQLite, with Kysely as the ORM
82+
83+
## 🗄️ Weekly database export (GitHub Actions artifact)
84+
85+
This repo includes a workflow that can fetch a **sanitized SQLite export** from your running BookHive instance and upload it as a GitHub Actions artifact (weekly cron + manual trigger).
86+
87+
- **Server endpoint**: `GET /admin/export`
88+
- Requires `EXPORT_SHARED_SECRET` to be set
89+
- Request header: `Authorization: Bearer <EXPORT_SHARED_SECRET>`
90+
- Returns a `.tgz` containing `db.sqlite`, `kv.sqlite` (with auth tables excluded), and `manifest.json`
91+
- **Workflow**: `.github/workflows/database-export.yml`
92+
- Configure GitHub repo secrets:
93+
- `BOOKHIVE_EXPORT_URL` (e.g. `https://bookhive.example.com/admin/export`)
94+
- `BOOKHIVE_EXPORT_SHARED_SECRET`

eslint.config.js

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import js from "@eslint/js";
2+
import tseslintPlugin from "@typescript-eslint/eslint-plugin";
3+
import tsParser from "@typescript-eslint/parser";
4+
5+
export default [
6+
{
7+
ignores: ["dist/**", "src/bsky/lexicon/**", ".eslintrc.cjs"],
8+
},
9+
js.configs.recommended,
10+
{
11+
files: ["**/*.ts", "**/*.tsx"],
12+
languageOptions: {
13+
parser: tsParser,
14+
parserOptions: {
15+
ecmaVersion: "latest",
16+
sourceType: "module",
17+
ecmaFeatures: { jsx: true },
18+
},
19+
},
20+
plugins: {
21+
"@typescript-eslint": tseslintPlugin,
22+
},
23+
rules: {
24+
...tseslintPlugin.configs.recommended.rules,
25+
},
26+
},
27+
];
28+

src/env.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ export const env = cleanEnv(process.env, {
1515
devDefault: ":memory:",
1616
desc: "Path to the KV SQLite database",
1717
}),
18+
EXPORT_SHARED_SECRET: str({
19+
default: "",
20+
desc:
21+
"Shared secret for triggering DB exports via /admin/export (Bearer token). Leave empty to disable.",
22+
}),
23+
DB_EXPORT_DIR: str({
24+
default: "",
25+
desc:
26+
"Directory to write temporary export artifacts. Defaults to the directory containing DB_PATH.",
27+
}),
1828
LOG_LEVEL: str({ default: "info", desc: "Log level for the app" }),
1929
COOKIE_SECRET: str({ devDefault: "00000000000000000000000000000000" }),
2030
OPEN_OBSERVE_URL: str({ devDefault: "" }),

src/index.ts

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ import { createRouter, searchBooks } from "./routes.tsx";
4141
import sqliteKv from "./sqlite-kv.ts";
4242
import type { HiveId } from "./types.ts";
4343
import { createBatchTransform } from "./utils/batchTransform.ts";
44+
import {
45+
cleanupExportPaths,
46+
createExportReadStream,
47+
createSanitizedExportArchive,
48+
isAuthorizedExportRequest,
49+
} from "./utils/dbExport.ts";
4450
import {
4551
getGoodreadsCsvParser,
4652
getStorygraphCsvParser,
@@ -55,6 +61,8 @@ import {
5561

5662
import { lazy } from "./utils/lazy.ts";
5763
import { readThroughCache } from "./utils/readThroughCache.ts";
64+
import fs from "node:fs";
65+
import path from "node:path";
5866

5967
// Application state passed to the router and elsewhere
6068
export type AppContext = {
@@ -255,6 +263,133 @@ export class Server {
255263
app.use("*", registerMetrics);
256264
app.get("/metrics", printMetrics);
257265

266+
// Download a sanitized SQLite export bundle (db + kv without auth tables)
267+
app.get("/admin/export", async (c) => {
268+
const ctx = c.get("ctx");
269+
const clientIp =
270+
c.req.header("x-forwarded-for")?.split(",")[0].trim() ||
271+
c.req.header("x-real-ip") ||
272+
"unknown";
273+
274+
try {
275+
// Hide endpoint if not configured
276+
if (!env.EXPORT_SHARED_SECRET) {
277+
ctx.logger.warn(
278+
{ ip: clientIp, reason: "endpoint_not_configured" },
279+
"export endpoint access attempt - endpoint disabled",
280+
);
281+
return c.json({ message: "Not Found" }, 404);
282+
}
283+
284+
// Check authorization
285+
const authorization = c.req.header("authorization");
286+
if (
287+
!isAuthorizedExportRequest({
288+
authorizationHeader: authorization,
289+
sharedSecret: env.EXPORT_SHARED_SECRET,
290+
})
291+
) {
292+
ctx.logger.warn(
293+
{ ip: clientIp, reason: "invalid_authorization" },
294+
"export endpoint unauthorized access attempt",
295+
);
296+
return c.json({ message: "Not Found" }, 404);
297+
}
298+
299+
// Validate database path
300+
if (!env.DB_PATH || env.DB_PATH === ":memory:") {
301+
ctx.logger.error(
302+
{ ip: clientIp, dbPath: env.DB_PATH },
303+
"export endpoint called but DB_PATH is not a file path",
304+
);
305+
return c.json(
306+
{ message: "DB exports require DB_PATH to be a file path" },
307+
400,
308+
);
309+
}
310+
311+
const exportDir =
312+
env.DB_EXPORT_DIR?.trim() ||
313+
path.join(path.dirname(env.DB_PATH), "exports");
314+
315+
ctx.logger.info(
316+
{ ip: clientIp, exportDir },
317+
"starting database export",
318+
);
319+
320+
const startTime = Date.now();
321+
let result;
322+
323+
try {
324+
await fs.promises.mkdir(exportDir, { recursive: true });
325+
326+
const includeKv =
327+
Boolean(env.KV_DB_PATH) &&
328+
env.KV_DB_PATH !== ":memory:" &&
329+
fs.existsSync(env.KV_DB_PATH);
330+
331+
result = await createSanitizedExportArchive({
332+
dbPath: env.DB_PATH,
333+
kvPath: includeKv ? env.KV_DB_PATH : undefined,
334+
exportDir,
335+
includeKv,
336+
});
337+
} catch (err) {
338+
const duration = Date.now() - startTime;
339+
ctx.logger.error(
340+
{
341+
ip: clientIp,
342+
duration,
343+
error: err instanceof Error ? err.message : String(err),
344+
stack: err instanceof Error ? err.stack : undefined,
345+
},
346+
"database export failed",
347+
);
348+
return c.json({ message: "Failed to create export archive" }, 500);
349+
}
350+
351+
const duration = Date.now() - startTime;
352+
const stream = createExportReadStream(result.archivePath, {
353+
onClose: () => {
354+
ctx.logger.info(
355+
{ ip: clientIp, filename: result.filename, duration },
356+
"database export completed successfully",
357+
);
358+
cleanupExportPaths({
359+
archivePath: result.archivePath,
360+
tmpDir: result.tmpDir,
361+
});
362+
},
363+
onError: (err) => {
364+
ctx.logger.error(
365+
{ ip: clientIp, filename: result.filename, error: err.message },
366+
"error streaming export file",
367+
);
368+
cleanupExportPaths({
369+
archivePath: result.archivePath,
370+
tmpDir: result.tmpDir,
371+
});
372+
},
373+
});
374+
375+
return c.body(stream, 200, {
376+
"Content-Type": "application/gzip",
377+
"Content-Encoding": "gzip",
378+
"Content-Disposition": `attachment; filename="${result.filename}"`,
379+
"Cache-Control": "no-store",
380+
});
381+
} catch (err) {
382+
ctx.logger.error(
383+
{
384+
ip: clientIp,
385+
error: err instanceof Error ? err.message : String(err),
386+
},
387+
"unexpected error in export endpoint",
388+
);
389+
return c.json({ message: "Internal server error" }, 500);
390+
}
391+
});
392+
258393
// This is to import a Goodreads CSV export
259394
// It is here because we don't want it behind the etag middleware
260395
app.post(

0 commit comments

Comments
 (0)