Skip to content

Commit ef186e0

Browse files
committed
Allow external blobs
1 parent 52691b9 commit ef186e0

File tree

27 files changed

+565
-163
lines changed

27 files changed

+565
-163
lines changed

apps/server/spec/db/document.db

8 KB
Binary file not shown.
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import { Application } from "express";
2+
import { beforeAll, describe, expect, it } from "vitest";
3+
import supertest from "supertest";
4+
import { login } from "./utils.js";
5+
import config from "../../src/services/config.js";
6+
import sql from "../../src/services/sql.js";
7+
8+
/** Express application under test; assigned once in `beforeAll`. */
let app: Application;
/** Value returned by `login(app)`; sent as the basic-auth password on every request. */
let token: string;

/** Basic-auth user name sent with every ETAPI request. */
const USER = "etapi";

/**
 * ETAPI round-trip checks for external blob storage: content whose length equals
 * the configured threshold must stay in the `blobs` table, content one character
 * longer must be moved out to a `file://` location and still be served back.
 */
describe("etapi/external-blobs", () => {
    /** Value written to `config.ExternalBlobStorage.thresholdBytes` for this suite. */
    const THRESHOLD_BYTES = 10;

    /** Columns of the `blobs` table that the assertions inspect. */
    type StoredBlob = { contentLocation: string; content: string | null; contentLength: number };

    /**
     * Creates a note under `root` through `POST /etapi/create-note`, requires a 201
     * response and a truthy note id, and returns that id.
     */
    async function createNote(title: string, mime: string, type: string, content: string): Promise<string> {
        const response = await supertest(app)
            .post("/etapi/create-note")
            .auth(USER, token, { "type": "basic" })
            .send({ "parentNoteId": "root", title, mime, type, content })
            .expect(201);

        const noteId: string = response.body.note.noteId;
        expect(noteId).toBeTruthy();
        return noteId;
    }

    /**
     * Resolves the note's blob id and reads the storage columns of that blob
     * straight from the database, asserting that both lookups found something.
     */
    function loadBlobRow(noteId: string): StoredBlob {
        const blobId = sql.getValue<string>("SELECT blobId FROM notes WHERE noteId = ?", [noteId]);
        expect(blobId).toBeTruthy();

        const blobRow = sql.getRow<StoredBlob>(
            "SELECT contentLocation, content, contentLength FROM blobs WHERE blobId = ?",
            [blobId]
        );
        expect(blobRow).toBeTruthy();
        return blobRow;
    }

    beforeAll(async () => {
        // Configuration is adjusted before the app module is imported and built.
        config.General.noAuthentication = false;
        config.ExternalBlobStorage.enabled = true;
        config.ExternalBlobStorage.thresholdBytes = THRESHOLD_BYTES;

        const { default: buildApp } = await import("../../src/app.js");
        app = await buildApp();
        token = await login(app);
    });

    it("stores small note content internally", async () => {
        // Payload length equals the threshold exactly.
        const payload = "a".repeat(THRESHOLD_BYTES);
        const noteId = await createNote("Internal Blob Test", "text/plain", "text", payload);
        const blobRow = loadBlobRow(noteId);

        expect(blobRow.contentLength).toEqual(payload.length);
        expect(blobRow.contentLocation).toEqual("internal");
        expect(blobRow.content).toEqual(payload);
    });

    it("stores large note content externally and serves it back", async () => {
        // Payload is one character longer than the threshold.
        const payload = "a".repeat(THRESHOLD_BYTES + 1);
        const noteId = await createNote("External Blob Test", "application/octet-stream", "file", payload);
        const blobRow = loadBlobRow(noteId);

        expect(blobRow.contentLength).toEqual(payload.length);
        expect(blobRow.contentLocation.startsWith("file://")).toBe(true);
        expect(blobRow.content).toBeNull();

        // The content must still be readable through the API even though the row holds none.
        const getRes = await supertest(app)
            .get(`/etapi/notes/${noteId}/content`)
            .auth(USER, token, { "type": "basic" })
            .expect(200);

        expect(getRes.body.toString()).toEqual(payload);
    });
});
93+
94+

apps/server/src/assets/db/schema.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ CREATE TABLE IF NOT EXISTS "recent_notes"
107107
CREATE TABLE IF NOT EXISTS "blobs" (
108108
`blobId` TEXT NOT NULL,
109109
`content` TEXT NULL DEFAULT NULL,
110+
`contentLocation` TEXT NOT NULL DEFAULT 'internal',
111+
`contentLength` INTEGER NOT NULL DEFAULT 0,
110112
`dateModified` TEXT NOT NULL,
111113
`utcDateModified` TEXT NOT NULL,
112114
PRIMARY KEY(`blobId`)

apps/server/src/becca/becca-interface.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ export default class Becca {
171171
opts.includeContentLength = !!opts.includeContentLength;
172172

173173
const query = opts.includeContentLength
174-
? /*sql*/`SELECT attachments.*, LENGTH(blobs.content) AS contentLength
174+
? /*sql*/`SELECT attachments.*, blobs.contentLength AS contentLength
175175
FROM attachments
176176
JOIN blobs USING (blobId)
177177
WHERE attachmentId = ? AND isDeleted = 0`
@@ -197,7 +197,7 @@ export default class Becca {
197197
return null;
198198
}
199199

200-
const row = sql.getRow<BlobRow | null>("SELECT *, LENGTH(content) AS contentLength FROM blobs WHERE blobId = ?", [entity.blobId]);
200+
const row = sql.getRow<BlobRow | null>("SELECT * FROM blobs WHERE blobId = ?", [entity.blobId]);
201201
return row ? new BBlob(row) : null;
202202
}
203203

apps/server/src/becca/entities/abstract_becca_entity.ts

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@ import cls from "../../services/cls.js";
99
import log from "../../services/log.js";
1010
import protectedSessionService from "../../services/protected_session.js";
1111
import blobService from "../../services/blob.js";
12+
import blobStorageService from "../../services/blob-storage.js";
13+
import type { Blob } from "../../services/blob-interface.js";
1214
import type { default as Becca, ConstructorData } from "../becca-interface.js";
1315
import becca from "../becca.js";
16+
import type { BlobContentLocation, BlobRow } from "@triliumnext/commons";
1417

1518
interface ContentOpts {
1619
forceSave?: boolean;
@@ -195,6 +198,14 @@ abstract class AbstractBeccaEntity<T extends AbstractBeccaEntity<T>> {
195198
return;
196199
}
197200

201+
if (blobStorageService.hasExternalContentColumns()) {
202+
const row = sql.getRow<{ contentLocation: string }>("SELECT contentLocation FROM blobs WHERE blobId = ?", [oldBlobId]);
203+
if (row?.contentLocation.startsWith('file://')) {
204+
const filePath = row.contentLocation.replace('file://', '');
205+
blobStorageService.deleteExternal(filePath);
206+
}
207+
}
208+
198209
sql.execute("DELETE FROM blobs WHERE blobId = ?", [oldBlobId]);
199210
// blobs are not marked as erased in entity_changes, they are just purged completely
200211
// this is because technically every keystroke can create a new blob, and there would be just too many
@@ -225,14 +236,40 @@ abstract class AbstractBeccaEntity<T extends AbstractBeccaEntity<T>> {
225236
return newBlobId;
226237
}
227238

228-
const pojo = {
239+
// Check if we should store this blob externally
240+
const shouldStoreExternally = blobStorageService.shouldStoreExternally(content);
241+
let contentLocation: BlobContentLocation = 'internal';
242+
if (shouldStoreExternally) {
243+
try {
244+
const filePath = blobStorageService.saveExternal(newBlobId, content);
245+
contentLocation = `file://${filePath}` as BlobContentLocation;
246+
} catch (error) {
247+
log.error(`Failed to store blob ${newBlobId} externally, falling back to internal storage: ${error}`);
248+
contentLocation = 'internal';
249+
}
250+
}
251+
252+
const contentLength = blobService.getContentLength(content);
253+
254+
const pojo: BlobRow = {
229255
blobId: newBlobId,
230-
content: content,
256+
content: contentLocation === 'internal' ? content : null,
257+
contentLocation,
258+
contentLength,
231259
dateModified: dateUtils.localNowDateTime(),
232260
utcDateModified: dateUtils.utcNowDateTime()
233261
};
234262

235-
sql.upsert("blobs", "blobId", pojo);
263+
// external content columns might not be present when applying older migrations
264+
const pojoToSave = blobStorageService.hasExternalContentColumns()
265+
? pojo
266+
: {
267+
blobId: pojo.blobId,
268+
content,
269+
dateModified: pojo.dateModified,
270+
utcDateModified: pojo.utcDateModified
271+
};
272+
sql.upsert("blobs", "blobId", pojoToSave);
236273

237274
// we can't reuse blobId as an entity_changes hash, because this one has to be calculatable without having
238275
// access to the decrypted content
@@ -259,14 +296,20 @@ abstract class AbstractBeccaEntity<T extends AbstractBeccaEntity<T>> {
259296
}
260297

261298
protected _getContent(): string | Buffer {
262-
const row = sql.getRow<{ content: string | Buffer }>(/*sql*/`SELECT content FROM blobs WHERE blobId = ?`, [this.blobId]);
299+
const query = blobStorageService.hasExternalContentColumns()
300+
? /*sql*/`SELECT content, contentLocation FROM blobs WHERE blobId = ?`
301+
: /*sql*/`SELECT content, 'internal' as contentLocation FROM blobs WHERE blobId = ?`;
302+
303+
const row = sql.getRow<{ content: string | Buffer, contentLocation: string }>(query, [this.blobId]);
263304

264305
if (!row) {
265306
const constructorData = this.constructor as unknown as ConstructorData<T>;
266307
throw new Error(`Cannot find content for ${constructorData.primaryKeyName} '${(this as any)[constructorData.primaryKeyName]}', blobId '${this.blobId}'`);
267308
}
268309

269-
return blobService.processContent(row.content, this.isProtected || false, this.hasStringContent());
310+
const content = blobStorageService.getContent(row);
311+
312+
return blobService.processContent(content, this.isProtected || false, this.hasStringContent());
270313
}
271314

272315
/**

apps/server/src/becca/entities/bblob.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import AbstractBeccaEntity from "./abstract_becca_entity.js";
2-
import type { BlobRow } from "@triliumnext/commons";
2+
import type { BlobRow, BlobContentLocation } from "@triliumnext/commons";
33

44
// TODO: Why does this not extend the abstract becca?
55
class BBlob extends AbstractBeccaEntity<BBlob> {
@@ -10,11 +10,12 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
1010
return "blobId";
1111
}
1212
static get hashedProperties() {
13-
return ["blobId", "content"];
13+
return ["blobId", "content", "contentLocation"];
1414
}
1515

16-
content!: string | Buffer;
16+
content!: string | Buffer | null;
1717
contentLength!: number;
18+
contentLocation!: BlobContentLocation;
1819

1920
constructor(row: BlobRow) {
2021
super();
@@ -25,6 +26,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
2526
this.blobId = row.blobId;
2627
this.content = row.content;
2728
this.contentLength = row.contentLength;
29+
this.contentLocation = row.contentLocation;
2830
this.dateModified = row.dateModified;
2931
this.utcDateModified = row.utcDateModified;
3032
}
@@ -34,6 +36,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
3436
blobId: this.blobId,
3537
content: this.content || null,
3638
contentLength: this.contentLength,
39+
contentLocation: this.contentLocation,
3740
dateModified: this.dateModified,
3841
utcDateModified: this.utcDateModified
3942
};

apps/server/src/becca/entities/bnote.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import dateUtils from "../../services/date_utils.js";
1010
import AbstractBeccaEntity from "./abstract_becca_entity.js";
1111
import BRevision from "./brevision.js";
1212
import BAttachment from "./battachment.js";
13+
import blobStorageService from "../../services/blob-storage.js";
1314
import TaskContext from "../../services/task_context.js";
1415
import dayjs from "dayjs";
1516
import utc from "dayjs/plugin/utc.js";
@@ -1107,8 +1108,12 @@ class BNote extends AbstractBeccaEntity<BNote> {
11071108
// from testing, it looks like calculating length does not make a difference in performance even on large-ish DB
11081109
// given that we're always fetching attachments only for a specific note, we might just do it always
11091110

1111+
const contentLengthColumn = blobStorageService.hasExternalContentColumns()
1112+
? "blobs.contentLength"
1113+
: "LENGTH(COALESCE(blobs.content, ''))";
1114+
11101115
const query = opts.includeContentLength
1111-
? /*sql*/`SELECT attachments.*, LENGTH(blobs.content) AS contentLength
1116+
? /*sql*/`SELECT attachments.*, ${contentLengthColumn} AS contentLength
11121117
FROM attachments
11131118
JOIN blobs USING (blobId)
11141119
WHERE ownerId = ? AND isDeleted = 0
@@ -1121,8 +1126,12 @@ class BNote extends AbstractBeccaEntity<BNote> {
11211126
getAttachmentById(attachmentId: string, opts: AttachmentOpts = {}) {
11221127
opts.includeContentLength = !!opts.includeContentLength;
11231128

1129+
const contentLengthColumn = blobStorageService.hasExternalContentColumns()
1130+
? "blobs.contentLength"
1131+
: "LENGTH(COALESCE(blobs.content, ''))";
1132+
11241133
const query = opts.includeContentLength
1125-
? /*sql*/`SELECT attachments.*, LENGTH(blobs.content) AS contentLength
1134+
? /*sql*/`SELECT attachments.*, ${contentLengthColumn} AS contentLength
11261135
FROM attachments
11271136
JOIN blobs USING (blobId)
11281137
WHERE ownerId = ? AND attachmentId = ? AND isDeleted = 0`

apps/server/src/becca/entities/brevision.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import dateUtils from "../../services/date_utils.js";
66
import becca from "../becca.js";
77
import AbstractBeccaEntity from "./abstract_becca_entity.js";
88
import sql from "../../services/sql.js";
9+
import blobStorageService from "../../services/blob-storage.js";
910
import BAttachment from "./battachment.js";
1011
import type { AttachmentRow, NoteType, RevisionPojo, RevisionRow } from "@triliumnext/commons";
1112
import eraseService from "../../services/erase.js";
@@ -140,8 +141,12 @@ class BRevision extends AbstractBeccaEntity<BRevision> {
140141
getAttachmentById(attachmentId: String, opts: GetByIdOpts = {}): BAttachment | null {
141142
opts.includeContentLength = !!opts.includeContentLength;
142143

144+
const contentLengthColumn = blobStorageService.hasExternalContentColumns()
145+
? "blobs.contentLength"
146+
: "LENGTH(COALESCE(blobs.content, ''))";
147+
143148
const query = opts.includeContentLength
144-
? /*sql*/`SELECT attachments.*, LENGTH(blobs.content) AS contentLength
149+
? /*sql*/`SELECT attachments.*, ${contentLengthColumn} AS contentLength
145150
FROM attachments
146151
JOIN blobs USING (blobId)
147152
WHERE ownerId = ? AND attachmentId = ? AND isDeleted = 0`

apps/server/src/migrations/migrations.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,19 @@
66

77
// Migrations should be kept in descending order, so the latest migration is first.
88
const MIGRATIONS: (SqlMigration | JsMigration)[] = [
9+
// Add external blob storage support.
//
// Adds the two columns that schema.sql declares for fresh databases, with the
// same NOT NULL / DEFAULT definition so migrated and new databases agree.
{
    version: 234,
    sql: /*sql*/`
        -- Add contentLocation column. SQLite gives every existing row the
        -- column DEFAULT when a column is added, so all current blobs become
        -- 'internal' without a separate UPDATE.
        ALTER TABLE blobs ADD contentLocation TEXT NOT NULL DEFAULT 'internal';

        -- Add contentLength column and backfill it for existing blobs.
        -- Existing rows start at the DEFAULT (0), never NULL, so the backfill
        -- must not be filtered on "contentLength IS NULL" or it updates nothing.
        -- Rows without content keep the default of 0.
        -- NOTE(review): LENGTH() counts characters for TEXT values and bytes
        -- for BLOB values; confirm this matches blobService.getContentLength.
        ALTER TABLE blobs ADD contentLength INTEGER NOT NULL DEFAULT 0;
        UPDATE blobs SET contentLength = LENGTH(content) WHERE content IS NOT NULL;
    `,
},
922
// Migrate geo map to collection
1023
{
1124
version: 233,

apps/server/src/routes/api/revisions.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import cls from "../../services/cls.js";
77
import path from "path";
88
import becca from "../../becca/becca.js";
99
import blobService from "../../services/blob.js";
10+
import blobStorageService from "../../services/blob-storage.js";
1011
import eraseService from "../../services/erase.js";
1112
import type { Request, Response } from "express";
1213
import type BRevision from "../../becca/entities/brevision.js";
@@ -33,10 +34,14 @@ function getRevisionBlob(req: Request) {
3334
}
3435

3536
function getRevisions(req: Request) {
37+
const contentLengthColumn = blobStorageService.hasExternalContentColumns()
38+
? "blobs.contentLength"
39+
: "LENGTH(COALESCE(blobs.content, ''))";
40+
3641
return becca.getRevisionsFromQuery(
3742
`
3843
SELECT revisions.*,
39-
LENGTH(blobs.content) AS contentLength
44+
${contentLengthColumn} AS contentLength
4045
FROM revisions
4146
JOIN blobs ON revisions.blobId = blobs.blobId
4247
WHERE revisions.noteId = ?

0 commit comments

Comments
 (0)