@@ -17,11 +17,11 @@ This module is:

We care about memory efficiency here since it's likely we'll want to have
possibly thousands of these in a single nodejs process at once, but with
- less than 1 read/write per second for each. Thus memory is critical, and
+ less than 1 read/write per second for each. Thus memory is critical, and
supporting at least 1000 writes/second is what we need.
- Fortunately, this implementation can do ~50,000+ writes per second and read
- over 500,000 per second. Yes, it blocks the main thread, but by using
- better-sqlite3 and zstd-napi, we get 10x speed increases over async code,
+ Fortunately, this implementation can do ~50,000+ writes per second and read
+ over 500,000 per second. Yes, it blocks the main thread, but by using
+ better-sqlite3 and zstd-napi, we get 10x speed increases over async code,
so this is worth it.

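For a sense of where throughput numbers like those come from: a plain synchronous better-sqlite3 write loop with a prepared statement looks like the sketch below. This is an illustrative micro-benchmark only (file and table names are made up, not from this module); "synchronous = OFF" mirrors the module's default of not fsync-ing on every write (the sync option below).

```ts
import Database from "better-sqlite3";

// Hypothetical benchmark database, for illustration only.
const db = new Database("/tmp/bench.db");
db.pragma("journal_mode = WAL");
db.pragma("synchronous = OFF"); // trade durability for speed, like sync: false
db.prepare("CREATE TABLE IF NOT EXISTS t (id INTEGER PRIMARY KEY, v BLOB)").run();

const insert = db.prepare("INSERT INTO t (v) VALUES (?)");
const N = 50_000;
const start = Date.now();
for (let i = 0; i < N; i++) {
  insert.run(Buffer.from("payload"));
}
console.log(`${Math.round(N / ((Date.now() - start) / 1000))} writes/second`);
```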
@@ -31,16 +31,27 @@ I implemented *sync* lz4-napi compression here and it's very fast,
but it has to be run with async waits in a loop or it doesn't give back
memory, and such throttling may significantly negatively impact performance
and mean we don't get a 100% sync api (like we have now).
- The async functions in lz4-napi seem fine. Upstream report (by me):
+ The async functions in lz4-napi seem fine. Upstream report (by me):
https://github.com/antoniomuso/lz4-napi/issues/678
I also tried the rust sync snappy and it had a similar memory leak. Finally,
I tried zstd-napi and it has a very fast sync implementation that does *not*
- need async pauses to not leak memory. So zstd-napi it is.
+ need async pauses to not leak memory. So zstd-napi it is.
And I like zstandard anyways.

+ TIERED STORAGE:
+
+ You can provide a second path, archive, for the sqlite file. If provided, on
+ creation this will stat both the main path and the archive path. If the
+ archive path is newer, then the file is first copied from the archive path to
+ the normal path, then opened. Also, if the archive path is provided, then a
+ backup of the database is made to the archive path periodically. We use this
+ for tiered storage in CoCalc as follows. The archive path is on a Google
+ Cloud Storage autoclass bucket that is mounted using gcsfuse. The normal
+ primary path is on a small fast SSD persistent disk, which we view as a cache.
+
NOTE:

- We use seconds instead of ms in sqlite since that is the standard
+ We use seconds instead of ms in sqlite since that is the standard
convention for times in sqlite.

DEVELOPMENT:
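A minimal standalone sketch of the restore-if-newer rule just described (the actual implementation is initArchive/backup further down in this diff; a missing file is treated as infinitely old):

```ts
import { statSync, copyFileSync } from "fs";

function mtime(path: string): number {
  try {
    return statSync(path).mtimeMs;
  } catch {
    return 0; // missing file counts as infinitely old
  }
}

// If the archive copy (e.g., on a gcsfuse mount) is newer than the local
// SSD cache copy, refresh the cache before opening the database.
function restoreIfArchiveNewer(path: string, archive: string) {
  if (mtime(archive) > mtime(path)) {
    copyFileSync(archive, path);
  }
}
```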
@@ -51,7 +62,14 @@ DEVELOPMENT:
*/

import { refCacheSync } from "@cocalc/util/refcache";
- import { createDatabase, type Database, compress, decompress } from "./context";
+ import {
+   createDatabase,
+   type Database,
+   compress,
+   decompress,
+   statSync,
+   copyFileSync,
+ } from "./context";
import type { JSONValue } from "@cocalc/util/types";
import { EventEmitter } from "events";
import {
@@ -61,6 +79,8 @@ import {
} from "@cocalc/conat/core/client";
import TTL from "@isaacs/ttlcache";
import { getLogger } from "@cocalc/conat/client";
+ import { reuseInFlight } from "@cocalc/util/reuse-in-flight";
+ import { throttle } from "lodash";

const logger = getLogger("persist:storage");
@@ -195,10 +215,22 @@ export interface DeleteOperation {
  seqs: number[];
}

+ export const DEFAULT_ARCHIVE_INTERVAL = 30_000; // 30 seconds
+
export interface StorageOptions {
  // absolute path to sqlite database file. This needs to be a valid filename
-   // path, and must also be kept under 1K so it can be stored in cloud storage.
+   // path, and must also be kept under 1000 characters in length so it can be
+   // stored in cloud storage.
  path: string;
+   // another absolute path. If this is given, then (1) it will be copied to
+   // path before opening path if it is newer, and (2) a backup will be saved
+   // to archive (using sqlite's backup feature) every archiveInterval ms.
+   // NOTE: we actually append ".db" to path and to archive.
+   archive?: string;
+   // the archive interval, if archive is given. Defaults to DEFAULT_ARCHIVE_INTERVAL.
+   // Depending on your setup, this is likely your tolerance for data loss in the
+   // worst case scenario, e.g., "loss of the last 30 seconds of TimeTravel edit history".
+   archiveInterval?: number;
  // if false (the default) do not require sync writes to disk on every set
  sync?: boolean;
  // if set, then data is never saved to disk at all. To avoid using a lot of server
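For illustration, a hypothetical tiered-storage configuration (paths invented) matching the CoCalc setup described in the comment block at the top:

```ts
const options: StorageOptions = {
  // fast local SSD cache; ".db" is appended internally
  path: "/ssd/streams/project-42",
  // gcsfuse-mounted autoclass bucket; also gets ".db" appended
  archive: "/gcsfuse/streams/project-42",
  // worst-case data loss window: up to one minute of recent writes
  archiveInterval: 60_000,
  // default: skip fsync on every write for large speedups
  sync: false,
};
```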
@@ -216,23 +248,40 @@ export class PersistentStream extends EventEmitter {
  private readonly db: Database;
  private readonly msgIDs = new TTL({ ttl: 2 * 60 * 1000 });
  private conf: Configuration;
+   private throttledBackup?;

  constructor(options: StorageOptions) {
    super();
    logger.debug("constructor ", options.path);
-
    this.setMaxListeners(1000);
    options = { compression: DEFAULT_COMPRESSION, ...options };
    this.options = options;
    const location = this.options.ephemeral
      ? ":memory:"
      : this.options.path + ".db";
+     this.initArchive();
    this.db = createDatabase(location);
-     //console.log(location);
-     this.init();
+     this.initSchema();
  }

-   init = () => {
+   private initArchive = () => {
+     if (!this.options.archive) {
+       return;
+     }
+     this.throttledBackup = throttle(
+       this.backup,
+       this.options.archiveInterval ?? DEFAULT_ARCHIVE_INTERVAL,
+     );
+     const archive = this.options.archive + ".db";
+     const path = this.options.path + ".db";
+     const archiveAge = age(archive);
+     const pathAge = age(path);
+     if (archiveAge > pathAge) {
+       copyFileSync(archive, path);
+     }
+   };
+
+   private initSchema = () => {
    if (!this.options.sync && !this.options.ephemeral) {
      // Unless sync is set, we do not require that the filesystem has committed changes
      // to disk after every insert. This can easily make things 10x faster. sets are
@@ -245,7 +294,7 @@ export class PersistentStream extends EventEmitter {
    // ttl is in milliseconds.
    this.db
      .prepare(
-         `CREATE TABLE IF NOT EXISTS messages (
+         `CREATE TABLE IF NOT EXISTS messages (
        seq INTEGER PRIMARY KEY AUTOINCREMENT, key TEXT UNIQUE, time INTEGER NOT NULL, headers TEXT, compress NUMBER NOT NULL, encoding NUMBER NOT NULL, raw BLOB NOT NULL, size NUMBER NOT NULL, ttl NUMBER
        )
        `,
@@ -269,13 +318,13 @@ export class PersistentStream extends EventEmitter {
    this.conf = this.config();
  };

-   close = () => {
+   close = async () => {
    logger.debug("close ", this.options.path);
    if (this.db != null) {
      this.vacuum();
      this.db.prepare("PRAGMA wal_checkpoint(FULL)").run();
+       if (this.options.archive) {
+         await this.backup();
+       }
      this.db.close();
-       // @ts-ignore
    }
    // @ts-ignore
    delete this.options;
@@ -284,6 +333,20 @@ export class PersistentStream extends EventEmitter {
    delete this.msgIDs;
  };

+   private backup = reuseInFlight(async (): Promise<void> => {
+     // reuseInFlight since running a backup on top of itself
+     // would probably corrupt data.
+     if (!this.options.archive) {
+       throw Error("no backup target file set");
+     }
+     const path = this.options.archive + ".db";
+     try {
+       await this.db.backup(path);
+     } catch (err) {
+       logger.debug("WARNING: error creating a backup", path, err);
+     }
+   });
+
  private compress = (
    raw: Buffer,
  ): { raw: Buffer; compress: CompressionAlgorithm } => {
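The intent of reuseInFlight above is that overlapping calls share the promise of the backup already in progress, so two sqlite backups never run concurrently against the same target. A minimal sketch of the assumed semantics of @cocalc/util/reuse-in-flight (not its actual implementation):

```ts
function reuseInFlightSketch<T>(f: () => Promise<T>): () => Promise<T> {
  let inFlight: Promise<T> | undefined;
  return () => {
    if (inFlight == null) {
      // start the work and clear the slot once it settles
      inFlight = f().finally(() => {
        inFlight = undefined;
      });
    }
    // callers arriving during the run get the same promise back
    return inFlight;
  };
}
```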
@@ -387,6 +450,7 @@ export class PersistentStream extends EventEmitter {
      headers,
      msgID,
    });
+     this.throttledBackup?.();
    if (msgID !== undefined) {
      this.msgIDs.set(msgID, { time, seq });
    }
@@ -478,6 +542,7 @@ export class PersistentStream extends EventEmitter {
      this.db.prepare("DELETE FROM messages WHERE seq=?").run(seq);
    }
    this.emit("change", { op: "delete", seqs });
+     this.throttledBackup?.();
    return { seqs };
  };
@@ -596,13 +661,15 @@ export class PersistentStream extends EventEmitter {
    this.conf = full as Configuration;
    // ensure any new limits are enforced
    this.enforceLimits(0);
+     this.throttledBackup?.();
    return full as Configuration;
  };

  private emitDelete = (rows) => {
    if (rows.length > 0) {
      const seqs = rows.map((row: { seq: number }) => row.seq);
      this.emit("change", { op: "delete", seqs });
+       this.throttledBackup?.();
    }
  };
@@ -782,9 +849,7 @@ export const cache = refCacheSync<CreateOptions, PersistentStream>({
  name: "persistent-storage-stream",
  createKey: ({ path }: CreateOptions) => path,
  createObject: (options: CreateOptions) => {
-     const pstream = new PersistentStream(options);
-     pstream.init();
-     return pstream;
+     return new PersistentStream(options);
  },
});
@@ -793,3 +858,11 @@ export function pstream(
): PersistentStream {
  return cache(options);
}
+
+ function age(path: string) {
+   try {
+     return statSync(path).mtimeMs;
+   } catch {
+     return 0;
+   }
+ }
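Given createKey above, two pstream calls with the same path share one reference-counted instance; assuming standard refcache semantics, the underlying stream is only torn down once the last reference closes it. For example (hypothetical path):

```ts
const a = pstream({ path: "/tmp/demo" });
const b = pstream({ path: "/tmp/demo" });
// a and b are the same cached PersistentStream; closing one reference
// does not close the database until the other reference is closed too.
```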