diff --git a/.changeset/chilled-mirrors-marry.md b/.changeset/chilled-mirrors-marry.md
new file mode 100644
index 000000000..d8f3dcd1e
--- /dev/null
+++ b/.changeset/chilled-mirrors-marry.md
@@ -0,0 +1,13 @@
+---
+'@powersync/service-module-postgres-storage': minor
+'@powersync/service-module-mongodb-storage': minor
+'@powersync/service-core-tests': minor
+'@powersync/service-module-postgres': minor
+'@powersync/service-module-mongodb': minor
+'@powersync/service-core': minor
+'@powersync/service-module-mysql': minor
+'@powersync/service-sync-rules': minor
+'@powersync/service-errors': patch
+---
+
+Minor optimizations to new checkpoint calculations.
diff --git a/libs/lib-postgres/src/db/connection/AbstractPostgresConnection.ts b/libs/lib-postgres/src/db/connection/AbstractPostgresConnection.ts
index 3484ac81c..4baafa0ed 100644
--- a/libs/lib-postgres/src/db/connection/AbstractPostgresConnection.ts
+++ b/libs/lib-postgres/src/db/connection/AbstractPostgresConnection.ts
@@ -7,9 +7,7 @@ export type DecodedSQLQueryExecutor> = {
   rows: () => Promise[]>;
 };
-export abstract class AbstractPostgresConnection<
-  Listener extends framework.DisposableListener = framework.DisposableListener
-> extends framework.DisposableObserver {
+export abstract class AbstractPostgresConnection extends framework.BaseObserver {
   protected abstract baseConnection: pgwire.PgClient;
   stream(...args: pgwire.Statement[]): AsyncIterableIterator {
diff --git a/libs/lib-postgres/src/db/connection/ConnectionSlot.ts b/libs/lib-postgres/src/db/connection/ConnectionSlot.ts
index ac04832e1..00670fa8f 100644
--- a/libs/lib-postgres/src/db/connection/ConnectionSlot.ts
+++ b/libs/lib-postgres/src/db/connection/ConnectionSlot.ts
@@ -1,7 +1,7 @@
 import * as framework from '@powersync/lib-services-framework';
 import * as pgwire from '@powersync/service-jpgwire';
-export interface NotificationListener extends framework.DisposableListener {
+export interface NotificationListener {
   notification?: (payload: pgwire.PgNotification) => void;
 }
@@ -23,7 +23,7 @@ export type ConnectionSlotOptions = {
 export const MAX_CONNECTION_ATTEMPTS = 5;
-export class ConnectionSlot extends framework.DisposableObserver {
+export class ConnectionSlot extends framework.BaseObserver {
   isAvailable: boolean;
   isPoking: boolean;
@@ -63,7 +63,7 @@ export class ConnectionSlot extends framework.DisposableObserver implements ObserverClient {
       await cb(this.listeners[i]);
     }
   }
+
+  clearListeners() {
+    this.listeners = {};
+  }
 }
diff --git a/libs/lib-services/src/utils/DisposableObserver.ts b/libs/lib-services/src/utils/DisposableObserver.ts
deleted file mode 100644
index 194ed6955..000000000
--- a/libs/lib-services/src/utils/DisposableObserver.ts
+++ /dev/null
@@ -1,42 +0,0 @@
-import { BaseObserver, ObserverClient } from './BaseObserver.js';
-
-export interface DisposableListener {
-  /**
-   * Event which is fired when the `[Symbol.disposed]` method is called.
-   */
-  disposed: () => void;
-}
-
-export interface ManagedObserverClient extends ObserverClient {
-  /**
-   * Registers a listener that is automatically disposed when the parent is disposed.
-   * This is useful for disposing nested listeners.
- */ - registerManagedListener: (parent: DisposableObserverClient, cb: Partial) => () => void; -} - -export interface DisposableObserverClient extends ManagedObserverClient, Disposable {} -export interface AsyncDisposableObserverClient - extends ManagedObserverClient, - AsyncDisposable {} - -export class DisposableObserver - extends BaseObserver - implements DisposableObserverClient -{ - registerManagedListener(parent: DisposableObserverClient, cb: Partial) { - const disposer = this.registerListener(cb); - parent.registerListener({ - disposed: () => { - disposer(); - } - }); - return disposer; - } - - [Symbol.dispose]() { - this.iterateListeners((cb) => cb.disposed?.()); - // Delete all callbacks - Object.keys(this.listeners).forEach((key) => delete this.listeners[key]); - } -} diff --git a/libs/lib-services/src/utils/utils-index.ts b/libs/lib-services/src/utils/utils-index.ts index 59b89d274..ee42d4057 100644 --- a/libs/lib-services/src/utils/utils-index.ts +++ b/libs/lib-services/src/utils/utils-index.ts @@ -1,3 +1,2 @@ export * from './BaseObserver.js'; -export * from './DisposableObserver.js'; export * from './environment-variables.js'; diff --git a/libs/lib-services/test/src/DisposeableObserver.test.ts b/libs/lib-services/test/src/DisposeableObserver.test.ts deleted file mode 100644 index 1cde6a58b..000000000 --- a/libs/lib-services/test/src/DisposeableObserver.test.ts +++ /dev/null @@ -1,58 +0,0 @@ -import { describe, expect, test } from 'vitest'; - -import { DisposableListener, DisposableObserver } from '../../src/utils/DisposableObserver.js'; - -describe('DisposableObserver', () => { - test('it should dispose all listeners on dispose', () => { - const listener = new DisposableObserver(); - - let wasDisposed = false; - listener.registerListener({ - disposed: () => { - wasDisposed = true; - } - }); - - listener[Symbol.dispose](); - - expect(wasDisposed).equals(true); - expect(Object.keys(listener['listeners']).length).equals(0); - }); - - test('it should dispose nested listeners for managed listeners', () => { - interface ParentListener extends DisposableListener { - childCreated: (child: DisposableObserver) => void; - } - class ParentObserver extends DisposableObserver { - createChild() { - const child = new DisposableObserver(); - this.iterateListeners((cb) => cb.childCreated?.(child)); - } - } - - const parent = new ParentObserver(); - let aChild: DisposableObserver | null = null; - - parent.registerListener({ - childCreated: (child) => { - aChild = child; - child.registerManagedListener(parent, { - test: () => { - // this does nothing - } - }); - } - }); - - parent.createChild(); - - // The managed listener should add a `disposed` listener - expect(Object.keys(parent['listeners']).length).equals(2); - expect(Object.keys(aChild!['listeners']).length).equals(1); - - parent[Symbol.dispose](); - expect(Object.keys(parent['listeners']).length).equals(0); - // The listener attached to the child should be disposed when the parent was disposed - expect(Object.keys(aChild!['listeners']).length).equals(0); - }); -}); diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index c164dc4d7..22f71dd7f 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -1,11 +1,8 @@ import { SqlSyncRules } from '@powersync/service-sync-rules'; -import { wrapWithAbort } from 'ix/asynciterable/operators/withabort.js'; -import 
{ LRUCache } from 'lru-cache/min'; -import * as timers from 'timers/promises'; -import { storage, sync, utils } from '@powersync/service-core'; +import { storage } from '@powersync/service-core'; -import { DisposableObserver, ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; +import { BaseObserver, ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { v4 as uuid } from 'uuid'; import * as lib_mongo from '@powersync/lib-service-mongodb'; @@ -18,7 +15,7 @@ import { MongoSyncBucketStorage } from './implementation/MongoSyncBucketStorage. import { generateSlotName } from './implementation/util.js'; export class MongoBucketStorage - extends DisposableObserver + extends BaseObserver implements storage.BucketStorageFactory { private readonly client: mongo.MongoClient; @@ -26,26 +23,7 @@ export class MongoBucketStorage // TODO: This is still Postgres specific and needs to be reworked public readonly slot_name_prefix: string; - private readonly storageCache = new LRUCache({ - max: 3, - fetchMethod: async (id) => { - const doc2 = await this.db.sync_rules.findOne( - { - _id: id - }, - { limit: 1 } - ); - if (doc2 == null) { - // Deleted in the meantime? - return undefined; - } - const rules = new MongoPersistedSyncRulesContent(this.db, doc2); - return this.getInstance(rules); - }, - dispose: (storage) => { - storage[Symbol.dispose](); - } - }); + private activeStorageCache: MongoSyncBucketStorage | undefined; public readonly db: PowerSyncMongo; @@ -63,7 +41,7 @@ export class MongoBucketStorage } async [Symbol.asyncDispose]() { - super[Symbol.dispose](); + // No-op } getInstance(options: storage.PersistedSyncRulesContent): MongoSyncBucketStorage { @@ -75,8 +53,7 @@ export class MongoBucketStorage this.iterateListeners((cb) => cb.syncStorageCreated?.(storage)); storage.registerListener({ batchStarted: (batch) => { - // This nested listener will be automatically disposed when the storage is disposed - batch.registerManagedListener(storage, { + batch.registerListener({ replicationEvent: (payload) => this.iterateListeners((cb) => cb.replicationEvent?.(payload)) }); } @@ -293,19 +270,22 @@ export class MongoBucketStorage }); } - async getActiveCheckpoint(): Promise { - const doc = await this.db.sync_rules.findOne( - { - state: storage.SyncRuleState.ACTIVE - }, - { - sort: { _id: -1 }, - limit: 1, - projection: { _id: 1, last_checkpoint: 1, last_checkpoint_lsn: 1 } - } - ); + async getActiveStorage(): Promise { + const content = await this.getActiveSyncRulesContent(); + if (content == null) { + return null; + } - return this.makeActiveCheckpoint(doc); + // It is important that this instance is cached. + // Not for the instance construction itself, but to ensure that internal caches on the instance + // are re-used properly. + if (this.activeStorageCache?.group_id == content.id) { + return this.activeStorageCache; + } else { + const instance = this.getInstance(content); + this.activeStorageCache = instance; + return instance; + } } async getStorageMetrics(): Promise { @@ -391,166 +371,4 @@ export class MongoBucketStorage return instance!._id; } - - private makeActiveCheckpoint(doc: SyncRuleDocument | null) { - return { - checkpoint: utils.timestampToOpId(doc?.last_checkpoint ?? 0n), - lsn: doc?.last_checkpoint_lsn ?? null, - hasSyncRules() { - return doc != null; - }, - getBucketStorage: async () => { - if (doc == null) { - return null; - } - return (await this.storageCache.fetch(doc._id)) ?? 
null; - } - } satisfies storage.ActiveCheckpoint; - } - - /** - * Instance-wide watch on the latest available checkpoint (op_id + lsn). - */ - private async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable { - const pipeline: mongo.Document[] = [ - { - $match: { - 'fullDocument.state': 'ACTIVE', - operationType: { $in: ['insert', 'update'] } - } - }, - { - $project: { - operationType: 1, - 'fullDocument._id': 1, - 'fullDocument.last_checkpoint': 1, - 'fullDocument.last_checkpoint_lsn': 1 - } - } - ]; - - // Use this form instead of (doc: SyncRuleDocument | null = null), - // otherwise we get weird "doc: never" issues. - let doc = null as SyncRuleDocument | null; - let clusterTime = null as mongo.Timestamp | null; - - await this.client.withSession(async (session) => { - doc = await this.db.sync_rules.findOne( - { - state: storage.SyncRuleState.ACTIVE - }, - { - session, - sort: { _id: -1 }, - limit: 1, - projection: { - _id: 1, - last_checkpoint: 1, - last_checkpoint_lsn: 1 - } - } - ); - const time = session.clusterTime?.clusterTime ?? null; - clusterTime = time; - }); - if (clusterTime == null) { - throw new ServiceError(ErrorCode.PSYNC_S2401, 'Could not get clusterTime'); - } - - if (signal.aborted) { - return; - } - - if (doc) { - yield this.makeActiveCheckpoint(doc); - } - - const stream = this.db.sync_rules.watch(pipeline, { - fullDocument: 'updateLookup', - // Start at the cluster time where we got the initial doc, to make sure - // we don't skip any updates. - // This may result in the first operation being a duplicate, but we filter - // it out anyway. - startAtOperationTime: clusterTime - }); - - signal.addEventListener( - 'abort', - () => { - stream.close(); - }, - { once: true } - ); - - let lastOp: storage.ActiveCheckpoint | null = null; - - for await (const update of stream.stream()) { - if (signal.aborted) { - break; - } - if (update.operationType != 'insert' && update.operationType != 'update') { - continue; - } - const doc = update.fullDocument!; - if (doc == null) { - continue; - } - - const op = this.makeActiveCheckpoint(doc); - // Check for LSN / checkpoint changes - ignore other metadata changes - if (lastOp == null || op.lsn != lastOp.lsn || op.checkpoint != lastOp.checkpoint) { - lastOp = op; - yield op; - } - } - } - - // Nothing is done here until a subscriber starts to iterate - private readonly sharedIter = new sync.BroadcastIterable((signal) => { - return this.watchActiveCheckpoint(signal); - }); - - /** - * User-specific watch on the latest checkpoint and/or write checkpoint. - */ - async *watchWriteCheckpoint(user_id: string, signal: AbortSignal): AsyncIterable { - let lastCheckpoint: utils.OpId | null = null; - let lastWriteCheckpoint: bigint | null = null; - - const iter = wrapWithAbort(this.sharedIter, signal); - for await (const cp of iter) { - const { checkpoint, lsn } = cp; - - // lsn changes are not important by itself. - // What is important is: - // 1. checkpoint (op_id) changes. - // 2. write checkpoint changes for the specific user - const bucketStorage = await cp.getBucketStorage(); - if (!bucketStorage) { - continue; - } - - const lsnFilters: Record = lsn ? { 1: lsn } : {}; - - const currentWriteCheckpoint = await bucketStorage.lastWriteCheckpoint({ - user_id, - heads: { - ...lsnFilters - } - }); - - if (currentWriteCheckpoint == lastWriteCheckpoint && checkpoint == lastCheckpoint) { - // No change - wait for next one - // In some cases, many LSNs may be produced in a short time. 
- // Add a delay to throttle the write checkpoint lookup a bit. - await timers.setTimeout(20 + 10 * Math.random()); - continue; - } - - lastWriteCheckpoint = currentWriteCheckpoint; - lastCheckpoint = checkpoint; - - yield { base: cp, writeCheckpoint: currentWriteCheckpoint }; - } - } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 2b329f233..6d87be042 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -3,8 +3,8 @@ import { SqlEventDescriptor, SqliteRow, SqlSyncRules } from '@powersync/service- import * as bson from 'bson'; import { + BaseObserver, container, - DisposableObserver, ErrorCode, errors, logger, @@ -49,7 +49,7 @@ export interface MongoBucketBatchOptions { } export class MongoBucketBatch - extends DisposableObserver + extends BaseObserver implements storage.BucketStorageBatch { private readonly client: mongo.MongoClient; @@ -610,7 +610,7 @@ export class MongoBucketBatch async [Symbol.asyncDispose]() { await this.session.endSession(); - super[Symbol.dispose](); + super.clearListeners(); } private lastWaitingLogThottled = 0; diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index a2d51edfa..70abf4f56 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -1,20 +1,36 @@ import * as lib_mongo from '@powersync/lib-service-mongodb'; import { mongo } from '@powersync/lib-service-mongodb'; -import { DisposableObserver, logger, ServiceAssertionError } from '@powersync/lib-services-framework'; -import { storage, utils } from '@powersync/service-core'; +import { + BaseObserver, + ErrorCode, + logger, + ServiceAssertionError, + ServiceError +} from '@powersync/lib-services-framework'; +import { + BroadcastIterable, + CHECKPOINT_INVALIDATE_ALL, + CheckpointChanges, + GetCheckpointChangesOptions, + ReplicationCheckpoint, + storage, + utils, + WatchWriteCheckpointOptions +} from '@powersync/service-core'; import { SqliteJsonRow, SqliteJsonValue, SqlSyncRules } from '@powersync/service-sync-rules'; import * as bson from 'bson'; +import { wrapWithAbort } from 'ix/asynciterable/operators/withabort.js'; import * as timers from 'timers/promises'; import { MongoBucketStorage } from '../MongoBucketStorage.js'; import { PowerSyncMongo } from './db.js'; -import { BucketDataDocument, BucketDataKey, SourceKey } from './models.js'; +import { BucketDataDocument, BucketDataKey, SourceKey, SyncRuleCheckpointState, SyncRuleDocument } from './models.js'; import { MongoBucketBatch } from './MongoBucketBatch.js'; import { MongoCompactor } from './MongoCompactor.js'; import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js'; import { idPrefixFilter, mapOpEntry, readSingleBatch } from './util.js'; export class MongoSyncBucketStorage - extends DisposableObserver + extends BaseObserver implements storage.SyncRulesBucketStorage { private readonly db: PowerSyncMongo; @@ -608,4 +624,232 @@ export class MongoSyncBucketStorage async compact(options?: storage.CompactOptions) { return new MongoCompactor(this.db, this.group_id, options).compact(); } + + private makeActiveCheckpoint(doc: 
SyncRuleCheckpointState | null) { + return { + checkpoint: utils.timestampToOpId(doc?.last_checkpoint ?? 0n), + lsn: doc?.last_checkpoint_lsn ?? null + }; + } + + /** + * Instance-wide watch on the latest available checkpoint (op_id + lsn). + */ + private async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable { + // Use this form instead of (doc: SyncRuleCheckpointState | null = null), + // otherwise we get weird "doc: never" issues. + let doc = null as SyncRuleCheckpointState | null; + let clusterTime = null as mongo.Timestamp | null; + const syncRulesId = this.group_id; + + await this.db.client.withSession(async (session) => { + doc = await this.db.sync_rules.findOne( + { + _id: syncRulesId, + state: storage.SyncRuleState.ACTIVE + }, + { + session, + sort: { _id: -1 }, + limit: 1, + projection: { + _id: 1, + state: 1, + last_checkpoint: 1, + last_checkpoint_lsn: 1 + } + } + ); + const time = session.clusterTime?.clusterTime ?? null; + clusterTime = time; + }); + if (clusterTime == null) { + throw new ServiceError(ErrorCode.PSYNC_S2401, 'Could not get clusterTime'); + } + + if (signal.aborted) { + return; + } + + if (doc == null) { + // Sync rules not present or not active. + // Abort the connections - clients will have to retry later. + // Should this error instead? + return; + } + + yield this.makeActiveCheckpoint(doc); + + // We only watch changes to the active sync rules. + // If it changes to inactive, we abort and restart with the new sync rules. + + const pipeline = this.getChangeStreamPipeline(); + + const stream = this.db.sync_rules.watch(pipeline, { + // Start at the cluster time where we got the initial doc, to make sure + // we don't skip any updates. + // This may result in the first operation being a duplicate, but we filter + // it out anyway. + startAtOperationTime: clusterTime + }); + + signal.addEventListener( + 'abort', + () => { + stream.close(); + }, + { once: true } + ); + + let lastOp: storage.ReplicationCheckpoint | null = null; + let lastDoc: SyncRuleCheckpointState | null = doc; + + for await (const update of stream.stream()) { + if (signal.aborted) { + break; + } + if (update.operationType != 'insert' && update.operationType != 'update' && update.operationType != 'replace') { + continue; + } + + const doc = await this.getOperationDoc(lastDoc, update as lib_mongo.mongo.ChangeStreamDocument); + if (doc == null) { + // Irrelevant update + continue; + } + if (doc.state != storage.SyncRuleState.ACTIVE) { + // Sync rules have changed - abort and restart. + // Should this error instead? + break; + } + + lastDoc = doc; + + const op = this.makeActiveCheckpoint(doc); + // Check for LSN / checkpoint changes - ignore other metadata changes + if (lastOp == null || op.lsn != lastOp.lsn || op.checkpoint != lastOp.checkpoint) { + lastOp = op; + yield op; + } + } + } + + // Nothing is done here until a subscriber starts to iterate + private readonly sharedIter = new BroadcastIterable((signal) => { + return this.watchActiveCheckpoint(signal); + }); + + /** + * User-specific watch on the latest checkpoint and/or write checkpoint. + */ + async *watchWriteCheckpoint(options: WatchWriteCheckpointOptions): AsyncIterable { + const { user_id, signal } = options; + let lastCheckpoint: utils.OpId | null = null; + let lastWriteCheckpoint: bigint | null = null; + + const iter = wrapWithAbort(this.sharedIter, signal); + for await (const event of iter) { + const { checkpoint, lsn } = event; + + // lsn changes are not important by itself. + // What is important is: + // 1. 
checkpoint (op_id) changes. + // 2. write checkpoint changes for the specific user + + const lsnFilters: Record = lsn ? { 1: lsn } : {}; + + const currentWriteCheckpoint = await this.lastWriteCheckpoint({ + user_id, + heads: { + ...lsnFilters + } + }); + + if (currentWriteCheckpoint == lastWriteCheckpoint && checkpoint == lastCheckpoint) { + // No change - wait for next one + // In some cases, many LSNs may be produced in a short time. + // Add a delay to throttle the write checkpoint lookup a bit. + await timers.setTimeout(20 + 10 * Math.random()); + continue; + } + + const updates: CheckpointChanges = + lastCheckpoint == null + ? { + invalidateDataBuckets: true, + invalidateParameterBuckets: true, + updatedDataBuckets: [], + updatedParameterBucketDefinitions: [] + } + : await this.getCheckpointChanges({ + lastCheckpoint: lastCheckpoint, + nextCheckpoint: checkpoint + }); + + lastWriteCheckpoint = currentWriteCheckpoint; + lastCheckpoint = checkpoint; + + yield { + base: event, + writeCheckpoint: currentWriteCheckpoint, + update: updates + }; + } + } + + private async getOperationDoc( + lastDoc: SyncRuleCheckpointState, + update: lib_mongo.mongo.ChangeStreamDocument + ): Promise { + if (update.operationType == 'insert' || update.operationType == 'replace') { + return update.fullDocument; + } else if (update.operationType == 'update') { + const updatedFields = update.updateDescription.updatedFields ?? {}; + if (lastDoc._id != update.documentKey._id) { + throw new ServiceAssertionError(`Sync rules id mismatch: ${lastDoc._id} != ${update.documentKey._id}`); + } + + const mergedDoc: SyncRuleCheckpointState = { + _id: lastDoc._id, + last_checkpoint: updatedFields.last_checkpoint ?? lastDoc.last_checkpoint, + last_checkpoint_lsn: updatedFields.last_checkpoint_lsn ?? lastDoc.last_checkpoint_lsn, + state: updatedFields.state ?? 
lastDoc.state + }; + + return mergedDoc; + } else { + // Unknown event type + return null; + } + } + + private getChangeStreamPipeline() { + const syncRulesId = this.group_id; + const pipeline: mongo.Document[] = [ + { + $match: { + 'documentKey._id': syncRulesId, + operationType: { $in: ['insert', 'update', 'replace'] } + } + }, + { + $project: { + operationType: 1, + 'documentKey._id': 1, + 'updateDescription.updatedFields.state': 1, + 'updateDescription.updatedFields.last_checkpoint': 1, + 'updateDescription.updatedFields.last_checkpoint_lsn': 1, + 'fullDocument._id': 1, + 'fullDocument.state': 1, + 'fullDocument.last_checkpoint': 1, + 'fullDocument.last_checkpoint_lsn': 1 + } + } + ]; + return pipeline; + } + + async getCheckpointChanges(options: GetCheckpointChangesOptions): Promise { + return CHECKPOINT_INVALIDATE_ALL; + } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index b24e9595e..f12447613 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -141,6 +141,11 @@ export interface SyncRuleDocument { content: string; } +export type SyncRuleCheckpointState = Pick< + SyncRuleDocument, + 'last_checkpoint' | 'last_checkpoint_lsn' | '_id' | 'state' +>; + export interface CustomWriteCheckpointDocument { _id: bson.ObjectId; user_id: string; diff --git a/modules/module-mongodb-storage/test/src/storage_sync.test.ts b/modules/module-mongodb-storage/test/src/storage_sync.test.ts index 2617671e5..1352586ae 100644 --- a/modules/module-mongodb-storage/test/src/storage_sync.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_sync.test.ts @@ -19,7 +19,7 @@ describe('sync - mongodb', () => { - SELECT id, description FROM "%" ` ); - using factory = await INITIALIZED_MONGO_STORAGE_FACTORY(); + await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY(); const bucketStorage = factory.getInstance(sync_rules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 90763fd0f..a2cc07187 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -1,5 +1,5 @@ import { mongo } from '@powersync/lib-service-mongodb'; -import { ActiveCheckpoint, BucketStorageFactory, OpId, SyncRulesBucketStorage } from '@powersync/service-core'; +import { BucketStorageFactory, OpId, ReplicationCheckpoint, SyncRulesBucketStorage } from '@powersync/service-core'; import { test_utils } from '@powersync/service-core-tests'; import { ChangeStream, ChangeStreamOptions } from '@module/replication/ChangeStream.js'; @@ -138,7 +138,7 @@ export class ChangeStreamTestContext { export async function getClientCheckpoint( client: mongo.MongoClient, db: mongo.Db, - bucketStorage: BucketStorageFactory, + storageFactory: BucketStorageFactory, options?: { timeout?: number } ): Promise { const start = Date.now(); @@ -147,14 +147,15 @@ export async function getClientCheckpoint( // Since we don't use LSNs anymore, the only way to get that is to wait. const timeout = options?.timeout ?? 
50_000; - let lastCp: ActiveCheckpoint | null = null; + let lastCp: ReplicationCheckpoint | null = null; while (Date.now() - start < timeout) { - const cp = await bucketStorage.getActiveCheckpoint(); - lastCp = cp; - if (!cp.hasSyncRules()) { + const storage = await storageFactory.getActiveStorage(); + const cp = await storage?.getCheckpoint(); + if (cp == null) { throw new Error('No sync rules available'); } + lastCp = cp; if (cp.lsn && cp.lsn >= lsn) { return cp.checkpoint; } diff --git a/modules/module-mysql/test/src/BinlogStreamUtils.ts b/modules/module-mysql/test/src/BinlogStreamUtils.ts index 8b4c331c0..36da91f06 100644 --- a/modules/module-mysql/test/src/BinlogStreamUtils.ts +++ b/modules/module-mysql/test/src/BinlogStreamUtils.ts @@ -3,10 +3,10 @@ import { BinLogStream, BinLogStreamOptions } from '@module/replication/BinLogStr import { MySQLConnectionManager } from '@module/replication/MySQLConnectionManager.js'; import { logger } from '@powersync/lib-services-framework'; import { - ActiveCheckpoint, BucketStorageFactory, OpId, OplogEntry, + ReplicationCheckpoint, storage, SyncRulesBucketStorage } from '@powersync/service-core'; @@ -148,7 +148,7 @@ export class BinlogStreamTestContext { export async function getClientCheckpoint( connection: mysqlPromise.Connection, - bucketStorage: BucketStorageFactory, + storageFactory: BucketStorageFactory, options?: { timeout?: number } ): Promise { const start = Date.now(); @@ -157,16 +157,16 @@ export async function getClientCheckpoint( // Since we don't use LSNs anymore, the only way to get that is to wait. const timeout = options?.timeout ?? 50_000; - let lastCp: ActiveCheckpoint | null = null; + let lastCp: ReplicationCheckpoint | null = null; logger.info('Expected Checkpoint: ' + gtid.comparable); while (Date.now() - start < timeout) { - const cp = await bucketStorage.getActiveCheckpoint(); - lastCp = cp; - //logger.info('Last Checkpoint: ' + lastCp.lsn); - if (!cp.hasSyncRules()) { + const storage = await storageFactory.getActiveStorage(); + const cp = await storage?.getCheckpoint(); + if (cp == null) { throw new Error('No sync rules available'); } + lastCp = cp; if (cp.lsn && cp.lsn >= gtid.comparable) { return cp.checkpoint; } diff --git a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts index b9a70e6c5..2ef50a697 100644 --- a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts +++ b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts @@ -1,11 +1,8 @@ import * as framework from '@powersync/lib-services-framework'; -import { storage, sync, UpdateSyncRulesOptions, utils } from '@powersync/service-core'; +import { storage, SyncRulesBucketStorage, UpdateSyncRulesOptions } from '@powersync/service-core'; import * as pg_wire from '@powersync/service-jpgwire'; import * as sync_rules from '@powersync/service-sync-rules'; import crypto from 'crypto'; -import { wrapWithAbort } from 'ix/asynciterable/operators/withabort.js'; -import { LRUCache } from 'lru-cache/min'; -import * as timers from 'timers/promises'; import * as uuid from 'uuid'; import * as lib_postgres from '@powersync/lib-service-postgres'; @@ -22,38 +19,13 @@ export type PostgresBucketStorageOptions = { }; export class PostgresBucketStorageFactory - extends framework.DisposableObserver + extends framework.BaseObserver implements storage.BucketStorageFactory { readonly db: lib_postgres.DatabaseClient; public readonly slot_name_prefix: 
string; - private sharedIterator = new sync.BroadcastIterable((signal) => this.watchActiveCheckpoint(signal)); - - private readonly storageCache = new LRUCache({ - max: 3, - fetchMethod: async (id) => { - const syncRulesRow = await this.db.sql` - SELECT - * - FROM - sync_rules - WHERE - id = ${{ value: id, type: 'int4' }} - ` - .decoded(models.SyncRules) - .first(); - if (syncRulesRow == null) { - // Deleted in the meantime? - return undefined; - } - const rules = new PostgresPersistedSyncRulesContent(this.db, syncRulesRow); - return this.getInstance(rules); - }, - dispose: (storage) => { - storage[Symbol.dispose](); - } - }); + private activeStorageCache: storage.SyncRulesBucketStorage | undefined; constructor(protected options: PostgresBucketStorageOptions) { super(); @@ -70,7 +42,6 @@ export class PostgresBucketStorageFactory } async [Symbol.asyncDispose]() { - super[Symbol.dispose](); await this.db[Symbol.asyncDispose](); } @@ -89,8 +60,7 @@ export class PostgresBucketStorageFactory this.iterateListeners((cb) => cb.syncStorageCreated?.(storage)); storage.registerListener({ batchStarted: (batch) => { - // This nested listener will be automatically disposed when the storage is disposed - batch.registerManagedListener(storage, { + batch.registerListener({ replicationEvent: (payload) => this.iterateListeners((cb) => cb.replicationEvent?.(payload)) }); } @@ -382,126 +352,21 @@ export class PostgresBucketStorageFactory return rows.map((row) => new PostgresPersistedSyncRulesContent(this.db, row)); } - async getActiveCheckpoint(): Promise { - const activeCheckpoint = await this.db.sql` - SELECT - id, - last_checkpoint, - last_checkpoint_lsn - FROM - sync_rules - WHERE - state = ${{ value: storage.SyncRuleState.ACTIVE, type: 'varchar' }} - ORDER BY - id DESC - LIMIT - 1 - ` - .decoded(models.ActiveCheckpoint) - .first(); - - return this.makeActiveCheckpoint(activeCheckpoint); - } - - async *watchWriteCheckpoint(user_id: string, signal: AbortSignal): AsyncIterable { - let lastCheckpoint: utils.OpId | null = null; - let lastWriteCheckpoint: bigint | null = null; - - const iter = wrapWithAbort(this.sharedIterator, signal); - for await (const cp of iter) { - const { checkpoint, lsn } = cp; - - // lsn changes are not important by itself. - // What is important is: - // 1. checkpoint (op_id) changes. - // 2. write checkpoint changes for the specific user - const bucketStorage = await cp.getBucketStorage(); - if (!bucketStorage) { - continue; - } - - const lsnFilters: Record = lsn ? { 1: lsn } : {}; - - const currentWriteCheckpoint = await bucketStorage.lastWriteCheckpoint({ - user_id, - heads: { - ...lsnFilters - } - }); - - if (currentWriteCheckpoint == lastWriteCheckpoint && checkpoint == lastCheckpoint) { - // No change - wait for next one - // In some cases, many LSNs may be produced in a short time. - // Add a delay to throttle the write checkpoint lookup a bit. 
- await timers.setTimeout(20 + 10 * Math.random()); - continue; - } - - lastWriteCheckpoint = currentWriteCheckpoint; - lastCheckpoint = checkpoint; - - yield { base: cp, writeCheckpoint: currentWriteCheckpoint }; + async getActiveStorage(): Promise { + const content = await this.getActiveSyncRulesContent(); + if (content == null) { + return null; } - } - - protected async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable { - const doc = await this.db.sql` - SELECT - id, - last_checkpoint, - last_checkpoint_lsn - FROM - sync_rules - WHERE - state = ${{ type: 'varchar', value: storage.SyncRuleState.ACTIVE }} - LIMIT - 1 - ` - .decoded(models.ActiveCheckpoint) - .first(); - - const sink = new sync.LastValueSink(undefined); - const disposeListener = this.db.registerListener({ - notification: (notification) => sink.next(notification.payload) - }); - - signal.addEventListener('aborted', async () => { - disposeListener(); - sink.complete(); - }); - - yield this.makeActiveCheckpoint(doc); - - let lastOp: storage.ActiveCheckpoint | null = null; - for await (const payload of sink.withSignal(signal)) { - if (signal.aborted) { - return; - } - - const notification = models.ActiveCheckpointNotification.decode(payload); - const activeCheckpoint = this.makeActiveCheckpoint(notification.active_checkpoint); - - if (lastOp == null || activeCheckpoint.lsn != lastOp.lsn || activeCheckpoint.checkpoint != lastOp.checkpoint) { - lastOp = activeCheckpoint; - yield activeCheckpoint; - } + // It is important that this instance is cached. + // Not for the instance construction itself, but to ensure that internal caches on the instance + // are re-used properly. + if (this.activeStorageCache?.group_id == content.id) { + return this.activeStorageCache; + } else { + const instance = this.getInstance(content); + this.activeStorageCache = instance; + return instance; } } - - private makeActiveCheckpoint(row: models.ActiveCheckpointDecoded | null) { - return { - checkpoint: utils.timestampToOpId(row?.last_checkpoint ?? 0n), - lsn: row?.last_checkpoint_lsn ?? null, - hasSyncRules() { - return row != null; - }, - getBucketStorage: async () => { - if (row == null) { - return null; - } - return (await this.storageCache.fetch(Number(row.id))) ?? 
null; - } - } satisfies storage.ActiveCheckpoint; - } } diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 47735bd88..3edcb6825 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -1,6 +1,15 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; -import { DisposableObserver, ReplicationAssertionError } from '@powersync/lib-services-framework'; -import { storage, utils } from '@powersync/service-core'; +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { + BroadcastIterable, + CHECKPOINT_INVALIDATE_ALL, + CheckpointChanges, + GetCheckpointChangesOptions, + LastValueSink, + storage, + utils, + WatchWriteCheckpointOptions +} from '@powersync/service-core'; import { JSONBig } from '@powersync/service-jsonbig'; import * as sync_rules from '@powersync/service-sync-rules'; import * as uuid from 'uuid'; @@ -8,7 +17,9 @@ import { BIGINT_MAX } from '../types/codecs.js'; import { models, RequiredOperationBatchLimits } from '../types/types.js'; import { replicaIdToSubkey } from '../utils/bson.js'; import { mapOpEntry } from '../utils/bucket-data.js'; +import * as timers from 'timers/promises'; +import * as framework from '@powersync/lib-services-framework'; import { StatementParam } from '@powersync/service-jpgwire'; import { StoredRelationId } from '../types/models/SourceTable.js'; import { pick } from '../utils/ts-codec.js'; @@ -16,6 +27,7 @@ import { PostgresBucketBatch } from './batch/PostgresBucketBatch.js'; import { PostgresWriteCheckpointAPI } from './checkpoints/PostgresWriteCheckpointAPI.js'; import { PostgresBucketStorageFactory } from './PostgresBucketStorageFactory.js'; import { PostgresCompactor } from './PostgresCompactor.js'; +import { wrapWithAbort } from 'ix/asynciterable/operators/withabort.js'; export type PostgresSyncRulesStorageOptions = { factory: PostgresBucketStorageFactory; @@ -26,7 +38,7 @@ export type PostgresSyncRulesStorageOptions = { }; export class PostgresSyncRulesStorage - extends DisposableObserver + extends framework.BaseObserver implements storage.SyncRulesBucketStorage { public readonly group_id: number; @@ -34,6 +46,8 @@ export class PostgresSyncRulesStorage public readonly slot_name: string; public readonly factory: PostgresBucketStorageFactory; + private sharedIterator = new BroadcastIterable((signal) => this.watchActiveCheckpoint(signal)); + protected db: lib_postgres.DatabaseClient; protected writeCheckpointAPI: PostgresWriteCheckpointAPI; @@ -272,7 +286,7 @@ export class PostgresSyncRulesStorage const checkpoint_lsn = syncRules?.last_checkpoint_lsn ?? 
null; - await using batch = new PostgresBucketBatch({ + const batch = new PostgresBucketBatch({ db: this.db, sync_rules: this.sync_rules.parsed(options).sync_rules, group_id: this.group_id, @@ -663,4 +677,137 @@ export class PostgresSyncRulesStorage }) ); } + + async getActiveCheckpoint(): Promise { + const activeCheckpoint = await this.db.sql` + SELECT + id, + last_checkpoint, + last_checkpoint_lsn + FROM + sync_rules + WHERE + state = ${{ value: storage.SyncRuleState.ACTIVE, type: 'varchar' }} + ORDER BY + id DESC + LIMIT + 1 + ` + .decoded(models.ActiveCheckpoint) + .first(); + + return this.makeActiveCheckpoint(activeCheckpoint); + } + + async *watchWriteCheckpoint(options: WatchWriteCheckpointOptions): AsyncIterable { + let lastCheckpoint: utils.OpId | null = null; + let lastWriteCheckpoint: bigint | null = null; + + const { signal, user_id } = options; + + const iter = wrapWithAbort(this.sharedIterator, signal); + for await (const cp of iter) { + const { checkpoint, lsn } = cp; + + // lsn changes are not important by itself. + // What is important is: + // 1. checkpoint (op_id) changes. + // 2. write checkpoint changes for the specific user + const lsnFilters: Record = lsn ? { 1: lsn } : {}; + + const currentWriteCheckpoint = await this.lastWriteCheckpoint({ + user_id, + heads: { + ...lsnFilters + } + }); + + if (currentWriteCheckpoint == lastWriteCheckpoint && checkpoint == lastCheckpoint) { + // No change - wait for next one + // In some cases, many LSNs may be produced in a short time. + // Add a delay to throttle the write checkpoint lookup a bit. + await timers.setTimeout(20 + 10 * Math.random()); + continue; + } + + lastWriteCheckpoint = currentWriteCheckpoint; + lastCheckpoint = checkpoint; + + yield { + base: cp, + writeCheckpoint: currentWriteCheckpoint, + update: CHECKPOINT_INVALIDATE_ALL + }; + } + } + + protected async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable { + const doc = await this.db.sql` + SELECT + id, + last_checkpoint, + last_checkpoint_lsn + FROM + sync_rules + WHERE + state = ${{ type: 'varchar', value: storage.SyncRuleState.ACTIVE }} + LIMIT + 1 + ` + .decoded(models.ActiveCheckpoint) + .first(); + + if (doc == null) { + // Abort the connections - clients will have to retry later. 
+ throw new framework.ServiceError(framework.ErrorCode.PSYNC_S2302, 'No active sync rules available'); + } + + const sink = new LastValueSink(undefined); + + const disposeListener = this.db.registerListener({ + notification: (notification) => sink.next(notification.payload) + }); + + signal.addEventListener('aborted', async () => { + disposeListener(); + sink.complete(); + }); + + yield this.makeActiveCheckpoint(doc); + + let lastOp: storage.ReplicationCheckpoint | null = null; + for await (const payload of sink.withSignal(signal)) { + if (signal.aborted) { + return; + } + + const notification = models.ActiveCheckpointNotification.decode(payload); + if (notification.active_checkpoint == null) { + continue; + } + if (Number(notification.active_checkpoint.id) != doc.id) { + // Active sync rules changed - abort and restart the stream + break; + } + + const activeCheckpoint = this.makeActiveCheckpoint(notification.active_checkpoint); + + if (lastOp == null || activeCheckpoint.lsn != lastOp.lsn || activeCheckpoint.checkpoint != lastOp.checkpoint) { + lastOp = activeCheckpoint; + yield activeCheckpoint; + } + } + } + + async getCheckpointChanges(options: GetCheckpointChangesOptions): Promise { + // We do not track individual changes yet + return CHECKPOINT_INVALIDATE_ALL; + } + + private makeActiveCheckpoint(row: models.ActiveCheckpointDecoded | null) { + return { + checkpoint: utils.timestampToOpId(row?.last_checkpoint ?? 0n), + lsn: row?.last_checkpoint_lsn ?? null + } satisfies storage.ReplicationCheckpoint; + } } diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index 1a3b98c39..edbc5c76a 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -1,7 +1,7 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { + BaseObserver, container, - DisposableObserver, ErrorCode, errors, logger, @@ -51,7 +51,7 @@ type StatefulCheckpointDecoded = t.Decoded; const MAX_ROW_SIZE = 15 * 1024 * 1024; export class PostgresBucketBatch - extends DisposableObserver + extends BaseObserver implements storage.BucketStorageBatch { public last_flushed_op: bigint | null = null; @@ -87,6 +87,10 @@ export class PostgresBucketBatch return this.last_checkpoint_lsn; } + async [Symbol.asyncDispose]() { + super.clearListeners(); + } + async save(record: storage.SaveOptions): Promise { // TODO maybe share with abstract class const { after, before, sourceTable, tag } = record; diff --git a/modules/module-postgres-storage/test/src/storage.test.ts b/modules/module-postgres-storage/test/src/storage.test.ts index 5977ec220..2aa5a2605 100644 --- a/modules/module-postgres-storage/test/src/storage.test.ts +++ b/modules/module-postgres-storage/test/src/storage.test.ts @@ -26,7 +26,7 @@ describe('Postgres Sync Bucket Storage', () => { - SELECT id, description FROM "%" ` ); - using factory = await POSTGRES_STORAGE_FACTORY(); + await using factory = await POSTGRES_STORAGE_FACTORY(); const bucketStorage = factory.getInstance(sync_rules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { diff --git a/modules/module-postgres/test/src/checkpoints.test.ts b/modules/module-postgres/test/src/checkpoints.test.ts index 84907cdea..0f60e3ed8 100644 --- a/modules/module-postgres/test/src/checkpoints.test.ts +++ b/modules/module-postgres/test/src/checkpoints.test.ts @@ 
-25,13 +25,14 @@ describe('checkpoint tests', () => { await context.replicateSnapshot(); context.startStreaming(); + const storage = context.storage!; const controller = new AbortController(); try { - const stream = context.factory.watchWriteCheckpoint( - checkpointUserId('test_user', 'test_client'), - controller.signal - ); + const stream = storage.watchWriteCheckpoint({ + user_id: checkpointUserId('test_user', 'test_client'), + signal: controller.signal + }); let lastWriteCheckpoint: bigint | null = null; diff --git a/modules/module-postgres/test/src/slow_tests.test.ts b/modules/module-postgres/test/src/slow_tests.test.ts index 438d3bb98..fae84be08 100644 --- a/modules/module-postgres/test/src/slow_tests.test.ts +++ b/modules/module-postgres/test/src/slow_tests.test.ts @@ -71,21 +71,13 @@ function defineSlowTests(factory: storage.TestStorageFactory) { // Past issues that this could reproduce intermittently: // * Skipping LSNs after a keepalive message // * Skipping LSNs when source transactions overlap - test( - 'repeated replication - basic', - async () => { - await testRepeatedReplication({ compact: false, maxBatchSize: 50, numBatches: 5 }); - }, - { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS } - ); - - test( - 'repeated replication - compacted', - async () => { - await testRepeatedReplication({ compact: true, maxBatchSize: 100, numBatches: 2 }); - }, - { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS } - ); + test('repeated replication - basic', { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS }, async () => { + await testRepeatedReplication({ compact: false, maxBatchSize: 50, numBatches: 5 }); + }); + + test('repeated replication - compacted', { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS }, async () => { + await testRepeatedReplication({ compact: true, maxBatchSize: 100, numBatches: 2 }); + }); async function testRepeatedReplication(testOptions: { compact: boolean; maxBatchSize: number; numBatches: number }) { const connections = new PgManager(TEST_CONNECTION_OPTIONS, {}); @@ -101,7 +93,7 @@ bucket_definitions: - SELECT * FROM "test_data" `; const syncRules = await f.updateSyncRules({ content: syncRuleContent }); - using storage = f.getInstance(syncRules); + const storage = f.getInstance(syncRules); abortController = new AbortController(); const options: WalStreamOptions = { abort_signal: abortController.signal, @@ -314,116 +306,112 @@ bucket_definitions: // // If the first LSN does not correctly match with the first replication transaction, // we may miss some updates. - test( - 'repeated initial replication', - async () => { - const pool = await connectPgPool(); - await clearTestDb(pool); - await using f = await factory(); - - const syncRuleContent = ` + test('repeated initial replication', { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS }, async () => { + const pool = await connectPgPool(); + await clearTestDb(pool); + await using f = await factory(); + + const syncRuleContent = ` bucket_definitions: global: data: - SELECT id, description FROM "test_data" `; - const syncRules = await f.updateSyncRules({ content: syncRuleContent }); - using storage = f.getInstance(syncRules); + const syncRules = await f.updateSyncRules({ content: syncRuleContent }); + const storage = f.getInstance(syncRules); - // 1. Setup some base data that will be replicated in initial replication - await pool.query(`CREATE TABLE test_data(id uuid primary key default uuid_generate_v4(), description text)`); + // 1. 
Setup some base data that will be replicated in initial replication + await pool.query(`CREATE TABLE test_data(id uuid primary key default uuid_generate_v4(), description text)`); - let statements: pgwire.Statement[] = []; + let statements: pgwire.Statement[] = []; - const n = Math.floor(Math.random() * 200); - for (let i = 0; i < n; i++) { - statements.push({ - statement: `INSERT INTO test_data(description) VALUES('test_init')` - }); - } - await pool.query(...statements); - - const start = Date.now(); - let i = 0; - - while (Date.now() - start < TEST_DURATION_MS) { - // 2. Each iteration starts with a clean slate - await pool.query(`SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE active = FALSE`); - i += 1; - - const connections = new PgManager(TEST_CONNECTION_OPTIONS, {}); - const replicationConnection = await connections.replicationConnection(); - - abortController = new AbortController(); - const options: WalStreamOptions = { - abort_signal: abortController.signal, - connections, - storage: storage - }; - walStream = new WalStream(options); - - await storage.clear(); - - // 3. Start initial replication, then streaming, but don't wait for any of this - let initialReplicationDone = false; - streamPromise = (async () => { - await walStream.initReplication(replicationConnection); - await storage.autoActivate(); + const n = Math.floor(Math.random() * 200); + for (let i = 0; i < n; i++) { + statements.push({ + statement: `INSERT INTO test_data(description) VALUES('test_init')` + }); + } + await pool.query(...statements); + + const start = Date.now(); + let i = 0; + + while (Date.now() - start < TEST_DURATION_MS) { + // 2. Each iteration starts with a clean slate + await pool.query(`SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE active = FALSE`); + i += 1; + + const connections = new PgManager(TEST_CONNECTION_OPTIONS, {}); + const replicationConnection = await connections.replicationConnection(); + + abortController = new AbortController(); + const options: WalStreamOptions = { + abort_signal: abortController.signal, + connections, + storage: storage + }; + walStream = new WalStream(options); + + await storage.clear(); + + // 3. Start initial replication, then streaming, but don't wait for any of this + let initialReplicationDone = false; + streamPromise = (async () => { + await walStream.initReplication(replicationConnection); + await storage.autoActivate(); + initialReplicationDone = true; + await walStream.streamChanges(replicationConnection); + })() + .catch((e) => { initialReplicationDone = true; - await walStream.streamChanges(replicationConnection); - })() - .catch((e) => { - initialReplicationDone = true; - throw e; - }) - .then((v) => { - return v; - }); + throw e; + }) + .then((v) => { + return v; + }); - // 4. 
While initial replication is still running, write more changes - while (!initialReplicationDone) { - let statements: pgwire.Statement[] = []; - const n = Math.floor(Math.random() * 10) + 1; - for (let i = 0; i < n; i++) { - const description = `test${i}`; - statements.push({ - statement: `INSERT INTO test_data(description) VALUES('test1') returning id as test_id`, - params: [{ type: 'varchar', value: description }] - }); - } - const results = await pool.query(...statements); - const ids = results.results.map((sub) => { - return sub.rows[0][0] as string; - }); - await new Promise((resolve) => setTimeout(resolve, Math.random() * 30)); - const deleteStatements: pgwire.Statement[] = ids.map((id) => { - return { - statement: `DELETE FROM test_data WHERE id = $1`, - params: [{ type: 'uuid', value: id }] - }; + // 4. While initial replication is still running, write more changes + while (!initialReplicationDone) { + let statements: pgwire.Statement[] = []; + const n = Math.floor(Math.random() * 10) + 1; + for (let i = 0; i < n; i++) { + const description = `test${i}`; + statements.push({ + statement: `INSERT INTO test_data(description) VALUES('test1') returning id as test_id`, + params: [{ type: 'varchar', value: description }] }); - await pool.query(...deleteStatements); - await new Promise((resolve) => setTimeout(resolve, Math.random() * 10)); - } - - // 5. Once initial replication is done, wait for the streaming changes to complete syncing. - // getClientCheckpoint() effectively waits for the above replication to complete - // Race with streamingPromise to catch replication errors here. - let checkpoint = await Promise.race([ - getClientCheckpoint(pool, storage.factory, { timeout: TIMEOUT_MARGIN_MS }), - streamPromise - ]); - if (typeof checkpoint == undefined) { - // This indicates an issue with the test setup - streamingPromise completed instead - // of getClientCheckpoint() - throw new Error('Test failure - streamingPromise completed'); } + const results = await pool.query(...statements); + const ids = results.results.map((sub) => { + return sub.rows[0][0] as string; + }); + await new Promise((resolve) => setTimeout(resolve, Math.random() * 30)); + const deleteStatements: pgwire.Statement[] = ids.map((id) => { + return { + statement: `DELETE FROM test_data WHERE id = $1`, + params: [{ type: 'uuid', value: id }] + }; + }); + await pool.query(...deleteStatements); + await new Promise((resolve) => setTimeout(resolve, Math.random() * 10)); + } - abortController.abort(); - await streamPromise; - await connections.end(); + // 5. Once initial replication is done, wait for the streaming changes to complete syncing. + // getClientCheckpoint() effectively waits for the above replication to complete + // Race with streamingPromise to catch replication errors here. 
+ let checkpoint = await Promise.race([ + getClientCheckpoint(pool, storage.factory, { timeout: TIMEOUT_MARGIN_MS }), + streamPromise + ]); + if (typeof checkpoint == undefined) { + // This indicates an issue with the test setup - streamingPromise completed instead + // of getClientCheckpoint() + throw new Error('Test failure - streamingPromise completed'); } - }, - { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS } - ); + + abortController.abort(); + await streamPromise; + await connections.end(); + } + }); } diff --git a/modules/module-postgres/test/src/util.ts b/modules/module-postgres/test/src/util.ts index 0f3ad3519..0a8c76696 100644 --- a/modules/module-postgres/test/src/util.ts +++ b/modules/module-postgres/test/src/util.ts @@ -62,7 +62,7 @@ export function connectPgPool() { export async function getClientCheckpoint( db: pgwire.PgClient, - bucketStorage: BucketStorageFactory, + storageFactory: BucketStorageFactory, options?: { timeout?: number } ): Promise { const start = Date.now(); @@ -77,8 +77,9 @@ export async function getClientCheckpoint( logger.info(`Waiting for LSN checkpoint: ${lsn}`); while (Date.now() - start < timeout) { - const cp = await bucketStorage.getActiveCheckpoint(); - if (!cp.hasSyncRules()) { + const storage = await storageFactory.getActiveStorage(); + const cp = await storage?.getCheckpoint(); + if (cp == null) { throw new Error('No sync rules available'); } if (cp.lsn && cp.lsn >= lsn) { diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts index 25af4347f..459e0cee0 100644 --- a/modules/module-postgres/test/src/wal_stream_utils.ts +++ b/modules/module-postgres/test/src/wal_stream_utils.ts @@ -45,7 +45,6 @@ export class WalStreamTestContext implements AsyncDisposable { this.abortController.abort(); await this.streamPromise; await this.connectionManager.destroy(); - this.storage?.[Symbol.dispose](); await this.factory?.[Symbol.asyncDispose](); } diff --git a/packages/service-core-tests/src/tests/register-data-storage-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-tests.ts index a67ae9cab..53743f1fa 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-tests.ts @@ -140,7 +140,7 @@ bucket_definitions: ); await using factory = await generateStorageFactory(); - await using bucketStorage = factory.getInstance(sync_rules); + const bucketStorage = factory.getInstance(sync_rules); const table = test_utils.makeTestTable('todos', ['id', 'list_id']); @@ -394,11 +394,10 @@ bucket_definitions: const parameter_sets = await bucketStorage.getParameterSets(checkpoint, lookups); expect(parameter_sets).toEqual([{ workspace_id: 'workspace1' }]); - const buckets = await sync_rules.queryBucketDescriptions({ + const buckets = await sync_rules.getBucketParameterQuerier(parameters).queryDynamicBucketDescriptions({ getParameterSets(lookups) { return bucketStorage.getParameterSets(checkpoint, lookups); - }, - parameters + } }); expect(buckets).toEqual([{ bucket: 'by_workspace["workspace1"]', priority: 3 }]); }); @@ -466,11 +465,10 @@ bucket_definitions: parameter_sets.sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b))); expect(parameter_sets).toEqual([{ workspace_id: 'workspace1' }, { workspace_id: 'workspace3' }]); - const buckets = await sync_rules.queryBucketDescriptions({ + const buckets = await sync_rules.getBucketParameterQuerier(parameters).queryDynamicBucketDescriptions({ 
getParameterSets(lookups) { return bucketStorage.getParameterSets(checkpoint, lookups); - }, - parameters + } }); buckets.sort((a, b) => a.bucket.localeCompare(b.bucket)); expect(buckets).toEqual([ @@ -566,11 +564,10 @@ bucket_definitions: // Test final values - the important part const buckets = ( - await sync_rules.queryBucketDescriptions({ + await sync_rules.getBucketParameterQuerier(parameters).queryDynamicBucketDescriptions({ getParameterSets(lookups) { return bucketStorage.getParameterSets(checkpoint, lookups); - }, - parameters + } }) ).map((e) => e.bucket); buckets.sort(); @@ -1420,84 +1417,6 @@ bucket_definitions: expect(test_utils.getBatchMeta(batch3)).toEqual(null); }); - test('batch should be disposed automatically', async () => { - const sync_rules = test_utils.testRules(` - bucket_definitions: - global: - data: [] - `); - - await using factory = await generateStorageFactory(); - const bucketStorage = factory.getInstance(sync_rules); - - let isDisposed = false; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - batch.registerListener({ - disposed: () => { - isDisposed = true; - } - }); - }); - expect(isDisposed).true; - - isDisposed = false; - let errorCaught = false; - try { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - batch.registerListener({ - disposed: () => { - isDisposed = true; - } - }); - throw new Error(`Testing exceptions`); - }); - } catch (ex) { - errorCaught = true; - expect(ex.message.includes('Testing')).true; - } - expect(errorCaught).true; - expect(isDisposed).true; - }); - - test('batch should be disposed automatically', async () => { - const sync_rules = test_utils.testRules(` - bucket_definitions: - global: - data: [] - `); - - await using factory = await generateStorageFactory(); - const bucketStorage = factory.getInstance(sync_rules); - - let isDisposed = false; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - batch.registerListener({ - disposed: () => { - isDisposed = true; - } - }); - }); - expect(isDisposed).true; - - isDisposed = false; - let errorCaught = false; - try { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - batch.registerListener({ - disposed: () => { - isDisposed = true; - } - }); - throw new Error(`Testing exceptions`); - }); - } catch (ex) { - errorCaught = true; - expect(ex.message.includes('Testing')).true; - } - expect(errorCaught).true; - expect(isDisposed).true; - }); - test('empty storage metrics', async () => { await using f = await generateStorageFactory({ dropAll: true }); const metrics = await f.getStorageMetrics(); diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index d96dc34ad..72548e6ac 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -67,13 +67,13 @@ export function registerSyncTests(factory: storage.TestStorageFactory) { }); const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: Date.now() / 1000 + 10 } as any @@ -128,13 +128,13 @@ bucket_definitions: }); const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + 
syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: Date.now() / 1000 + 10 } as any @@ -191,42 +191,58 @@ bucket_definitions: }); const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: Date.now() / 1000 + 10 } as any }); let sentCheckpoints = 0; - for await (const next of stream) { + let sentRows = 0; + + for await (let next of stream) { + if (typeof next == 'string') { + next = JSON.parse(next); + } if (typeof next === 'object' && next !== null) { if ('partial_checkpoint_complete' in next) { - expect(sentCheckpoints).toBe(1); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add another high-priority row. This should interrupt the long-running low-priority sync. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio2', - description: 'Another high-priority row' - }, - afterReplicaId: 'highprio2' + if (sentCheckpoints == 1) { + // Save new data to interrupt the low-priority sync. + + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + // Add another high-priority row. This should interrupt the long-running low-priority sync. + await batch.save({ + sourceTable: TEST_TABLE, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio2', + description: 'Another high-priority row' + }, + afterReplicaId: 'highprio2' + }); + + await batch.commit('0/2'); }); - - await batch.commit('0/2'); - }); + } else { + // Low-priority sync from the first checkpoint was interrupted. This should not happen before + // 1000 low-priority items were synchronized. 
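As a rough sketch (not part of this changeset), the loop in the test above can be read as the general shape of a consumer of this stream: string chunks are JSON-encoded lines, object chunks carry one of the streaming sync line types, and a `checkpoint_complete` line ends the batch. The helper name below is illustrative only; the line shapes and the `next.data.data.length` access are taken from the test code in this hunk.

// Hypothetical helper mirroring the test loop: tally checkpoints and data rows.
async function countStreamLines(stream: AsyncIterable<any>): Promise<{ checkpoints: number; rows: number }> {
  let checkpoints = 0;
  let rows = 0;
  for await (let next of stream) {
    if (typeof next == 'string') {
      // Lines may arrive serialized; parse them the same way the test does.
      next = JSON.parse(next);
    }
    if (next == null || typeof next != 'object') {
      continue;
    }
    if ('checkpoint' in next || 'checkpoint_diff' in next) {
      checkpoints += 1;
    }
    if ('data' in next) {
      // Each data line wraps a SyncBucketData batch.
      rows += next.data.data.length;
    }
    if ('checkpoint_complete' in next) {
      break;
    }
  }
  return { checkpoints, rows };
}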
+ expect(sentCheckpoints).toBe(2); + expect(sentRows).toBeGreaterThan(1000); + } } if ('checkpoint' in next || 'checkpoint_diff' in next) { sentCheckpoints += 1; } + + if ('data' in next) { + sentRows += next.data.data.length; + } if ('checkpoint_complete' in next) { break; } @@ -234,6 +250,7 @@ bucket_definitions: } expect(sentCheckpoints).toBe(2); + expect(sentRows).toBe(10002); }); test('sync legacy non-raw data', async () => { @@ -262,13 +279,13 @@ bucket_definitions: }); const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: false }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: Date.now() / 1000 + 10 } as any @@ -287,17 +304,17 @@ bucket_definitions: content: BASIC_SYNC_RULES }); - const storage = await f.getInstance(syncRules); - await storage.autoActivate(); + const bucketStorage = await f.getInstance(syncRules); + await bucketStorage.autoActivate(); const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: 0 } as any @@ -318,13 +335,13 @@ bucket_definitions: await bucketStorage.autoActivate(); const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: Date.now() / 1000 + 10 } as any @@ -375,19 +392,19 @@ bucket_definitions: content: BASIC_SYNC_RULES }); - const storage = await f.getInstance(syncRules); - await storage.autoActivate(); + const bucketStorage = await f.getInstance(syncRules); + await bucketStorage.autoActivate(); const exp = Date.now() / 1000 + 0.1; const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: exp } as any @@ -441,13 +458,13 @@ bucket_definitions: }); const stream = sync.streamResponse({ - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: '' }, {}), token: { exp: Date.now() / 1000 + 10 } as any @@ -564,13 +581,13 @@ bucket_definitions: }); const params: sync.SyncStreamParameters = { - storage: f, + bucketStorage: bucketStorage, + syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, raw_data: true }, - parseOptions: test_utils.PARSE_OPTIONS, tracker, syncParams: new RequestParameters({ sub: 'test' }, {}), token: { sub: 'test', exp: Date.now() / 1000 + 10 } as any diff --git a/packages/service-core/src/api/RouteAPI.ts b/packages/service-core/src/api/RouteAPI.ts index 4cebc47a5..14967dba6 100644 --- 
a/packages/service-core/src/api/RouteAPI.ts +++ b/packages/service-core/src/api/RouteAPI.ts @@ -1,6 +1,6 @@ import { SqlSyncRules, TablePattern } from '@powersync/service-sync-rules'; import * as types from '@powersync/service-types'; -import { ParseSyncRulesOptions, SyncRulesBucketStorage } from '../storage/BucketStorage.js'; +import { ParseSyncRulesOptions, SyncRulesBucketStorage } from '../storage/storage-index.js'; export interface PatternResult { schema: string; diff --git a/packages/service-core/src/api/diagnostics.ts b/packages/service-core/src/api/diagnostics.ts index 72231c9ce..3d562e5fd 100644 --- a/packages/service-core/src/api/diagnostics.ts +++ b/packages/service-core/src/api/diagnostics.ts @@ -57,7 +57,7 @@ export async function getSyncRulesStatus( // This method can run under some situations if no connection is configured yet. // It will return a default tag in such a case. This default tag is not module specific. const tag = sourceConfig.tag ?? DEFAULT_TAG; - using systemStorage = live_status ? bucketStorage.getInstance(sync_rules) : undefined; + const systemStorage = live_status ? bucketStorage.getInstance(sync_rules) : undefined; const status = await systemStorage?.getStatus(); let replication_lag_bytes: number | undefined = undefined; diff --git a/packages/service-core/src/entry/commands/compact-action.ts b/packages/service-core/src/entry/commands/compact-action.ts index 31f49e7fb..ae50a8f03 100644 --- a/packages/service-core/src/entry/commands/compact-action.ts +++ b/packages/service-core/src/entry/commands/compact-action.ts @@ -50,14 +50,13 @@ export function registerCompactAction(program: Command) { await serviceContext.lifeCycleEngine.start(); const bucketStorage = serviceContext.storageEngine.activeBucketStorage; - const active = await bucketStorage.getActiveSyncRulesContent(); + const active = await bucketStorage.getActiveStorage(); if (active == null) { logger.info('No active instance to compact'); return; } - using p = bucketStorage.getInstance(active); logger.info('Performing compaction...'); - await p.compact({ memoryLimitMB: COMPACT_MEMORY_LIMIT_MB, compactBuckets: buckets }); + await active.compact({ memoryLimitMB: COMPACT_MEMORY_LIMIT_MB, compactBuckets: buckets }); logger.info('Successfully compacted storage.'); } catch (e) { logger.error(`Failed to compact: ${e.toString()}`); diff --git a/packages/service-core/src/modules/AbstractModule.ts b/packages/service-core/src/modules/AbstractModule.ts index 0cdb8c626..a6eecd32e 100644 --- a/packages/service-core/src/modules/AbstractModule.ts +++ b/packages/service-core/src/modules/AbstractModule.ts @@ -1,7 +1,7 @@ import { ServiceContextContainer } from '../system/ServiceContext.js'; import { logger } from '@powersync/lib-services-framework'; import winston from 'winston'; -import { PersistedSyncRulesContent } from '../storage/BucketStorage.js'; +import { PersistedSyncRulesContent } from '../storage/storage-index.js'; export interface TearDownOptions { /** diff --git a/packages/service-core/src/replication/AbstractReplicator.ts b/packages/service-core/src/replication/AbstractReplicator.ts index dd505da7b..b219a3265 100644 --- a/packages/service-core/src/replication/AbstractReplicator.ts +++ b/packages/service-core/src/replication/AbstractReplicator.ts @@ -199,7 +199,6 @@ export abstract class AbstractReplicator= head) { diff --git a/packages/service-core/src/routes/endpoints/socket-route.ts b/packages/service-core/src/routes/endpoints/socket-route.ts index eca069324..db9b841a4 100644 --- 
a/packages/service-core/src/routes/endpoints/socket-route.ts +++ b/packages/service-core/src/routes/endpoints/socket-route.ts @@ -49,9 +49,9 @@ export const syncStreamReactive: SocketRouteGenerator = (router) => const { storageEngine: { activeBucketStorage } } = service_context; - // Sanity check before we start the stream - const cp = await activeBucketStorage.getActiveCheckpoint(); - if (!cp.hasSyncRules()) { + + const bucketStorage = await activeBucketStorage.getActiveStorage(); + if (bucketStorage == null) { responder.onError( new errors.ServiceError({ status: 500, @@ -63,6 +63,8 @@ export const syncStreamReactive: SocketRouteGenerator = (router) => return; } + const syncRules = bucketStorage.getParsedSyncRules(routerEngine!.getAPI().getParseSyncRulesOptions()); + const removeStopHandler = routerEngine!.addStopHandler(() => { controller.abort(); }); @@ -71,8 +73,8 @@ export const syncStreamReactive: SocketRouteGenerator = (router) => const tracker = new sync.RequestTracker(); try { for await (const data of sync.streamResponse({ - storage: activeBucketStorage, - parseOptions: routerEngine!.getAPI().getParseSyncRulesOptions(), + bucketStorage: bucketStorage, + syncRules: syncRules, params: { ...params, binary_data: true // always true for web sockets diff --git a/packages/service-core/src/routes/endpoints/sync-stream.ts b/packages/service-core/src/routes/endpoints/sync-stream.ts index aaf84ae07..b8bcd88a1 100644 --- a/packages/service-core/src/routes/endpoints/sync-stream.ts +++ b/packages/service-core/src/routes/endpoints/sync-stream.ts @@ -36,15 +36,18 @@ export const syncStreamed = routeDefinition({ const params: util.StreamingSyncRequest = payload.params; const syncParams = new RequestParameters(payload.context.token_payload!, payload.params.parameters ?? 
{}); - // Sanity check before we start the stream - const cp = await storageEngine.activeBucketStorage.getActiveCheckpoint(); - if (!cp.hasSyncRules()) { + const bucketStorage = await storageEngine.activeBucketStorage.getActiveStorage(); + + if (bucketStorage == null) { throw new errors.ServiceError({ status: 500, code: ErrorCode.PSYNC_S2302, description: 'No sync rules available' }); } + + const syncRules = bucketStorage.getParsedSyncRules(routerEngine!.getAPI().getParseSyncRulesOptions()); + const controller = new AbortController(); const tracker = new sync.RequestTracker(); try { @@ -53,8 +56,8 @@ export const syncStreamed = routeDefinition({ sync.transformToBytesTracked( sync.ndjson( sync.streamResponse({ - storage: storageEngine.activeBucketStorage, - parseOptions: routerEngine!.getAPI().getParseSyncRulesOptions(), + bucketStorage, + syncRules: syncRules, params, syncParams, token: payload.context.token_payload!, diff --git a/packages/service-core/src/runner/teardown.ts b/packages/service-core/src/runner/teardown.ts index b6b7bb5ac..c9832b6d6 100644 --- a/packages/service-core/src/runner/teardown.ts +++ b/packages/service-core/src/runner/teardown.ts @@ -51,7 +51,7 @@ async function terminateSyncRules(storageFactory: storage.BucketStorageFactory, // Mark the sync rules as terminated for (let syncRules of combinedSyncRules) { - using syncRulesStorage = storageFactory.getInstance(syncRules); + const syncRulesStorage = storageFactory.getInstance(syncRules); // The storage will be dropped at the end of the teardown, so we don't need to clear it here await syncRulesStorage.terminate({ clearStorage: false }); } diff --git a/packages/service-core/src/storage/BucketStorage.ts b/packages/service-core/src/storage/BucketStorage.ts index 193b3d1f8..2a2787fa0 100644 --- a/packages/service-core/src/storage/BucketStorage.ts +++ b/packages/service-core/src/storage/BucketStorage.ts @@ -1,33 +1,4 @@ -import { - AsyncDisposableObserverClient, - DisposableListener, - DisposableObserverClient -} from '@powersync/lib-services-framework'; -import { - EvaluatedParameters, - EvaluatedRow, - SqlSyncRules, - SqliteJsonRow, - SqliteJsonValue, - SqliteRow, - ToastableSqliteRow -} from '@powersync/service-sync-rules'; -import { BSON } from 'bson'; -import * as util from '../util/util-index.js'; -import { ReplicationEventPayload } from './ReplicationEventPayload.js'; -import { SourceEntityDescriptor } from './SourceEntity.js'; -import { SourceTable } from './SourceTable.js'; -import { BatchedCustomWriteCheckpointOptions } from './storage-index.js'; -import { SyncStorageWriteCheckpointAPI } from './WriteCheckpointAPI.js'; - -/** - * Replica id uniquely identifying a row on the source database. - * - * Can be any value serializable to BSON. - * - * If the value is an entire document, the data serialized to a v5 UUID may be a good choice here. - */ -export type ReplicaId = BSON.UUID | BSON.Document | any; +import { ToastableSqliteRow } from '@powersync/service-sync-rules'; export enum SyncRuleState { /** @@ -55,479 +26,10 @@ export enum SyncRuleState { */ TERMINATED = 'TERMINATED' } -export interface BucketStorageFactoryListener extends DisposableListener { - syncStorageCreated: (storage: SyncRulesBucketStorage) => void; - replicationEvent: (event: ReplicationEventPayload) => void; -} - -export interface BucketStorageSystemIdentifier { - /** - * A unique identifier for the system used for storage. - * For Postgres this can be the cluster `system_identifier` and database name. 
- * For MongoDB this can be the replica set name. - */ - id: string; - /** - * A unique type for the storage implementation. - * e.g. `mongodb`, `postgresql`. - */ - type: string; -} - -export interface BucketStorageFactory extends AsyncDisposableObserverClient { - /** - * Update sync rules from configuration, if changed. - */ - configureSyncRules( - options: UpdateSyncRulesOptions - ): Promise<{ updated: boolean; persisted_sync_rules?: PersistedSyncRulesContent; lock?: ReplicationLock }>; - - /** - * Get a storage instance to query sync data for specific sync rules. - */ - getInstance(options: PersistedSyncRulesContent): SyncRulesBucketStorage; - - /** - * Deploy new sync rules. - * - * Similar to configureSyncRules, but applies the update unconditionally. - */ - updateSyncRules(options: UpdateSyncRulesOptions): Promise; - - /** - * Indicate that a slot was removed, and we should re-sync by creating - * a new sync rules instance. - * - * This is roughly the same as deploying a new version of the current sync - * rules, but also accounts for cases where the current sync rules are not - * the latest ones. - * - * Replication should be restarted after this. - * - * @param slot_name The removed slot - */ - slotRemoved(slot_name: string): Promise; - - /** - * Get the sync rules used for querying. - */ - getActiveSyncRules(options: ParseSyncRulesOptions): Promise; - - /** - * Get the sync rules used for querying. - */ - getActiveSyncRulesContent(): Promise; - - /** - * Get the sync rules that will be active next once done with initial replicatino. - */ - getNextSyncRules(options: ParseSyncRulesOptions): Promise; - - /** - * Get the sync rules that will be active next once done with initial replicatino. - */ - getNextSyncRulesContent(): Promise; - - /** - * Get all sync rules currently replicating. Typically this is the "active" and "next" sync rules. - */ - getReplicatingSyncRules(): Promise; - - /** - * Get all sync rules stopped but not terminated yet. - */ - getStoppedSyncRules(): Promise; - - /** - * Same as: - * getInstance(await getActiveSyncRules()).getCheckpoint(). - */ - getActiveCheckpoint(): Promise; - - /** - * Yields the latest user write checkpoint whenever the sync checkpoint updates. - */ - watchWriteCheckpoint(user_id: string, signal: AbortSignal): AsyncIterable; - - /** - * Get storage size of active sync rules. - */ - getStorageMetrics(): Promise; - - /** - * Get the unique identifier for this instance of Powersync - */ - getPowerSyncInstanceId(): Promise; - - /** - * Get a unique identifier for the system used for storage. - */ - getSystemIdentifier(): Promise; -} - -export interface ReplicationCheckpoint { - readonly checkpoint: util.OpId; - readonly lsn: string | null; -} - -export interface ActiveCheckpoint extends ReplicationCheckpoint { - hasSyncRules(): boolean; - - getBucketStorage(): Promise; -} - -export interface WriteCheckpoint { - base: ActiveCheckpoint; - writeCheckpoint: bigint | null; -} - -export interface StorageMetrics { - /** - * Size of operations (bucket_data) - */ - operations_size_bytes: number; - - /** - * Size of parameter storage. - * - * Replication storage -> raw data as received from Postgres. - */ - parameters_size_bytes: number; - - /** - * Size of current_data. 
- */ - replication_size_bytes: number; -} - -export interface ParseSyncRulesOptions { - defaultSchema: string; -} - -export interface PersistedSyncRulesContent { - readonly id: number; - readonly sync_rules_content: string; - readonly slot_name: string; - - readonly last_fatal_error?: string | null; - readonly last_keepalive_ts?: Date | null; - readonly last_checkpoint_ts?: Date | null; - - parsed(options: ParseSyncRulesOptions): PersistedSyncRules; - - lock(): Promise; -} - -export interface ReplicationLock { - sync_rules_id: number; - - release(): Promise; -} - -export interface PersistedSyncRules { - readonly id: number; - readonly sync_rules: SqlSyncRules; - readonly slot_name: string; -} - -export interface UpdateSyncRulesOptions { - content: string; - lock?: boolean; - validate?: boolean; -} - -export interface SyncRulesBucketStorageOptions { - sync_rules: SqlSyncRules; - group_id: number; -} export const DEFAULT_DOCUMENT_BATCH_LIMIT = 1000; export const DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES = 1 * 1024 * 1024; -export interface BucketDataBatchOptions { - /** Limit number of documents returned. Defaults to 1000. */ - limit?: number; - - /** - * Limit size of chunks returned. Defaults to 1MB. - * - * This is a lower bound, not an upper bound. As soon as the chunk size goes over this limit, - * it is returned. - * - * Note that an individual data row can be close to 16MB in size, so this does not help in - * extreme cases. - */ - chunkLimitBytes?: number; -} - -export interface StartBatchOptions extends ParseSyncRulesOptions { - zeroLSN: string; - /** - * Whether or not to store a copy of the current data. - * - * This is needed if we need to apply partial updates, for example - * when we get TOAST values from Postgres. - * - * This is not needed when we get the full document from the source - * database, for example from MongoDB. - */ - storeCurrentData: boolean; - - /** - * Set to true for initial replication. - * - * This will avoid creating new operations for rows previously replicated. - */ - skipExistingRows?: boolean; -} - -export interface SyncRulesBucketStorageListener extends DisposableListener { - batchStarted: (batch: BucketStorageBatch) => void; -} - -export interface SyncRulesBucketStorage - extends DisposableObserverClient, - SyncStorageWriteCheckpointAPI { - readonly group_id: number; - readonly slot_name: string; - - readonly factory: BucketStorageFactory; - - resolveTable(options: ResolveTableOptions): Promise; - - startBatch( - options: StartBatchOptions, - callback: (batch: BucketStorageBatch) => Promise - ): Promise; - - getCheckpoint(): Promise; - - getParsedSyncRules(options: ParseSyncRulesOptions): SqlSyncRules; - - getParameterSets(checkpoint: util.OpId, lookups: SqliteJsonValue[][]): Promise; - - /** - * Get a "batch" of data for a checkpoint. - * - * The results will be split into separate SyncBucketData chunks to: - * 1. Separate buckets. - * 2. Limit the size of each individual chunk according to options.batchSizeLimitBytes. - * - * @param checkpoint the checkpoint - * @param dataBuckets current bucket states - * @param options batch size options - */ - getBucketDataBatch( - checkpoint: util.OpId, - dataBuckets: Map, - options?: BucketDataBatchOptions - ): AsyncIterable; - - /** - * Compute checksums for a given list of buckets. - * - * Returns zero checksums for any buckets not found. - */ - getChecksums(checkpoint: util.OpId, buckets: string[]): Promise; - - /** - * Terminate the sync rules. - * - * This clears the storage, and sets state to TERMINATED. 
- * - * Must only be called on stopped sync rules. - */ - terminate(options?: TerminateOptions): Promise; - - getStatus(): Promise; - - /** - * Clear the storage, without changing state. - */ - clear(): Promise; - - autoActivate(): Promise; - - /** - * Record a replication error. - * - * This could be a recoverable error (e.g. temporary network failure), - * or a permanent error (e.g. missing toast data). - * - * Errors are cleared on commit. - */ - reportError(e: any): Promise; - - compact(options?: CompactOptions): Promise; -} - -export interface SyncRuleStatus { - checkpoint_lsn: string | null; - active: boolean; - snapshot_done: boolean; -} -export interface ResolveTableOptions { - group_id: number; - connection_id: number; - connection_tag: string; - entity_descriptor: SourceEntityDescriptor; - - sync_rules: SqlSyncRules; -} - -export interface ResolveTableResult { - table: SourceTable; - dropTables: SourceTable[]; -} - -export interface FlushedResult { - flushed_op: string; -} - -export interface BucketBatchStorageListener extends DisposableListener { - replicationEvent: (payload: ReplicationEventPayload) => void; -} - -export interface BucketBatchCommitOptions { - /** - * Creates a new checkpoint even if there were no persisted operations. - * Defaults to true. - */ - createEmptyCheckpoints?: boolean; -} - -export type ResolvedBucketBatchCommitOptions = Required; - -export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptions = { - createEmptyCheckpoints: true -}; - -export interface BucketStorageBatch extends DisposableObserverClient { - /** - * Save an op, and potentially flush. - * - * This can be an insert, update or delete op. - */ - save(record: SaveOptions): Promise; - - /** - * Replicate a truncate op - deletes all data in the specified tables. - */ - truncate(sourceTables: SourceTable[]): Promise; - - /** - * Drop one or more tables. - * - * This is the same as truncate, but additionally removes the SourceTable record. - */ - drop(sourceTables: SourceTable[]): Promise; - - /** - * Explicitly flush all pending changes in the batch. - * - * This does not create a new checkpoint until `commit()` is called. This means it's - * safe to flush multiple times in the middle of a large transaction. - * - * @returns null if there are no changes to flush. - */ - flush(): Promise; - - /** - * Flush and commit any saved ops. This creates a new checkpoint by default. - * - * Only call this after a transaction. - */ - commit(lsn: string, options?: BucketBatchCommitOptions): Promise; - - /** - * Advance the checkpoint LSN position, without any associated op. - * - * This must only be called when not inside a transaction. - * - * @returns true if the checkpoint was advanced, false if this was a no-op - */ - keepalive(lsn: string): Promise; - - /** - * Get the last checkpoint LSN, from either commit or keepalive. - */ - lastCheckpointLsn: string | null; - - markSnapshotDone(tables: SourceTable[], no_checkpoint_before_lsn: string): Promise; - - /** - * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. 
- */ - addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void; -} - -export interface SaveParameterData { - sourceTable: SourceTable; - /** UUID */ - sourceKey: string; - evaluated: EvaluatedParameters[]; -} - -export interface SaveBucketData { - sourceTable: SourceTable; - /** UUID */ - sourceKey: string; - - evaluated: EvaluatedRow[]; -} - -export type SaveOp = 'insert' | 'update' | 'delete'; - -export type SaveOptions = SaveInsert | SaveUpdate | SaveDelete; - -export enum SaveOperationTag { - INSERT = 'insert', - UPDATE = 'update', - DELETE = 'delete' -} - -export interface SaveInsert { - tag: SaveOperationTag.INSERT; - sourceTable: SourceTable; - before?: undefined; - beforeReplicaId?: undefined; - after: SqliteRow; - afterReplicaId: ReplicaId; -} - -export interface SaveUpdate { - tag: SaveOperationTag.UPDATE; - sourceTable: SourceTable; - - /** - * This is only present when the id has changed, and will only contain replica identity columns. - */ - before?: SqliteRow; - beforeReplicaId?: ReplicaId; - - /** - * A null value means null column. - * - * An undefined value means it's a TOAST value - must be copied from another record. - */ - after: ToastableSqliteRow; - afterReplicaId: ReplicaId; -} - -export interface SaveDelete { - tag: SaveOperationTag.DELETE; - sourceTable: SourceTable; - before?: SqliteRow; - beforeReplicaId: ReplicaId; - after?: undefined; - afterReplicaId?: undefined; -} - -export interface SyncBucketDataBatch { - batch: util.SyncBucketData; - targetOp: bigint | null; -} - export function mergeToast(record: ToastableSqliteRow, persisted: ToastableSqliteRow): ToastableSqliteRow { const newRecord: ToastableSqliteRow = {}; for (let key in record) { @@ -539,56 +41,3 @@ export function mergeToast(record: ToastableSqliteRow, persisted: ToastableSqlit } return newRecord; } - -export interface CompactOptions { - /** - * Heap memory limit for the compact process. - * - * Add around 64MB to this to determine the "--max-old-space-size" argument. - * Add another 80MB to get RSS usage / memory limits. - */ - memoryLimitMB?: number; - - /** - * If specified, ignore any operations newer than this when compacting. - * - * This is primarily for tests, where we want to test compacting at a specific - * point. - * - * This can also be used to create a "safe buffer" of recent operations that should - * not be compacted, to avoid invalidating checkpoints in use. - */ - maxOpId?: bigint; - - /** - * If specified, compact only the specific buckets. - * - * If not specified, compacts all buckets. - * - * These can be individual bucket names, or bucket definition names. - */ - compactBuckets?: string[]; -} - -export interface TerminateOptions { - /** - * If true, also clear the storage before terminating. - */ - clearStorage: boolean; -} - -/** - * Helper for tests. - * This is not in the `service-core-tests` package in order for storage modules - * to provide relevant factories without requiring `service-core-tests` as a direct dependency. - */ -export interface TestStorageOptions { - /** - * By default, collections are only cleared/ - * Setting this to true will drop the collections completely. 
- */ - dropAll?: boolean; - - doNotClear?: boolean; -} -export type TestStorageFactory = (options?: TestStorageOptions) => Promise; diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts new file mode 100644 index 000000000..b43755ebf --- /dev/null +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -0,0 +1,158 @@ +import { ObserverClient } from '@powersync/lib-services-framework'; +import { EvaluatedParameters, EvaluatedRow, SqliteRow, ToastableSqliteRow } from '@powersync/service-sync-rules'; +import { BSON } from 'bson'; +import { ReplicationEventPayload } from './ReplicationEventPayload.js'; +import { SourceTable } from './SourceTable.js'; +import { BatchedCustomWriteCheckpointOptions } from './storage-index.js'; + +export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptions = { + createEmptyCheckpoints: true +}; + +export interface BucketStorageBatch extends ObserverClient, AsyncDisposable { + /** + * Save an op, and potentially flush. + * + * This can be an insert, update or delete op. + */ + save(record: SaveOptions): Promise; + + /** + * Replicate a truncate op - deletes all data in the specified tables. + */ + truncate(sourceTables: SourceTable[]): Promise; + + /** + * Drop one or more tables. + * + * This is the same as truncate, but additionally removes the SourceTable record. + */ + drop(sourceTables: SourceTable[]): Promise; + + /** + * Explicitly flush all pending changes in the batch. + * + * This does not create a new checkpoint until `commit()` is called. This means it's + * safe to flush multiple times in the middle of a large transaction. + * + * @returns null if there are no changes to flush. + */ + flush(): Promise; + + /** + * Flush and commit any saved ops. This creates a new checkpoint by default. + * + * Only call this after a transaction. + */ + commit(lsn: string, options?: BucketBatchCommitOptions): Promise; + + /** + * Advance the checkpoint LSN position, without any associated op. + * + * This must only be called when not inside a transaction. + * + * @returns true if the checkpoint was advanced, false if this was a no-op + */ + keepalive(lsn: string): Promise; + + /** + * Get the last checkpoint LSN, from either commit or keepalive. + */ + lastCheckpointLsn: string | null; + + markSnapshotDone(tables: SourceTable[], no_checkpoint_before_lsn: string): Promise; + + /** + * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. + */ + addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void; +} + +/** + * Replica id uniquely identifying a row on the source database. + * + * Can be any value serializable to BSON. + * + * If the value is an entire document, the data serialized to a v5 UUID may be a good choice here. 
+ */ +export type ReplicaId = BSON.UUID | BSON.Document | any; + +export interface SaveParameterData { + sourceTable: SourceTable; + /** UUID */ + sourceKey: string; + evaluated: EvaluatedParameters[]; +} + +export interface SaveBucketData { + sourceTable: SourceTable; + /** UUID */ + sourceKey: string; + + evaluated: EvaluatedRow[]; +} + +export type SaveOp = 'insert' | 'update' | 'delete'; + +export type SaveOptions = SaveInsert | SaveUpdate | SaveDelete; + +export enum SaveOperationTag { + INSERT = 'insert', + UPDATE = 'update', + DELETE = 'delete' +} + +export interface SaveInsert { + tag: SaveOperationTag.INSERT; + sourceTable: SourceTable; + before?: undefined; + beforeReplicaId?: undefined; + after: SqliteRow; + afterReplicaId: ReplicaId; +} + +export interface SaveUpdate { + tag: SaveOperationTag.UPDATE; + sourceTable: SourceTable; + + /** + * This is only present when the id has changed, and will only contain replica identity columns. + */ + before?: SqliteRow; + beforeReplicaId?: ReplicaId; + + /** + * A null value means null column. + * + * An undefined value means it's a TOAST value - must be copied from another record. + */ + after: ToastableSqliteRow; + afterReplicaId: ReplicaId; +} + +export interface SaveDelete { + tag: SaveOperationTag.DELETE; + sourceTable: SourceTable; + before?: SqliteRow; + beforeReplicaId: ReplicaId; + after?: undefined; + afterReplicaId?: undefined; +} + +export interface BucketBatchStorageListener { + replicationEvent: (payload: ReplicationEventPayload) => void; +} + +export interface FlushedResult { + flushed_op: string; +} + +export interface BucketBatchCommitOptions { + /** + * Creates a new checkpoint even if there were no persisted operations. + * Defaults to true. + */ + createEmptyCheckpoints?: boolean; +} + +export type ResolvedBucketBatchCommitOptions = Required; diff --git a/packages/service-core/src/storage/BucketStorageFactory.ts b/packages/service-core/src/storage/BucketStorageFactory.ts new file mode 100644 index 000000000..3512b1cb3 --- /dev/null +++ b/packages/service-core/src/storage/BucketStorageFactory.ts @@ -0,0 +1,156 @@ +import { ObserverClient } from '@powersync/lib-services-framework'; +import { ParseSyncRulesOptions, PersistedSyncRules, PersistedSyncRulesContent } from './PersistedSyncRulesContent.js'; +import { ReplicationEventPayload } from './ReplicationEventPayload.js'; +import { ReplicationLock } from './ReplicationLock.js'; +import { SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; + +/** + * Represents a configured storage provider. + * + * The provider can handle multiple copies of sync rules concurrently, each with their own storage. + * This is to handle replication of a new version of sync rules, while the old version is still active. + * + * Storage APIs for a specific copy of sync rules are provided by the `SyncRulesBucketStorage` instances. + */ +export interface BucketStorageFactory extends ObserverClient, AsyncDisposable { + /** + * Update sync rules from configuration, if changed. + */ + configureSyncRules( + options: UpdateSyncRulesOptions + ): Promise<{ updated: boolean; persisted_sync_rules?: PersistedSyncRulesContent; lock?: ReplicationLock }>; + + /** + * Get a storage instance to query sync data for specific sync rules. + */ + getInstance(options: PersistedSyncRulesContent): SyncRulesBucketStorage; + + /** + * Deploy new sync rules. 
+ */ + updateSyncRules(options: UpdateSyncRulesOptions): Promise; + + /** + * Indicate that a slot was removed, and we should re-sync by creating + * a new sync rules instance. + * + * This is roughly the same as deploying a new version of the current sync + * rules, but also accounts for cases where the current sync rules are not + * the latest ones. + * + * Replication should be restarted after this. + * + * @param slot_name The removed slot + */ + slotRemoved(slot_name: string): Promise; + + /** + * Get the sync rules used for querying. + */ + getActiveSyncRules(options: ParseSyncRulesOptions): Promise; + + /** + * Get the sync rules used for querying. + */ + getActiveSyncRulesContent(): Promise; + + /** + * Get the sync rules that will be active next once done with initial replicatino. + */ + getNextSyncRules(options: ParseSyncRulesOptions): Promise; + + /** + * Get the sync rules that will be active next once done with initial replicatino. + */ + getNextSyncRulesContent(): Promise; + + /** + * Get all sync rules currently replicating. Typically this is the "active" and "next" sync rules. + */ + getReplicatingSyncRules(): Promise; + + /** + * Get all sync rules stopped but not terminated yet. + */ + getStoppedSyncRules(): Promise; + + /** + * Get the active storage instance. + */ + getActiveStorage(): Promise; + + /** + * Get storage size of active sync rules. + */ + getStorageMetrics(): Promise; + + /** + * Get the unique identifier for this instance of Powersync + */ + getPowerSyncInstanceId(): Promise; + + /** + * Get a unique identifier for the system used for storage. + */ + getSystemIdentifier(): Promise; +} + +export interface BucketStorageFactoryListener { + syncStorageCreated: (storage: SyncRulesBucketStorage) => void; + replicationEvent: (event: ReplicationEventPayload) => void; +} + +export interface StorageMetrics { + /** + * Size of operations (bucket_data) + */ + operations_size_bytes: number; + + /** + * Size of parameter storage. + * + * Replication storage -> raw data as received from Postgres. + */ + parameters_size_bytes: number; + + /** + * Size of current_data. + */ + replication_size_bytes: number; +} + +export interface UpdateSyncRulesOptions { + content: string; + lock?: boolean; + validate?: boolean; +} + +export interface BucketStorageSystemIdentifier { + /** + * A unique identifier for the system used for storage. + * For Postgres this can be the cluster `system_identifier` and database name. + * For MongoDB this can be the replica set name. + */ + id: string; + /** + * A unique type for the storage implementation. + * e.g. `mongodb`, `postgresql`. + */ + type: string; +} + +/** + * Helper for tests. + * This is not in the `service-core-tests` package in order for storage modules + * to provide relevant factories without requiring `service-core-tests` as a direct dependency. + */ +export interface TestStorageOptions { + /** + * By default, collections are only cleared/ + * Setting this to true will drop the collections completely. 
+ */ + dropAll?: boolean; + + doNotClear?: boolean; +} +export type TestStorageFactory = (options?: TestStorageOptions) => Promise; diff --git a/packages/service-core/src/storage/PersistedSyncRulesContent.ts b/packages/service-core/src/storage/PersistedSyncRulesContent.ts new file mode 100644 index 000000000..e1d5a4654 --- /dev/null +++ b/packages/service-core/src/storage/PersistedSyncRulesContent.ts @@ -0,0 +1,26 @@ +import { SqlSyncRules } from '@powersync/service-sync-rules'; +import { ReplicationLock } from './ReplicationLock.js'; + +export interface ParseSyncRulesOptions { + defaultSchema: string; +} + +export interface PersistedSyncRulesContent { + readonly id: number; + readonly sync_rules_content: string; + readonly slot_name: string; + + readonly last_fatal_error?: string | null; + readonly last_keepalive_ts?: Date | null; + readonly last_checkpoint_ts?: Date | null; + + parsed(options: ParseSyncRulesOptions): PersistedSyncRules; + + lock(): Promise; +} + +export interface PersistedSyncRules { + readonly id: number; + readonly sync_rules: SqlSyncRules; + readonly slot_name: string; +} diff --git a/packages/service-core/src/storage/ReplicationEventPayload.ts b/packages/service-core/src/storage/ReplicationEventPayload.ts index c2fe0aa84..bd9ee01f8 100644 --- a/packages/service-core/src/storage/ReplicationEventPayload.ts +++ b/packages/service-core/src/storage/ReplicationEventPayload.ts @@ -1,6 +1,6 @@ import * as sync_rules from '@powersync/service-sync-rules'; -import { BucketStorageBatch, SaveOp } from './BucketStorage.js'; import { SourceTable } from './SourceTable.js'; +import { BucketStorageBatch, SaveOp } from './BucketStorageBatch.js'; export type EventData = { op: SaveOp; diff --git a/packages/service-core/src/storage/ReplicationLock.ts b/packages/service-core/src/storage/ReplicationLock.ts new file mode 100644 index 000000000..d561094c1 --- /dev/null +++ b/packages/service-core/src/storage/ReplicationLock.ts @@ -0,0 +1,5 @@ +export interface ReplicationLock { + sync_rules_id: number; + + release(): Promise; +} diff --git a/packages/service-core/src/storage/StorageEngine.ts b/packages/service-core/src/storage/StorageEngine.ts index a6639211a..63415ace0 100644 --- a/packages/service-core/src/storage/StorageEngine.ts +++ b/packages/service-core/src/storage/StorageEngine.ts @@ -1,17 +1,17 @@ -import { DisposableListener, DisposableObserver, logger } from '@powersync/lib-services-framework'; +import { BaseObserver, logger } from '@powersync/lib-services-framework'; import { ResolvedPowerSyncConfig } from '../util/util-index.js'; -import { BucketStorageFactory } from './BucketStorage.js'; import { ActiveStorage, BucketStorageProvider } from './StorageProvider.js'; +import { BucketStorageFactory } from './BucketStorageFactory.js'; export type StorageEngineOptions = { configuration: ResolvedPowerSyncConfig; }; -export interface StorageEngineListener extends DisposableListener { +export interface StorageEngineListener { storageActivated: (storage: BucketStorageFactory) => void; } -export class StorageEngine extends DisposableObserver { +export class StorageEngine extends BaseObserver { // TODO: This will need to revisited when we actually support multiple storage providers. 
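As a rough sketch (not part of this changeset), the pattern the updated route handlers and commands now follow is: resolve the active SyncRulesBucketStorage from the factory via getActiveStorage(), treat a null result as "no sync rules available", and parse the sync rules from that instance instead of passing parse options further down. The helper name and import paths below are illustrative; the error shape is copied from the updated sync-stream route in this diff.

// Hypothetical helper consolidating the lookup used by the updated call sites.
import { ErrorCode, ServiceError } from '@powersync/lib-services-framework';
import * as storage from '../storage/storage-index.js';

async function resolveActiveStorage(
  factory: storage.BucketStorageFactory,
  parseOptions: storage.ParseSyncRulesOptions
) {
  // Replaces the old getActiveCheckpoint()/hasSyncRules() sanity check.
  const bucketStorage = await factory.getActiveStorage();
  if (bucketStorage == null) {
    throw new ServiceError({
      status: 500,
      code: ErrorCode.PSYNC_S2302,
      description: 'No sync rules available'
    });
  }
  const syncRules = bucketStorage.getParsedSyncRules(parseOptions);
  return { bucketStorage, syncRules };
}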
private storageProviders: Map = new Map(); private currentActiveStorage: ActiveStorage | null = null; diff --git a/packages/service-core/src/storage/StorageProvider.ts b/packages/service-core/src/storage/StorageProvider.ts index 385a042fd..6db6b346f 100644 --- a/packages/service-core/src/storage/StorageProvider.ts +++ b/packages/service-core/src/storage/StorageProvider.ts @@ -1,5 +1,5 @@ import * as util from '../util/util-index.js'; -import { BucketStorageFactory } from './BucketStorage.js'; +import { BucketStorageFactory } from './BucketStorageFactory.js'; export interface ActiveStorage { storage: BucketStorageFactory; @@ -16,6 +16,9 @@ export interface GetStorageOptions { resolvedConfig: util.ResolvedPowerSyncConfig; } +/** + * Represents a provider that can create a storage instance for a specific storage type from configuration. + */ export interface BucketStorageProvider { /** * The storage type that this provider provides. diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts new file mode 100644 index 000000000..5aeb1f68e --- /dev/null +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -0,0 +1,256 @@ +import { ObserverClient } from '@powersync/lib-services-framework'; +import { SqlSyncRules, SqliteJsonRow, SqliteJsonValue } from '@powersync/service-sync-rules'; +import * as util from '../util/util-index.js'; +import { BucketStorageBatch, FlushedResult } from './BucketStorageBatch.js'; +import { BucketStorageFactory } from './BucketStorageFactory.js'; +import { ParseSyncRulesOptions } from './PersistedSyncRulesContent.js'; +import { SourceEntityDescriptor } from './SourceEntity.js'; +import { SourceTable } from './SourceTable.js'; +import { SyncStorageWriteCheckpointAPI } from './WriteCheckpointAPI.js'; + +/** + * Storage for a specific copy of sync rules. + */ +export interface SyncRulesBucketStorage + extends ObserverClient, + SyncStorageWriteCheckpointAPI { + readonly group_id: number; + readonly slot_name: string; + + readonly factory: BucketStorageFactory; + + /** + * Resolve a table, keeping track of it internally. + */ + resolveTable(options: ResolveTableOptions): Promise; + + /** + * Use this to get access to update storage data. + */ + startBatch( + options: StartBatchOptions, + callback: (batch: BucketStorageBatch) => Promise + ): Promise; + + getParsedSyncRules(options: ParseSyncRulesOptions): SqlSyncRules; + + /** + * Terminate the sync rules. + * + * This clears the storage, and sets state to TERMINATED. + * + * Must only be called on stopped sync rules. + */ + terminate(options?: TerminateOptions): Promise; + + getStatus(): Promise; + + /** + * Clear the storage, without changing state. + */ + clear(): Promise; + + autoActivate(): Promise; + + /** + * Record a replication error. + * + * This could be a recoverable error (e.g. temporary network failure), + * or a permanent error (e.g. missing toast data). + * + * Errors are cleared on commit. + */ + reportError(e: any): Promise; + + compact(options?: CompactOptions): Promise; + + // ## Read operations + + getCheckpoint(): Promise; + + /** + * Used to resolve "dynamic" parameter queries. + */ + getParameterSets(checkpoint: util.OpId, lookups: SqliteJsonValue[][]): Promise; + + getCheckpointChanges(options: GetCheckpointChangesOptions): Promise; + + /** + * Yields the latest user write checkpoint whenever the sync checkpoint updates. + * + * The stream stops or errors if this is not the active sync rules (anymore). 
+ */ + watchWriteCheckpoint(options: WatchWriteCheckpointOptions): AsyncIterable; + + /** + * Get a "batch" of data for a checkpoint. + * + * The results will be split into separate SyncBucketData chunks to: + * 1. Separate buckets. + * 2. Limit the size of each individual chunk according to options.batchSizeLimitBytes. + * + * @param checkpoint the checkpoint + * @param dataBuckets current bucket states + * @param options batch size options + */ + getBucketDataBatch( + checkpoint: util.OpId, + dataBuckets: Map, + options?: BucketDataBatchOptions + ): AsyncIterable; + + /** + * Compute checksums for a given list of buckets. + * + * Returns zero checksums for any buckets not found. + */ + getChecksums(checkpoint: util.OpId, buckets: string[]): Promise; +} + +export interface SyncRulesBucketStorageListener { + batchStarted: (batch: BucketStorageBatch) => void; +} + +export interface SyncRuleStatus { + checkpoint_lsn: string | null; + active: boolean; + snapshot_done: boolean; +} +export interface ResolveTableOptions { + group_id: number; + connection_id: number; + connection_tag: string; + entity_descriptor: SourceEntityDescriptor; + + sync_rules: SqlSyncRules; +} + +export interface ResolveTableResult { + table: SourceTable; + dropTables: SourceTable[]; +} + +export interface StartBatchOptions extends ParseSyncRulesOptions { + zeroLSN: string; + /** + * Whether or not to store a copy of the current data. + * + * This is needed if we need to apply partial updates, for example + * when we get TOAST values from Postgres. + * + * This is not needed when we get the full document from the source + * database, for example from MongoDB. + */ + storeCurrentData: boolean; + + /** + * Set to true for initial replication. + * + * This will avoid creating new operations for rows previously replicated. + */ + skipExistingRows?: boolean; +} + +export interface CompactOptions { + /** + * Heap memory limit for the compact process. + * + * Add around 64MB to this to determine the "--max-old-space-size" argument. + * Add another 80MB to get RSS usage / memory limits. + */ + memoryLimitMB?: number; + + /** + * If specified, ignore any operations newer than this when compacting. + * + * This is primarily for tests, where we want to test compacting at a specific + * point. + * + * This can also be used to create a "safe buffer" of recent operations that should + * not be compacted, to avoid invalidating checkpoints in use. + */ + maxOpId?: bigint; + + /** + * If specified, compact only the specific buckets. + * + * If not specified, compacts all buckets. + * + * These can be individual bucket names, or bucket definition names. + */ + compactBuckets?: string[]; +} + +export interface TerminateOptions { + /** + * If true, also clear the storage before terminating. + */ + clearStorage: boolean; +} + +export interface BucketDataBatchOptions { + /** Limit number of documents returned. Defaults to 1000. */ + limit?: number; + + /** + * Limit size of chunks returned. Defaults to 1MB. + * + * This is a lower bound, not an upper bound. As soon as the chunk size goes over this limit, + * it is returned. + * + * Note that an individual data row can be close to 16MB in size, so this does not help in + * extreme cases. 
+ */ + chunkLimitBytes?: number; +} + +export interface SyncBucketDataBatch { + batch: util.SyncBucketData; + targetOp: bigint | null; +} + +export interface ReplicationCheckpoint { + readonly checkpoint: util.OpId; + readonly lsn: string | null; +} + +export interface WatchWriteCheckpointOptions { + /** user_id and client_id combined. */ + user_id: string; + + signal: AbortSignal; +} + +export interface WatchFilterEvent { + changedDataBucket?: string; + changedParameterBucketDefinition?: string; + invalidate?: boolean; +} + +export interface WriteCheckpoint { + base: ReplicationCheckpoint; + writeCheckpoint: bigint | null; +} + +export interface StorageCheckpointUpdate extends WriteCheckpoint { + update: CheckpointChanges; +} + +export interface GetCheckpointChangesOptions { + lastCheckpoint: util.OpId; + nextCheckpoint: util.OpId; +} + +export interface CheckpointChanges { + updatedDataBuckets: string[]; + invalidateDataBuckets: boolean; + updatedParameterBucketDefinitions: string[]; + invalidateParameterBuckets: boolean; +} + +export const CHECKPOINT_INVALIDATE_ALL: CheckpointChanges = { + updatedDataBuckets: [], + invalidateDataBuckets: true, + updatedParameterBucketDefinitions: [], + invalidateParameterBuckets: true +}; diff --git a/packages/service-core/src/storage/bson.ts b/packages/service-core/src/storage/bson.ts index dd1c726a8..11175d17f 100644 --- a/packages/service-core/src/storage/bson.ts +++ b/packages/service-core/src/storage/bson.ts @@ -1,7 +1,7 @@ import * as bson from 'bson'; import { SqliteJsonValue } from '@powersync/service-sync-rules'; -import { ReplicaId } from './BucketStorage.js'; +import { ReplicaId } from './BucketStorageBatch.js'; type NodeBuffer = Buffer; @@ -29,6 +29,11 @@ export const serializeLookup = (lookup: SqliteJsonValue[]) => { return new bson.Binary(serializeLookupBuffer(lookup)); }; +export const getLookupBucketDefinitionName = (lookup: bson.Binary) => { + const parsed = bson.deserialize(lookup.buffer, BSON_DESERIALIZE_OPTIONS).l as SqliteJsonValue[]; + return parsed[0] as string; +}; + /** * True if this is a bson.UUID. 
* diff --git a/packages/service-core/src/storage/storage-index.ts b/packages/service-core/src/storage/storage-index.ts index b3e2c15b3..b802b0c82 100644 --- a/packages/service-core/src/storage/storage-index.ts +++ b/packages/service-core/src/storage/storage-index.ts @@ -7,3 +7,8 @@ export * from './SourceTable.js'; export * from './StorageEngine.js'; export * from './StorageProvider.js'; export * from './WriteCheckpointAPI.js'; +export * from './BucketStorageFactory.js'; +export * from './BucketStorageBatch.js'; +export * from './SyncRulesBucketStorage.js'; +export * from './PersistedSyncRulesContent.js'; +export * from './ReplicationLock.js'; diff --git a/packages/service-core/src/sync/BucketChecksumState.ts b/packages/service-core/src/sync/BucketChecksumState.ts new file mode 100644 index 000000000..efe983c4a --- /dev/null +++ b/packages/service-core/src/sync/BucketChecksumState.ts @@ -0,0 +1,392 @@ +import { BucketDescription, RequestParameters, SqlSyncRules } from '@powersync/service-sync-rules'; + +import * as storage from '../storage/storage-index.js'; +import * as util from '../util/util-index.js'; + +import { ErrorCode, logger, ServiceAssertionError, ServiceError } from '@powersync/lib-services-framework'; +import { BucketParameterQuerier } from '@powersync/service-sync-rules/src/BucketParameterQuerier.js'; +import { BucketSyncState } from './sync.js'; + +export interface BucketChecksumStateOptions { + bucketStorage: BucketChecksumStateStorage; + syncRules: SqlSyncRules; + syncParams: RequestParameters; + initialBucketPositions?: { name: string; after: string }[]; +} + +/** + * Represents the state of the checksums and data for a specific connection. + * + * Handles incrementally re-computing checkpoints. + */ +export class BucketChecksumState { + private readonly bucketStorage: BucketChecksumStateStorage; + + /** + * Bucket state of bucket id -> op_id. + * This starts with the state from the client. May contain buckets that the user do not have access to (anymore). + */ + public bucketDataPositions = new Map(); + + /** + * Last checksums sent to the client. We keep this to calculate checkpoint diffs. + */ + private lastChecksums: util.ChecksumMap | null = null; + private lastWriteCheckpoint: bigint | null = null; + + private readonly parameterState: BucketParameterState; + + /** + * Keep track of buckets that need to be downloaded. This is specifically relevant when + * partial checkpoints are sent. + */ + private pendingBucketDownloads = new Set(); + + constructor(options: BucketChecksumStateOptions) { + this.bucketStorage = options.bucketStorage; + this.parameterState = new BucketParameterState(options.bucketStorage, options.syncRules, options.syncParams); + this.bucketDataPositions = new Map(); + + for (let { name, after: start } of options.initialBucketPositions ?? []) { + this.bucketDataPositions.set(name, { start_op_id: start }); + } + } + + async buildNextCheckpointLine(next: storage.StorageCheckpointUpdate): Promise { + const { writeCheckpoint, base } = next; + const user_id = this.parameterState.syncParams.user_id; + + const storage = this.bucketStorage; + + const update = await this.parameterState.getCheckpointUpdate(next); + if (update == null) { + return null; + } + + const { buckets: allBuckets, updatedBuckets } = update; + + let dataBucketsNew = new Map(); + for (let bucket of allBuckets) { + dataBucketsNew.set(bucket.bucket, { + description: bucket, + start_op_id: this.bucketDataPositions.get(bucket.bucket)?.start_op_id ?? 
'0' + }); + } + this.bucketDataPositions = dataBucketsNew; + + let checksumMap: util.ChecksumMap; + if (updatedBuckets != null) { + if (this.lastChecksums == null) { + throw new ServiceAssertionError(`Bucket diff received without existing checksums`); + } + + // Re-check updated buckets only + let checksumLookups: string[] = []; + + let newChecksums = new Map(); + for (let bucket of dataBucketsNew.keys()) { + if (!updatedBuckets.has(bucket)) { + const existing = this.lastChecksums.get(bucket); + if (existing == null) { + // If this happens, it means updatedBuckets did not correctly include all new buckets + throw new ServiceAssertionError(`Existing checksum not found for bucket ${bucket}`); + } + // Bucket is not specifically updated, and we have a previous checksum + newChecksums.set(bucket, existing); + } else { + checksumLookups.push(bucket); + } + } + + let updatedChecksums = await storage.getChecksums(base.checkpoint, checksumLookups); + for (let [bucket, value] of updatedChecksums.entries()) { + newChecksums.set(bucket, value); + } + checksumMap = newChecksums; + } else { + // Re-check all buckets + const bucketList = [...dataBucketsNew.keys()]; + checksumMap = await storage.getChecksums(base.checkpoint, bucketList); + } + // Subset of buckets for which there may be new data in this batch. + let bucketsToFetch: BucketDescription[]; + + let checkpointLine: util.StreamingSyncCheckpointDiff | util.StreamingSyncCheckpoint; + + if (this.lastChecksums) { + // TODO: If updatedBuckets is present, we can use that to more efficiently calculate a diff, + // and avoid any unnecessary loops through the entire list of buckets. + const diff = util.checksumsDiff(this.lastChecksums, checksumMap); + + if ( + this.lastWriteCheckpoint == writeCheckpoint && + diff.removedBuckets.length == 0 && + diff.updatedBuckets.length == 0 + ) { + // No changes - don't send anything to the client + return null; + } + + let generateBucketsToFetch = new Set(); + for (let bucket of diff.updatedBuckets) { + generateBucketsToFetch.add(bucket.bucket); + } + for (let bucket of this.pendingBucketDownloads) { + // Bucket from a previous checkpoint that hasn't been downloaded yet. + // If we still have this bucket, include it in the list of buckets to fetch. + if (checksumMap.has(bucket)) { + generateBucketsToFetch.add(bucket); + } + } + + const updatedBucketDescriptions = diff.updatedBuckets.map((e) => ({ + ...e, + priority: this.bucketDataPositions.get(e.bucket)!.description!.priority + })); + bucketsToFetch = [...generateBucketsToFetch].map((b) => { + return { + bucket: b, + priority: this.bucketDataPositions.get(b)!.description!.priority + }; + }); + + let message = `Updated checkpoint: ${base.checkpoint} | `; + message += `write: ${writeCheckpoint} | `; + message += `buckets: ${allBuckets.length} | `; + message += `updated: ${limitedBuckets(diff.updatedBuckets, 20)} | `; + message += `removed: ${limitedBuckets(diff.removedBuckets, 20)}`; + logger.info(message, { + checkpoint: base.checkpoint, + user_id: user_id, + buckets: allBuckets.length, + updated: diff.updatedBuckets.length, + removed: diff.removedBuckets.length + }); + + checkpointLine = { + checkpoint_diff: { + last_op_id: base.checkpoint, + write_checkpoint: writeCheckpoint ? 
String(writeCheckpoint) : undefined, + removed_buckets: diff.removedBuckets, + updated_buckets: updatedBucketDescriptions + } + } satisfies util.StreamingSyncCheckpointDiff; + } else { + let message = `New checkpoint: ${base.checkpoint} | write: ${writeCheckpoint} | `; + message += `buckets: ${allBuckets.length} ${limitedBuckets(allBuckets, 20)}`; + logger.info(message, { checkpoint: base.checkpoint, user_id: user_id, buckets: allBuckets.length }); + bucketsToFetch = allBuckets; + checkpointLine = { + checkpoint: { + last_op_id: base.checkpoint, + write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined, + buckets: [...checksumMap.values()].map((e) => ({ + ...e, + priority: this.bucketDataPositions.get(e.bucket)!.description!.priority + })) + } + } satisfies util.StreamingSyncCheckpoint; + } + + this.lastChecksums = checksumMap; + this.lastWriteCheckpoint = writeCheckpoint; + this.pendingBucketDownloads = new Set(bucketsToFetch.map((b) => b.bucket)); + + return { + checkpointLine, + bucketsToFetch + }; + } + + /** + * Get bucket positions to sync, given the list of buckets. + * + * @param bucketsToFetch List of buckets to fetch, typically from buildNextCheckpointLine, or a subset of that + * @returns + */ + getFilteredBucketPositions(bucketsToFetch: BucketDescription[]): Map { + const filtered = new Map(); + for (let bucket of bucketsToFetch) { + const state = this.bucketDataPositions.get(bucket.bucket); + if (state) { + filtered.set(bucket.bucket, state.start_op_id); + } + } + return filtered; + } + + /** + * Update the position of bucket data the client has. + * + * @param bucket the bucket name + * @param nextAfter sync operations >= this value in the next batch + */ + updateBucketPosition(options: { bucket: string; nextAfter: string; hasMore: boolean }) { + const state = this.bucketDataPositions.get(options.bucket); + if (state) { + state.start_op_id = options.nextAfter; + } + if (!options.hasMore) { + this.pendingBucketDownloads.delete(options.bucket); + } + } +} + +export interface CheckpointUpdate { + /** + * All buckets forming part of the checkpoint. + */ + buckets: BucketDescription[]; + + /** + * If present, a set of buckets that have been updated since the last checkpoint. + * + * If null, assume that any bucket in `buckets` may have been updated. 
+ */ + updatedBuckets: Set | null; +} + +export class BucketParameterState { + public readonly bucketStorage: BucketChecksumStateStorage; + public readonly syncRules: SqlSyncRules; + public readonly syncParams: RequestParameters; + private readonly querier: BucketParameterQuerier; + private readonly staticBuckets: Map; + + constructor(bucketStorage: BucketChecksumStateStorage, syncRules: SqlSyncRules, syncParams: RequestParameters) { + this.bucketStorage = bucketStorage; + this.syncRules = syncRules; + this.syncParams = syncParams; + + this.querier = syncRules.getBucketParameterQuerier(this.syncParams); + this.staticBuckets = new Map(this.querier.staticBuckets.map((b) => [b.bucket, b])); + } + + async getCheckpointUpdate(checkpoint: storage.StorageCheckpointUpdate): Promise { + const querier = this.querier; + let update: CheckpointUpdate | null; + if (querier.hasDynamicBuckets) { + update = await this.getCheckpointUpdateDynamic(checkpoint); + } else { + update = await this.getCheckpointUpdateStatic(checkpoint); + } + + if (update == null) { + return null; + } + + if (update.buckets.length > 1000) { + // TODO: Limit number of buckets even before we get to this point + const error = new ServiceError(ErrorCode.PSYNC_S2305, `Too many buckets: ${update.buckets.length}`); + logger.error(error.message, { + checkpoint: checkpoint, + user_id: this.syncParams.user_id, + buckets: update.buckets.length + }); + + throw error; + } + return update; + } + + /** + * For static buckets, we can keep track of which buckets have been updated. + */ + private async getCheckpointUpdateStatic( + checkpoint: storage.StorageCheckpointUpdate + ): Promise { + const querier = this.querier; + const update = checkpoint.update; + + if (update.invalidateDataBuckets) { + return { + buckets: querier.staticBuckets, + updatedBuckets: null + }; + } + + let updatedBuckets = new Set(); + + for (let bucket of update.updatedDataBuckets ?? []) { + if (this.staticBuckets.has(bucket)) { + updatedBuckets.add(bucket); + } + } + + if (updatedBuckets.size == 0) { + // No change - skip this checkpoint + return null; + } + + return { + buckets: querier.staticBuckets, + updatedBuckets + }; + } + + /** + * For dynamic buckets, we need to re-query the list of buckets every time. + */ + private async getCheckpointUpdateDynamic( + checkpoint: storage.StorageCheckpointUpdate + ): Promise { + const querier = this.querier; + const storage = this.bucketStorage; + const staticBuckets = querier.staticBuckets; + const update = checkpoint.update; + + let hasChange = false; + if (update.invalidateDataBuckets || update.updatedDataBuckets?.length > 0) { + hasChange = true; + } else if (update.invalidateParameterBuckets) { + hasChange = true; + } else { + for (let bucket of update.updatedParameterBucketDefinitions ?? 
[]) { + if (querier.dynamicBucketDefinitions.has(bucket)) { + hasChange = true; + break; + } + } + } + + if (!hasChange) { + return null; + } + + const dynamicBuckets = await querier.queryDynamicBucketDescriptions({ + getParameterSets(lookups) { + return storage.getParameterSets(checkpoint.base.checkpoint, lookups); + } + }); + const allBuckets = [...staticBuckets, ...dynamicBuckets]; + + return { + buckets: allBuckets, + // We cannot track individual bucket updates for dynamic lookups yet + updatedBuckets: null + }; + } +} + +export interface CheckpointLine { + checkpointLine: util.StreamingSyncCheckpointDiff | util.StreamingSyncCheckpoint; + bucketsToFetch: BucketDescription[]; +} + +// Use a more specific type to simplify testing +export type BucketChecksumStateStorage = Pick; + +function limitedBuckets(buckets: string[] | { bucket: string }[], limit: number) { + buckets = buckets.map((b) => { + if (typeof b != 'string') { + return b.bucket; + } else { + return b; + } + }); + if (buckets.length <= limit) { + return JSON.stringify(buckets); + } + const limited = buckets.slice(0, limit); + return `${JSON.stringify(limited)}...`; +} diff --git a/packages/service-core/src/sync/sync-index.ts b/packages/service-core/src/sync/sync-index.ts index 0d9d2f87c..9fadc8fa4 100644 --- a/packages/service-core/src/sync/sync-index.ts +++ b/packages/service-core/src/sync/sync-index.ts @@ -5,3 +5,4 @@ export * from './RequestTracker.js'; export * from './safeRace.js'; export * from './sync.js'; export * from './util.js'; +export * from './BucketChecksumState.js'; diff --git a/packages/service-core/src/sync/sync.ts b/packages/service-core/src/sync/sync.ts index fdf1b7f70..ae1e0c4ed 100644 --- a/packages/service-core/src/sync/sync.ts +++ b/packages/service-core/src/sync/sync.ts @@ -1,5 +1,5 @@ import { JSONBig, JsonContainer } from '@powersync/service-jsonbig'; -import { BucketDescription, BucketPriority, RequestParameters } from '@powersync/service-sync-rules'; +import { BucketDescription, BucketPriority, RequestParameters, SqlSyncRules } from '@powersync/service-sync-rules'; import { Semaphore, withTimeout } from 'async-mutex'; import { AbortError } from 'ix/aborterror.js'; @@ -9,9 +9,10 @@ import * as storage from '../storage/storage-index.js'; import * as util from '../util/util-index.js'; import { logger } from '@powersync/lib-services-framework'; +import { BucketChecksumState } from './BucketChecksumState.js'; import { mergeAsyncIterables } from './merge.js'; import { RequestTracker } from './RequestTracker.js'; -import { TokenStreamOptions, acquireSemaphoreAbortable, tokenStream } from './util.js'; +import { acquireSemaphoreAbortable, settledPromise, tokenStream, TokenStreamOptions } from './util.js'; /** * Maximum number of connections actively fetching data. @@ -32,11 +33,11 @@ const syncSemaphore = withTimeout( ); export interface SyncStreamParameters { - storage: storage.BucketStorageFactory; + bucketStorage: storage.SyncRulesBucketStorage; + syncRules: SqlSyncRules; params: util.StreamingSyncRequest; syncParams: RequestParameters; token: auth.JwtPayload; - parseOptions: storage.ParseSyncRulesOptions; /** * If this signal is aborted, the stream response ends as soon as possible, without error. 
*/ @@ -49,7 +50,7 @@ export interface SyncStreamParameters { export async function* streamResponse( options: SyncStreamParameters ): AsyncIterable { - const { storage, params, syncParams, token, tokenStreamOptions, tracker, signal, parseOptions } = options; + const { bucketStorage, syncRules, params, syncParams, token, tokenStreamOptions, tracker, signal } = options; // We also need to be able to abort, so we create our own controller. const controller = new AbortController(); if (signal) { @@ -65,7 +66,7 @@ export async function* streamResponse( } } const ki = tokenStream(token, controller.signal, tokenStreamOptions); - const stream = streamResponseInner(storage, params, syncParams, tracker, parseOptions, controller.signal); + const stream = streamResponseInner(bucketStorage, syncRules, params, syncParams, tracker, controller.signal); // Merge the two streams, and abort as soon as one of the streams end. const merged = mergeAsyncIterables([stream, ki], controller.signal); @@ -84,152 +85,63 @@ export async function* streamResponse( } } -type BucketSyncState = { +export type BucketSyncState = { description?: BucketDescription; // Undefined if the bucket has not yet been resolved by us. start_op_id: string; }; async function* streamResponseInner( - storage: storage.BucketStorageFactory, + bucketStorage: storage.SyncRulesBucketStorage, + syncRules: SqlSyncRules, params: util.StreamingSyncRequest, syncParams: RequestParameters, tracker: RequestTracker, - parseOptions: storage.ParseSyncRulesOptions, signal: AbortSignal ): AsyncGenerator { - // Bucket state of bucket id -> op_id. - // This starts with the state from the client. May contain buckets that the user do not have access to (anymore). - let dataBuckets = new Map(); - - let lastChecksums: util.ChecksumMap | null = null; - let lastWriteCheckpoint: bigint | null = null; - const { raw_data, binary_data } = params; - if (params.buckets) { - for (let { name, after: start } of params.buckets) { - dataBuckets.set(name, { start_op_id: start }); - } - } - const checkpointUserId = util.checkpointUserId(syncParams.token_parameters.user_id as string, params.client_id); - const stream = storage.watchWriteCheckpoint(checkpointUserId, signal); + + const checksumState = new BucketChecksumState({ + bucketStorage, + syncRules, + syncParams, + initialBucketPositions: params.buckets + }); + const stream = bucketStorage.watchWriteCheckpoint({ + user_id: checkpointUserId, + signal + }); const newCheckpoints = stream[Symbol.asyncIterator](); try { - let nextCheckpointPromise: Promise> | undefined; + let nextCheckpointPromise: + | Promise>> + | undefined; do { if (!nextCheckpointPromise) { - nextCheckpointPromise = newCheckpoints.next(); + // Wrap in a settledPromise, so that abort errors after the parent stopped iterating + // does not result in uncaught errors. + nextCheckpointPromise = settledPromise(newCheckpoints.next()); } const next = await nextCheckpointPromise; nextCheckpointPromise = undefined; - if (next.done) { + if (next.status == 'rejected') { + throw next.reason; + } + if (next.value.done) { break; } - - const { base, writeCheckpoint } = next.value; - const checkpoint = base.checkpoint; - - const storage = await base.getBucketStorage(); - if (storage == null) { - // Sync rules deleted in the meantime - try again with the next checkpoint. 
+ const line = await checksumState.buildNextCheckpointLine(next.value.value); + if (line == null) { + // No update to sync continue; } - const syncRules = storage.getParsedSyncRules(parseOptions); - - const allBuckets = await syncRules.queryBucketDescriptions({ - getParameterSets(lookups) { - return storage.getParameterSets(checkpoint, lookups); - }, - parameters: syncParams - }); - - if (allBuckets.length > 1000) { - logger.error(`Too many buckets`, { - checkpoint, - user_id: syncParams.user_id, - buckets: allBuckets.length - }); - // TODO: Limit number of buckets even before we get to this point - throw new Error(`Too many buckets: ${allBuckets.length}`); - } - - let dataBucketsNew = new Map(); - for (let bucket of allBuckets) { - dataBucketsNew.set(bucket.bucket, { - description: bucket, - start_op_id: dataBuckets.get(bucket.bucket)?.start_op_id ?? '0' - }); - } - dataBuckets = dataBucketsNew; - - const bucketList = [...dataBuckets.keys()]; - const checksumMap = await storage.getChecksums(checkpoint, bucketList); - // Subset of buckets for which there may be new data in this batch. - let bucketsToFetch: BucketDescription[]; - - if (lastChecksums) { - const diff = util.checksumsDiff(lastChecksums, checksumMap); - - if ( - lastWriteCheckpoint == writeCheckpoint && - diff.removedBuckets.length == 0 && - diff.updatedBuckets.length == 0 - ) { - // No changes - don't send anything to the client - continue; - } - const updatedBucketDescriptions = diff.updatedBuckets.map((e) => ({ - ...e, - priority: dataBuckets.get(e.bucket)!.description!.priority - })); - bucketsToFetch = updatedBucketDescriptions; - - let message = `Updated checkpoint: ${checkpoint} | `; - message += `write: ${writeCheckpoint} | `; - message += `buckets: ${allBuckets.length} | `; - message += `updated: ${limitedBuckets(diff.updatedBuckets, 20)} | `; - message += `removed: ${limitedBuckets(diff.removedBuckets, 20)}`; - logger.info(message, { - checkpoint, - user_id: syncParams.user_id, - buckets: allBuckets.length, - updated: diff.updatedBuckets.length, - removed: diff.removedBuckets.length - }); - const checksum_line: util.StreamingSyncCheckpointDiff = { - checkpoint_diff: { - last_op_id: checkpoint, - write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined, - removed_buckets: diff.removedBuckets, - updated_buckets: updatedBucketDescriptions - } - }; - - yield checksum_line; - } else { - let message = `New checkpoint: ${checkpoint} | write: ${writeCheckpoint} | `; - message += `buckets: ${allBuckets.length} ${limitedBuckets(allBuckets, 20)}`; - logger.info(message, { checkpoint, user_id: syncParams.user_id, buckets: allBuckets.length }); - bucketsToFetch = allBuckets; - const checksum_line: util.StreamingSyncCheckpoint = { - checkpoint: { - last_op_id: checkpoint, - write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined, - buckets: [...checksumMap.values()].map((e) => ({ - ...e, - priority: dataBuckets.get(e.bucket)!.description!.priority - })) - } - }; - yield checksum_line; - } - lastChecksums = checksumMap; - lastWriteCheckpoint = writeCheckpoint; + const { checkpointLine, bucketsToFetch } = line; + yield checkpointLine; // Start syncing data for buckets up to the checkpoint. As soon as we have completed at least one priority and // at least 1000 operations, we also start listening for new checkpoints concurrently. 
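A minimal sketch of the interruption flow described in the surrounding comment, with simplified stand-ins for the checkpoint iterator and the bucket data sync; the real code only arms the race after roughly 1000 synced operations and wraps next() in settledPromise.

```ts
async function syncWithInterruption(
  checkpoints: AsyncIterator<string>,
  syncData: (checkpoint: string, signal: AbortSignal) => Promise<void>
): Promise<void> {
  let next = await checkpoints.next();
  while (!next.done) {
    const abortController = new AbortController();

    // Eagerly wait for the following checkpoint; when it arrives, interrupt the
    // data sync that is still running for the current checkpoint.
    const pendingNext = checkpoints.next().then((result) => {
      if (!result.done) {
        abortController.abort();
      }
      return result;
    });

    // Sync bucket data for the current checkpoint; implementations should stop
    // early once the signal is aborted.
    await syncData(next.value, abortController.signal);

    next = await pendingNext;
  }
}
```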
When a new checkpoint comes // in while we're still busy syncing data for lower priorities, interrupt the current operation and start syncing @@ -246,8 +158,10 @@ async function* streamResponseInner( function maybeRaceForNewCheckpoint() { if (syncedOperations >= 1000 && nextCheckpointPromise === undefined) { nextCheckpointPromise = (async () => { - const next = await newCheckpoints.next(); - if (!next.done) { + const next = await settledPromise(newCheckpoints.next()); + if (next.status == 'rejected') { + abortCheckpointController.abort(); + } else if (!next.value.done) { // Stop the running bucketDataInBatches() iterations, making the main flow reach the new checkpoint. abortCheckpointController.abort(); } @@ -272,10 +186,10 @@ async function* streamResponseInner( } yield* bucketDataInBatches({ - storage, - checkpoint, + bucketStorage: bucketStorage, + checkpoint: next.value.value.base.checkpoint, bucketsToFetch: buckets, - dataBuckets, + checksumState, raw_data, binary_data, onRowsSent: markOperationsSent, @@ -298,11 +212,11 @@ async function* streamResponseInner( } interface BucketDataRequest { - storage: storage.SyncRulesBucketStorage; + bucketStorage: storage.SyncRulesBucketStorage; checkpoint: string; bucketsToFetch: BucketDescription[]; - /** Bucket data position, modified by the request. */ - dataBuckets: Map; + /** Contains current bucket state. Modified by the request as data is sent. */ + checksumState: BucketChecksumState; raw_data: boolean | undefined; binary_data: boolean | undefined; /** Signals that the connection was aborted and that streaming should stop ASAP. */ @@ -359,10 +273,10 @@ interface BucketDataBatchResult { */ async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator { const { - storage, + bucketStorage: storage, checkpoint, bucketsToFetch, - dataBuckets, + checksumState, raw_data, binary_data, abort_connection, @@ -393,14 +307,12 @@ async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator [bucket.bucket, dataBuckets.get(bucket.bucket)?.start_op_id!]) - ); - const data = storage.getBucketDataBatch(checkpoint, filteredBuckets); + const filteredBuckets = checksumState.getFilteredBucketPositions(bucketsToFetch); + const dataBatches = storage.getBucketDataBatch(checkpoint, filteredBuckets); let has_more = false; - for await (let { batch: r, targetOp } of data) { + for await (let { batch: r, targetOp } of dataBatches) { // Abort in current batch if the connection is closed if (abort_connection.aborted) { return; @@ -444,7 +356,7 @@ async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator { - if (typeof b != 'string') { - return b.bucket; - } else { - return b; - } - }); - if (buckets.length <= limit) { - return JSON.stringify(buckets); - } - const limited = buckets.slice(0, limit); - return `${JSON.stringify(limited)}...`; -} diff --git a/packages/service-core/src/sync/util.ts b/packages/service-core/src/sync/util.ts index f55b0b15f..7ccb95746 100644 --- a/packages/service-core/src/sync/util.ts +++ b/packages/service-core/src/sync/util.ts @@ -130,3 +130,26 @@ export function acquireSemaphoreAbortable( }, reject); }); } + +/** + * Wrap a promise in the style of Promise.allSettled. + * + * This is specifically useful if rejections should not be treated as uncaught rejections + * if it is not specifically handled. 
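A usage sketch for the settledPromise helper defined just below: because the rejection handler is attached up front, an eagerly started next() call cannot surface as an unhandled rejection if the consumer stops iterating. The local copy only mirrors the helper's shape.

```ts
type Settled<T> =
  | { status: 'fulfilled'; value: T }
  | { status: 'rejected'; reason: unknown };

function settledPromise<T>(promise: Promise<T>): Promise<Settled<T>> {
  return promise.then(
    (value) => ({ status: 'fulfilled' as const, value }),
    (reason) => ({ status: 'rejected' as const, reason })
  );
}

async function nextOrThrow<T>(iterator: AsyncIterator<T>): Promise<IteratorResult<T>> {
  // The rejection handler is attached immediately, so even if this promise is
  // later abandoned, an eventual error will not become an unhandled rejection.
  const result = await settledPromise(iterator.next());
  if (result.status === 'rejected') {
    throw result.reason;
  }
  return result.value;
}
```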
+ */ +export function settledPromise(promise: Promise): Promise> { + return promise.then( + (result) => { + return { + status: 'fulfilled', + value: result + }; + }, + (error) => { + return { + status: 'rejected', + reason: error + }; + } + ); +} diff --git a/packages/service-core/src/util/checkpointing.ts b/packages/service-core/src/util/checkpointing.ts index 3f4b3f19b..99c8a8747 100644 --- a/packages/service-core/src/util/checkpointing.ts +++ b/packages/service-core/src/util/checkpointing.ts @@ -1,6 +1,6 @@ -import { ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; +import { ErrorCode, ServiceError } from '@powersync/lib-services-framework'; import { RouteAPI } from '../api/RouteAPI.js'; -import { BucketStorageFactory } from '../storage/BucketStorage.js'; +import { BucketStorageFactory } from '../storage/storage-index.js'; export interface CreateWriteCheckpointOptions { userId: string | undefined; @@ -11,13 +11,11 @@ export interface CreateWriteCheckpointOptions { export async function createWriteCheckpoint(options: CreateWriteCheckpointOptions) { const full_user_id = checkpointUserId(options.userId, options.clientId); - const activeSyncRules = await options.storage.getActiveSyncRulesContent(); - if (!activeSyncRules) { + const syncBucketStorage = await options.storage.getActiveStorage(); + if (!syncBucketStorage) { throw new ServiceError(ErrorCode.PSYNC_S2302, `Cannot create Write Checkpoint since no sync rules are active.`); } - using syncBucketStorage = options.storage.getInstance(activeSyncRules); - const { writeCheckpoint, currentCheckpoint } = await options.api.createReplicationHead(async (currentCheckpoint) => { const writeCheckpoint = await syncBucketStorage.createManagedWriteCheckpoint({ user_id: full_user_id, diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts new file mode 100644 index 000000000..b7102c4ad --- /dev/null +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -0,0 +1,565 @@ +import { + BucketChecksum, + BucketChecksumState, + BucketChecksumStateStorage, + CHECKPOINT_INVALIDATE_ALL, + ChecksumMap, + OpId, + WatchFilterEvent +} from '@/index.js'; +import { RequestParameters, SqliteJsonRow, SqliteJsonValue, SqlSyncRules } from '@powersync/service-sync-rules'; +import { describe, expect, test } from 'vitest'; + +describe('BucketChecksumState', () => { + // Single global[] bucket. 
+ // We don't care about data in these tests + const SYNC_RULES_GLOBAL = SqlSyncRules.fromYaml( + ` +bucket_definitions: + global: + data: [] + `, + { defaultSchema: 'public' } + ); + + // global[1] and global[2] + const SYNC_RULES_GLOBAL_TWO = SqlSyncRules.fromYaml( + ` +bucket_definitions: + global: + parameters: + - select 1 as id + - select 2 as id + data: [] + `, + { defaultSchema: 'public' } + ); + + // by_project[n] + const SYNC_RULES_DYNAMIC = SqlSyncRules.fromYaml( + ` +bucket_definitions: + by_project: + parameters: select id from projects where user_id = request.user_id() + data: [] + `, + { defaultSchema: 'public' } + ); + + test('global bucket with update', async () => { + const storage = new MockBucketChecksumStateStorage(); + // Set intial state + storage.updateTestChecksum({ bucket: 'global[]', checksum: 1, count: 1 }); + + const state = new BucketChecksumState({ + syncParams: new RequestParameters({ sub: '' }, {}), + syncRules: SYNC_RULES_GLOBAL, + bucketStorage: storage + }); + + const line = (await state.buildNextCheckpointLine({ + base: { checkpoint: '1', lsn: '1' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }))!; + expect(line.checkpointLine).toEqual({ + checkpoint: { + buckets: [{ bucket: 'global[]', checksum: 1, count: 1, priority: 3 }], + last_op_id: '1', + write_checkpoint: undefined + } + }); + expect(line.bucketsToFetch).toEqual([ + { + bucket: 'global[]', + priority: 3 + } + ]); + // This is the bucket data to be fetched + expect(state.getFilteredBucketPositions(line.bucketsToFetch)).toEqual(new Map([['global[]', '0']])); + + // This similuates the bucket data being sent + state.updateBucketPosition({ bucket: 'global[]', nextAfter: '1', hasMore: false }); + + // Update bucket storage state + storage.updateTestChecksum({ bucket: 'global[]', checksum: 2, count: 2 }); + + // Now we get a new line + const line2 = (await state.buildNextCheckpointLine({ + base: { checkpoint: '2', lsn: '2' }, + writeCheckpoint: null, + update: { + updatedDataBuckets: ['global[]'], + invalidateDataBuckets: false, + updatedParameterBucketDefinitions: [], + invalidateParameterBuckets: false + } + }))!; + expect(line2.checkpointLine).toEqual({ + checkpoint_diff: { + removed_buckets: [], + updated_buckets: [{ bucket: 'global[]', checksum: 2, count: 2, priority: 3 }], + last_op_id: '2', + write_checkpoint: undefined + } + }); + expect(state.getFilteredBucketPositions(line2.bucketsToFetch)).toEqual(new Map([['global[]', '1']])); + }); + + test('global bucket with initial state', async () => { + // This tests the client sending an initial state + // This does not affect the checkpoint, but does affect the data to be fetched + /// (getFilteredBucketStates) + const storage = new MockBucketChecksumStateStorage(); + // Set intial state + storage.updateTestChecksum({ bucket: 'global[]', checksum: 1, count: 1 }); + + const state = new BucketChecksumState({ + // Client sets the initial state here + initialBucketPositions: [{ name: 'global[]', after: '1' }], + syncParams: new RequestParameters({ sub: '' }, {}), + syncRules: SYNC_RULES_GLOBAL, + bucketStorage: storage + }); + + const line = (await state.buildNextCheckpointLine({ + base: { checkpoint: '1', lsn: '1' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }))!; + expect(line.checkpointLine).toEqual({ + checkpoint: { + buckets: [{ bucket: 'global[]', checksum: 1, count: 1, priority: 3 }], + last_op_id: '1', + write_checkpoint: undefined + } + }); + expect(line.bucketsToFetch).toEqual([ + { + bucket: 
'global[]', + priority: 3 + } + ]); + // This is the main difference between this and the previous test + expect(state.getFilteredBucketPositions(line.bucketsToFetch)).toEqual(new Map([['global[]', '1']])); + }); + + test('multiple static buckets', async () => { + const storage = new MockBucketChecksumStateStorage(); + // Set intial state + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 1, count: 1 }); + storage.updateTestChecksum({ bucket: 'global[2]', checksum: 1, count: 1 }); + + const state = new BucketChecksumState({ + syncParams: new RequestParameters({ sub: '' }, {}), + syncRules: SYNC_RULES_GLOBAL_TWO, + bucketStorage: storage + }); + + const line = (await state.buildNextCheckpointLine({ + base: { checkpoint: '1', lsn: '1' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }))!; + expect(line.checkpointLine).toEqual({ + checkpoint: { + buckets: [ + { bucket: 'global[1]', checksum: 1, count: 1, priority: 3 }, + { bucket: 'global[2]', checksum: 1, count: 1, priority: 3 } + ], + last_op_id: '1', + write_checkpoint: undefined + } + }); + expect(line.bucketsToFetch).toEqual([ + { + bucket: 'global[1]', + priority: 3 + }, + { + bucket: 'global[2]', + priority: 3 + } + ]); + + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 2, count: 2 }); + storage.updateTestChecksum({ bucket: 'global[2]', checksum: 2, count: 2 }); + + const line2 = (await state.buildNextCheckpointLine({ + base: { checkpoint: '2', lsn: '2' }, + writeCheckpoint: null, + update: { + ...CHECKPOINT_INVALIDATE_ALL, + updatedDataBuckets: ['global[1]', 'global[2]'], + invalidateDataBuckets: false + } + }))!; + expect(line2.checkpointLine).toEqual({ + checkpoint_diff: { + removed_buckets: [], + updated_buckets: [ + { bucket: 'global[1]', checksum: 2, count: 2, priority: 3 }, + { bucket: 'global[2]', checksum: 2, count: 2, priority: 3 } + ], + last_op_id: '2', + write_checkpoint: undefined + } + }); + }); + + test('removing a static bucket', async () => { + // This tests the client sending an initial state, with a bucket that we don't have. + // This makes effectively no difference to the output. By not including the bucket + // in the output, the client will remove the bucket. + const storage = new MockBucketChecksumStateStorage(); + + const state = new BucketChecksumState({ + // Client sets the initial state here + initialBucketPositions: [{ name: 'something_here[]', after: '1' }], + syncParams: new RequestParameters({ sub: '' }, {}), + syncRules: SYNC_RULES_GLOBAL, + bucketStorage: storage + }); + + storage.updateTestChecksum({ bucket: 'global[]', checksum: 1, count: 1 }); + + const line = (await state.buildNextCheckpointLine({ + base: { checkpoint: '1', lsn: '1' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }))!; + expect(line.checkpointLine).toEqual({ + checkpoint: { + buckets: [{ bucket: 'global[]', checksum: 1, count: 1, priority: 3 }], + last_op_id: '1', + write_checkpoint: undefined + } + }); + expect(line.bucketsToFetch).toEqual([ + { + bucket: 'global[]', + priority: 3 + } + ]); + expect(state.getFilteredBucketPositions(line.bucketsToFetch)).toEqual(new Map([['global[]', '0']])); + }); + + test('invalidating individual bucket', async () => { + // We manually control the filter events here. 
+ + const storage = new MockBucketChecksumStateStorage(); + // Set initial state + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 1, count: 1 }); + storage.updateTestChecksum({ bucket: 'global[2]', checksum: 1, count: 1 }); + + const state = new BucketChecksumState({ + syncParams: new RequestParameters({ sub: '' }, {}), + syncRules: SYNC_RULES_GLOBAL_TWO, + bucketStorage: storage + }); + + // We specifically do not set this here, so that we have manual control over the events. + // storage.filter = state.checkpointFilter; + + await state.buildNextCheckpointLine({ + base: { checkpoint: '1', lsn: '1' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }); + + state.updateBucketPosition({ bucket: 'global[1]', nextAfter: '1', hasMore: false }); + state.updateBucketPosition({ bucket: 'global[2]', nextAfter: '1', hasMore: false }); + + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 2, count: 2 }); + storage.updateTestChecksum({ bucket: 'global[2]', checksum: 2, count: 2 }); + + const line2 = (await state.buildNextCheckpointLine({ + base: { checkpoint: '2', lsn: '2' }, + writeCheckpoint: null, + update: { + ...CHECKPOINT_INVALIDATE_ALL, + // Invalidate the state for global[1] - will only re-check the single bucket. + // This is essentially inconsistent state, but is the simplest way to test that + // the filter is working. + updatedDataBuckets: ['global[1]'], + invalidateDataBuckets: false + } + }))!; + expect(line2.checkpointLine).toEqual({ + checkpoint_diff: { + removed_buckets: [], + updated_buckets: [ + // This does not include global[2], since it was not invalidated. + { bucket: 'global[1]', checksum: 2, count: 2, priority: 3 } + ], + last_op_id: '2', + write_checkpoint: undefined + } + }); + expect(line2.bucketsToFetch).toEqual([{ bucket: 'global[1]', priority: 3 }]); + }); + + test('invalidating all buckets', async () => { + // We manually control the filter events here. + const storage = new MockBucketChecksumStateStorage(); + + const state = new BucketChecksumState({ + syncParams: new RequestParameters({ sub: '' }, {}), + syncRules: SYNC_RULES_GLOBAL_TWO, + bucketStorage: storage + }); + + // We specifically do not set this here, so that we have manual control over the events. 
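For reference, a sketch of the checkpoint update payloads these tests construct. The field names mirror the objects passed to buildNextCheckpointLine; CHECKPOINT_INVALIDATE_ALL is approximated here as a conservative re-check-everything value, and the helper is illustrative only.

```ts
interface CheckpointUpdateFilter {
  invalidateDataBuckets: boolean;
  updatedDataBuckets: string[];
  invalidateParameterBuckets: boolean;
  updatedParameterBucketDefinitions: string[];
}

// Conservative fallback: every bucket's checksum is re-queried.
const invalidateAll: CheckpointUpdateFilter = {
  invalidateDataBuckets: true,
  updatedDataBuckets: [],
  invalidateParameterBuckets: true,
  updatedParameterBucketDefinitions: []
};

// Targeted update: only the listed data buckets need their checksums re-queried;
// unchanged buckets reuse the cached checksums from the previous checkpoint.
function dataBucketsChanged(buckets: string[]): CheckpointUpdateFilter {
  return {
    invalidateDataBuckets: false,
    updatedDataBuckets: buckets,
    invalidateParameterBuckets: false,
    updatedParameterBucketDefinitions: []
  };
}
```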
+ // storage.filter = state.checkpointFilter; + + // Set initial state + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 1, count: 1 }); + storage.updateTestChecksum({ bucket: 'global[2]', checksum: 1, count: 1 }); + + await state.buildNextCheckpointLine({ + base: { checkpoint: '1', lsn: '1' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }); + + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 2, count: 2 }); + storage.updateTestChecksum({ bucket: 'global[2]', checksum: 2, count: 2 }); + + const line2 = (await state.buildNextCheckpointLine({ + base: { checkpoint: '2', lsn: '2' }, + writeCheckpoint: null, + // Invalidate the state - will re-check all buckets + update: CHECKPOINT_INVALIDATE_ALL + }))!; + expect(line2.checkpointLine).toEqual({ + checkpoint_diff: { + removed_buckets: [], + updated_buckets: [ + { bucket: 'global[1]', checksum: 2, count: 2, priority: 3 }, + { bucket: 'global[2]', checksum: 2, count: 2, priority: 3 } + ], + last_op_id: '2', + write_checkpoint: undefined + } + }); + expect(line2.bucketsToFetch).toEqual([ + { bucket: 'global[1]', priority: 3 }, + { bucket: 'global[2]', priority: 3 } + ]); + }); + + test('interrupt and resume static buckets checkpoint', async () => { + const storage = new MockBucketChecksumStateStorage(); + // Set intial state + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 3, count: 3 }); + storage.updateTestChecksum({ bucket: 'global[2]', checksum: 3, count: 3 }); + + const state = new BucketChecksumState({ + syncParams: new RequestParameters({ sub: '' }, {}), + syncRules: SYNC_RULES_GLOBAL_TWO, + bucketStorage: storage + }); + + const line = (await state.buildNextCheckpointLine({ + base: { checkpoint: '3', lsn: '3' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }))!; + expect(line.checkpointLine).toEqual({ + checkpoint: { + buckets: [ + { bucket: 'global[1]', checksum: 3, count: 3, priority: 3 }, + { bucket: 'global[2]', checksum: 3, count: 3, priority: 3 } + ], + last_op_id: '3', + write_checkpoint: undefined + } + }); + expect(line.bucketsToFetch).toEqual([ + { + bucket: 'global[1]', + priority: 3 + }, + { + bucket: 'global[2]', + priority: 3 + } + ]); + + // This is the bucket data to be fetched + expect(state.getFilteredBucketPositions(line.bucketsToFetch)).toEqual( + new Map([ + ['global[1]', '0'], + ['global[2]', '0'] + ]) + ); + + // No data changes here. + // We simulate partial data sent, before a checkpoint is interrupted. + state.updateBucketPosition({ bucket: 'global[1]', nextAfter: '3', hasMore: false }); + state.updateBucketPosition({ bucket: 'global[2]', nextAfter: '1', hasMore: true }); + storage.updateTestChecksum({ bucket: 'global[1]', checksum: 4, count: 4 }); + + const line2 = (await state.buildNextCheckpointLine({ + base: { checkpoint: '4', lsn: '4' }, + writeCheckpoint: null, + update: { + ...CHECKPOINT_INVALIDATE_ALL, + invalidateDataBuckets: false, + updatedDataBuckets: ['global[1]'] + } + }))!; + expect(line2.checkpointLine).toEqual({ + checkpoint_diff: { + removed_buckets: [], + updated_buckets: [ + { + bucket: 'global[1]', + checksum: 4, + count: 4, + priority: 3 + } + ], + last_op_id: '4', + write_checkpoint: undefined + } + }); + // This should contain both buckets, even though only one changed. 
+ expect(line2.bucketsToFetch).toEqual([ + { + bucket: 'global[1]', + priority: 3 + }, + { + bucket: 'global[2]', + priority: 3 + } + ]); + + expect(state.getFilteredBucketPositions(line2.bucketsToFetch)).toEqual( + new Map([ + ['global[1]', '3'], + ['global[2]', '1'] + ]) + ); + }); + + test('dynamic buckets with updates', async () => { + const storage = new MockBucketChecksumStateStorage(); + // Set intial state + storage.updateTestChecksum({ bucket: 'by_project[1]', checksum: 1, count: 1 }); + storage.updateTestChecksum({ bucket: 'by_project[2]', checksum: 1, count: 1 }); + storage.updateTestChecksum({ bucket: 'by_project[3]', checksum: 1, count: 1 }); + + const state = new BucketChecksumState({ + syncParams: new RequestParameters({ sub: 'u1' }, {}), + syncRules: SYNC_RULES_DYNAMIC, + bucketStorage: storage + }); + + storage.getParameterSets = async (checkpoint: OpId, lookups: SqliteJsonValue[][]): Promise => { + expect(checkpoint).toEqual('1'); + expect(lookups).toEqual([['by_project', '1', 'u1']]); + return [{ id: 1 }, { id: 2 }]; + }; + + const line = (await state.buildNextCheckpointLine({ + base: { checkpoint: '1', lsn: '1' }, + writeCheckpoint: null, + update: CHECKPOINT_INVALIDATE_ALL + }))!; + expect(line.checkpointLine).toEqual({ + checkpoint: { + buckets: [ + { bucket: 'by_project[1]', checksum: 1, count: 1, priority: 3 }, + { bucket: 'by_project[2]', checksum: 1, count: 1, priority: 3 } + ], + last_op_id: '1', + write_checkpoint: undefined + } + }); + expect(line.bucketsToFetch).toEqual([ + { + bucket: 'by_project[1]', + priority: 3 + }, + { + bucket: 'by_project[2]', + priority: 3 + } + ]); + // This is the bucket data to be fetched + expect(state.getFilteredBucketPositions(line.bucketsToFetch)).toEqual( + new Map([ + ['by_project[1]', '0'], + ['by_project[2]', '0'] + ]) + ); + + state.updateBucketPosition({ bucket: 'by_project[1]', nextAfter: '1', hasMore: false }); + state.updateBucketPosition({ bucket: 'by_project[2]', nextAfter: '1', hasMore: false }); + + storage.getParameterSets = async (checkpoint: OpId, lookups: SqliteJsonValue[][]): Promise => { + expect(checkpoint).toEqual('2'); + expect(lookups).toEqual([['by_project', '1', 'u1']]); + return [{ id: 1 }, { id: 2 }, { id: 3 }]; + }; + + // Now we get a new line + const line2 = (await state.buildNextCheckpointLine({ + base: { checkpoint: '2', lsn: '2' }, + writeCheckpoint: null, + update: { + invalidateDataBuckets: false, + updatedDataBuckets: [], + updatedParameterBucketDefinitions: ['by_project'], + invalidateParameterBuckets: false + } + }))!; + expect(line2.checkpointLine).toEqual({ + checkpoint_diff: { + removed_buckets: [], + updated_buckets: [{ bucket: 'by_project[3]', checksum: 1, count: 1, priority: 3 }], + last_op_id: '2', + write_checkpoint: undefined + } + }); + expect(state.getFilteredBucketPositions(line2.bucketsToFetch)).toEqual(new Map([['by_project[3]', '0']])); + }); +}); + +class MockBucketChecksumStateStorage implements BucketChecksumStateStorage { + private state: ChecksumMap = new Map(); + public filter?: (event: WatchFilterEvent) => boolean; + + constructor() {} + + updateTestChecksum(checksum: BucketChecksum): void { + this.state.set(checksum.bucket, checksum); + this.filter?.({ changedDataBucket: checksum.bucket }); + } + + invalidate() { + this.filter?.({ invalidate: true }); + } + + async getChecksums(checkpoint: OpId, buckets: string[]): Promise { + return new Map( + buckets.map((bucket) => { + const checksum = this.state.get(bucket); + return [ + bucket, + { + bucket: bucket, + checksum: 
checksum?.checksum ?? 0, + count: checksum?.count ?? 0 + } + ]; + }) + ); + } + + async getParameterSets(checkpoint: OpId, lookups: SqliteJsonValue[][]): Promise { + throw new Error('Method not implemented.'); + } +} diff --git a/packages/service-errors/src/codes.ts b/packages/service-errors/src/codes.ts index 5e607f3d5..55c83c31a 100644 --- a/packages/service-errors/src/codes.ts +++ b/packages/service-errors/src/codes.ts @@ -324,6 +324,13 @@ export enum ErrorCode { */ PSYNC_S2304 = 'PSYNC_S2304', + /** + * Too many buckets. + * + * There is currently a limit of 1000 buckets per active connection. + */ + PSYNC_S2305 = 'PSYNC_S2305', + // ## PSYNC_S23xx: Sync API errors - MongoDB Storage /** diff --git a/packages/sync-rules/src/BucketParameterQuerier.ts b/packages/sync-rules/src/BucketParameterQuerier.ts new file mode 100644 index 000000000..c229fd6e1 --- /dev/null +++ b/packages/sync-rules/src/BucketParameterQuerier.ts @@ -0,0 +1,65 @@ +import { BucketDescription } from './BucketDescription.js'; +import { RequestParameters, SqliteJsonRow, SqliteJsonValue } from './types.js'; + +/** + * Represents a set of parameter queries for a specific request. + */ +export interface BucketParameterQuerier { + /** + * These buckets do not change for the lifetime of the connection. + * + * This includes parameter queries such as: + * + * select request.user_id() as user_id() + * select value as project_id from json_each(request.jwt() -> 'project_ids') + */ + readonly staticBuckets: BucketDescription[]; + + /** + * True if there are dynamic buckets, meaning queryDynamicBucketDescriptions() should be used. + * + * If this is false, queryDynamicBucketDescriptions() will always return an empty array, + * and dynamicBucketDefinitions.size == 0. + */ + readonly hasDynamicBuckets: boolean; + + readonly dynamicBucketDefinitions: Set; + + /** + * These buckets depend on parameter storage, and needs to be retrieved dynamically for each checkpoint. + * + * The ParameterLookupSource should perform the query for the current checkpoint - that is not passed + * as a parameter. 
+ * + * This includes parameter queries such as: + * + * select id as user_id from users where users.id = request.user_id() + */ + queryDynamicBucketDescriptions(source: ParameterLookupSource): Promise; +} + +export interface ParameterLookupSource { + getParameterSets: (lookups: SqliteJsonValue[][]) => Promise; +} + +export interface QueryBucketDescriptorOptions extends ParameterLookupSource { + parameters: RequestParameters; +} + +export function mergeBucketParameterQueriers(queriers: BucketParameterQuerier[]): BucketParameterQuerier { + const dynamicBucketDefinitions = new Set(queriers.flatMap((q) => [...q.dynamicBucketDefinitions])); + return { + staticBuckets: queriers.flatMap((q) => q.staticBuckets), + hasDynamicBuckets: dynamicBucketDefinitions.size > 0, + dynamicBucketDefinitions, + async queryDynamicBucketDescriptions(source: ParameterLookupSource) { + let results: BucketDescription[] = []; + for (let q of queriers) { + if (q.hasDynamicBuckets) { + results.push(...(await q.queryDynamicBucketDescriptions(source))); + } + } + return results; + } + }; +} diff --git a/packages/sync-rules/src/SqlBucketDescriptor.ts b/packages/sync-rules/src/SqlBucketDescriptor.ts index 8071a0012..a0fa59f0f 100644 --- a/packages/sync-rules/src/SqlBucketDescriptor.ts +++ b/packages/sync-rules/src/SqlBucketDescriptor.ts @@ -1,4 +1,5 @@ import { BucketDescription } from './BucketDescription.js'; +import { BucketParameterQuerier, mergeBucketParameterQueriers } from './BucketParameterQuerier.js'; import { IdSequence } from './IdSequence.js'; import { SourceTableInterface } from './SourceTableInterface.js'; import { SqlDataQuery } from './SqlDataQuery.js'; @@ -8,10 +9,9 @@ import { StaticSqlParameterQuery } from './StaticSqlParameterQuery.js'; import { TablePattern } from './TablePattern.js'; import { SqlRuleError } from './errors.js'; import { - EvaluateRowOptions, EvaluatedParametersResult, + EvaluateRowOptions, EvaluationResult, - QueryBucketIdOptions, QueryParseOptions, RequestParameters, SqliteRow @@ -108,6 +108,23 @@ export class SqlBucketDescriptor { return results; } + getBucketParameterQuerier(parameters: RequestParameters): BucketParameterQuerier { + const staticBuckets = this.getStaticBucketDescriptions(parameters); + const staticQuerier = { + staticBuckets, + hasDynamicBuckets: false, + dynamicBucketDefinitions: new Set(), + queryDynamicBucketDescriptions: async () => [] + } satisfies BucketParameterQuerier; + + if (this.parameter_queries.length == 0) { + return staticQuerier; + } + + const dynamicQueriers = this.parameter_queries.map((query) => query.getBucketParameterQuerier(parameters)); + return mergeBucketParameterQueriers([staticQuerier, ...dynamicQueriers]); + } + getStaticBucketDescriptions(parameters: RequestParameters): BucketDescription[] { let results: BucketDescription[] = []; for (let query of this.global_parameter_queries) { @@ -116,14 +133,6 @@ export class SqlBucketDescriptor { return results; } - async queryBucketDescriptions(options: QueryBucketIdOptions): Promise { - let result = this.getStaticBucketDescriptions(options.parameters); - for (let query of this.parameter_queries) { - result.push(...(await query.queryBucketDescriptions(options))); - } - return result; - } - getSourceTables(): Set { let result = new Set(); for (let query of this.parameter_queries) { diff --git a/packages/sync-rules/src/SqlParameterQuery.ts b/packages/sync-rules/src/SqlParameterQuery.ts index 32e681437..5c008ddc8 100644 --- a/packages/sync-rules/src/SqlParameterQuery.ts +++ 
b/packages/sync-rules/src/SqlParameterQuery.ts @@ -1,4 +1,6 @@ import { parse, SelectedColumn } from 'pgsql-ast-parser'; +import { BucketDescription, BucketPriority, defaultBucketPriority } from './BucketDescription.js'; +import { BucketParameterQuerier, ParameterLookupSource } from './BucketParameterQuerier.js'; import { SqlRuleError } from './errors.js'; import { SourceTableInterface } from './SourceTableInterface.js'; import { SqlTools } from './sql_filters.js'; @@ -6,13 +8,13 @@ import { checkUnsupportedFeatures, isClauseError, isParameterValueClause } from import { StaticSqlParameterQuery } from './StaticSqlParameterQuery.js'; import { TablePattern } from './TablePattern.js'; import { TableQuerySchema } from './TableQuerySchema.js'; +import { TableValuedFunctionSqlParameterQuery } from './TableValuedFunctionSqlParameterQuery.js'; import { EvaluatedParameters, EvaluatedParametersResult, InputParameter, ParameterMatchClause, ParameterValueClause, - QueryBucketIdOptions, QueryParseOptions, QuerySchema, RequestParameters, @@ -22,8 +24,6 @@ import { SqliteRow } from './types.js'; import { filterJsonRow, getBucketId, isJsonValue, isSelectStatement } from './utils.js'; -import { TableValuedFunctionSqlParameterQuery } from './TableValuedFunctionSqlParameterQuery.js'; -import { BucketDescription, BucketPriority, defaultBucketPriority } from './BucketDescription.js'; /** * Represents a parameter query, such as: @@ -189,6 +189,10 @@ export class SqlParameterQuery { priority?: BucketPriority; filter?: ParameterMatchClause; + + /** + * Bucket definition name. + */ descriptor_name?: string; /** _Input_ token / user parameters */ @@ -363,22 +367,28 @@ export class SqlParameterQuery { } } - /** - * Given sync parameters (token and user parameters), return bucket ids and priorities. - * - * This is done in three steps: - * 1. Given the parameters, get lookups we need to perform on the database. - * 2. Perform the lookups, returning parameter sets (partial rows). - * 3. Given the parameter sets, resolve bucket ids. - */ - async queryBucketDescriptions(options: QueryBucketIdOptions): Promise { - let lookups = this.getLookups(options.parameters); + getBucketParameterQuerier(requestParameters: RequestParameters): BucketParameterQuerier { + const lookups = this.getLookups(requestParameters); if (lookups.length == 0) { - return []; + // This typically happens when the query is pre-filtered using a where clause + // on the parameters, and does not depend on the database state. 
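A hedged sketch of how a consumer combines the static and dynamic sides of a BucketParameterQuerier for each checkpoint, mirroring getCheckpointUpdateDynamic earlier in this diff; the local interfaces only approximate the real exports.

```ts
interface BucketDescription {
  bucket: string;
  priority: number;
}

interface ParameterLookupSource {
  getParameterSets: (lookups: unknown[][]) => Promise<Record<string, unknown>[]>;
}

interface Querier {
  staticBuckets: BucketDescription[];
  hasDynamicBuckets: boolean;
  queryDynamicBucketDescriptions(source: ParameterLookupSource): Promise<BucketDescription[]>;
}

interface ParameterStorage {
  getParameterSets(checkpoint: string, lookups: unknown[][]): Promise<Record<string, unknown>[]>;
}

async function resolveBuckets(querier: Querier, checkpoint: string, storage: ParameterStorage) {
  if (!querier.hasDynamicBuckets) {
    // Static-only: computed once per connection, no parameter storage lookups needed.
    return querier.staticBuckets;
  }
  const dynamic = await querier.queryDynamicBucketDescriptions({
    // Bind the lookup source to the current checkpoint.
    getParameterSets: (lookups) => storage.getParameterSets(checkpoint, lookups)
  });
  return [...querier.staticBuckets, ...dynamic];
}
```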
+ return { + staticBuckets: [], + hasDynamicBuckets: false, + dynamicBucketDefinitions: new Set(), + queryDynamicBucketDescriptions: async () => [] + }; } - const parameters = await options.getParameterSets(lookups); - return this.resolveBucketDescriptions(parameters, options.parameters); + return { + staticBuckets: [], + hasDynamicBuckets: true, + dynamicBucketDefinitions: new Set([this.descriptor_name!]), + queryDynamicBucketDescriptions: async (source: ParameterLookupSource) => { + const bucketParameters = await source.getParameterSets(lookups); + return this.resolveBucketDescriptions(bucketParameters, requestParameters); + } + }; } get hasAuthenticatedBucketParameters(): boolean { diff --git a/packages/sync-rules/src/SqlSyncRules.ts b/packages/sync-rules/src/SqlSyncRules.ts index 99f08b083..2db268f2a 100644 --- a/packages/sync-rules/src/SqlSyncRules.ts +++ b/packages/sync-rules/src/SqlSyncRules.ts @@ -16,7 +16,6 @@ import { isEvaluatedParameters, isEvaluatedRow, isEvaluationError, - QueryBucketIdOptions, QueryParseOptions, RequestParameters, SourceSchema, @@ -24,6 +23,11 @@ import { SyncRules } from './types.js'; import { BucketDescription, BucketPriority, isValidPriority } from './BucketDescription.js'; +import { + BucketParameterQuerier, + mergeBucketParameterQueriers, + ParameterLookupSource +} from './BucketParameterQuerier.js'; const ACCEPT_POTENTIALLY_DANGEROUS_QUERIES = Symbol('ACCEPT_POTENTIALLY_DANGEROUS_QUERIES'); @@ -317,26 +321,9 @@ export class SqlSyncRules implements SyncRules { return { results, errors }; } - /** - * @deprecated For testing only. - */ - getStaticBucketDescriptions(parameters: RequestParameters) { - let results: BucketDescription[] = []; - for (let bucket of this.bucket_descriptors) { - results.push(...bucket.getStaticBucketDescriptions(parameters)); - } - return results; - } - - /** - * Note: This can error hard. 
- */ - async queryBucketDescriptions(options: QueryBucketIdOptions): Promise { - let results: BucketDescription[] = []; - for (let bucket of this.bucket_descriptors) { - results.push(...(await bucket.queryBucketDescriptions(options))); - } - return results; + getBucketParameterQuerier(parameters: RequestParameters): BucketParameterQuerier { + const queriers = this.bucket_descriptors.map((query) => query.getBucketParameterQuerier(parameters)); + return mergeBucketParameterQueriers(queriers); } getSourceTables(): TablePattern[] { diff --git a/packages/sync-rules/src/types.ts b/packages/sync-rules/src/types.ts index 6ae8ca013..f4828d0d1 100644 --- a/packages/sync-rules/src/types.ts +++ b/packages/sync-rules/src/types.ts @@ -4,7 +4,7 @@ import { SourceTableInterface } from './SourceTableInterface.js'; import { SyncRulesOptions } from './SqlSyncRules.js'; import { TablePattern } from './TablePattern.js'; import { toSyncRulesParameters } from './utils.js'; -import { BucketPriority } from './BucketDescription.js'; +import { BucketDescription, BucketPriority } from './BucketDescription.js'; export interface SyncRules { evaluateRow(options: EvaluateRowOptions): EvaluationResult[]; @@ -342,11 +342,6 @@ export type CompiledClause = RowValueClause | ParameterMatchClause | ParameterVa */ export type TrueIfParametersMatch = FilterParameters[]; -export interface QueryBucketIdOptions { - getParameterSets: (lookups: SqliteJsonValue[][]) => Promise; - parameters: RequestParameters; -} - export interface SourceSchemaTable { table: string; getColumn(column: string): ColumnDefinition | undefined; diff --git a/packages/sync-rules/test/src/sync_rules.test.ts b/packages/sync-rules/test/src/sync_rules.test.ts index 90d75c697..1ffa64cb4 100644 --- a/packages/sync-rules/test/src/sync_rules.test.ts +++ b/packages/sync-rules/test/src/sync_rules.test.ts @@ -37,9 +37,11 @@ bucket_definitions: bucket: 'mybucket[]' } ]); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({}))).toEqual([ - { bucket: 'mybucket[]', priority: 3 } - ]); + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({}))).toMatchObject({ + staticBuckets: [{ bucket: 'mybucket[]', priority: 3 }], + hasDynamicBuckets: false, + dynamicBucketDefinitions: new Set() + }); }); test('parse global sync rules with filter', () => { @@ -60,11 +62,19 @@ bucket_definitions: expect(param_query.filter!.lookupParameterValue(normalizeTokenParameters({ is_admin: 1n }))).toEqual(1n); expect(param_query.filter!.lookupParameterValue(normalizeTokenParameters({ is_admin: 0n }))).toEqual(0n); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({ is_admin: true }))).toEqual([ - { bucket: 'mybucket[]', priority: 3 } - ]); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({ is_admin: false }))).toEqual([]); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({}))).toEqual([]); + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ is_admin: true }))).toMatchObject({ + staticBuckets: [{ bucket: 'mybucket[]', priority: 3 }], + hasDynamicBuckets: false + }); + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ is_admin: false }))).toMatchObject({ + staticBuckets: [], + hasDynamicBuckets: false + }); + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({}))).toMatchObject({ + staticBuckets: [], + hasDynamicBuckets: false, + dynamicBucketDefinitions: new Set() + }); }); test('parse global sync rules with table filter', () => { @@ -106,7 +116,8 @@ bucket_definitions: 
const param_query = bucket.global_parameter_queries[0]; expect(param_query.bucket_parameters).toEqual(['user_id', 'device_id']); expect( - rules.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }, { device_id: 'device1' })) + rules.getBucketParameterQuerier(normalizeTokenParameters({ user_id: 'user1' }, { device_id: 'device1' })) + .staticBuckets ).toEqual([{ bucket: 'mybucket["user1","device1"]', priority: 3 }]); const data_query = bucket.data_queries[0]; @@ -151,7 +162,7 @@ bucket_definitions: expect(bucket.bucket_parameters).toEqual(['user_id']); const param_query = bucket.global_parameter_queries[0]; expect(param_query.bucket_parameters).toEqual(['user_id']); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }))).toEqual([ + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ user_id: 'user1' })).staticBuckets).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); @@ -295,9 +306,10 @@ bucket_definitions: ); const bucket = rules.bucket_descriptors[0]; expect(bucket.bucket_parameters).toEqual(['user_id']); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }))).toEqual([ - { bucket: 'mybucket["USER1"]', priority: 3 } - ]); + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ user_id: 'user1' }))).toMatchObject({ + staticBuckets: [{ bucket: 'mybucket["USER1"]', priority: 3 }], + hasDynamicBuckets: false + }); expect( rules.evaluateRow({ @@ -332,9 +344,10 @@ bucket_definitions: ); const bucket = rules.bucket_descriptors[0]; expect(bucket.bucket_parameters).toEqual(['user_id']); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }))).toEqual([ - { bucket: 'mybucket["USER1"]', priority: 3 } - ]); + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ user_id: 'user1' }))).toMatchObject({ + staticBuckets: [{ bucket: 'mybucket["USER1"]', priority: 3 }], + hasDynamicBuckets: false + }); expect( rules.evaluateRow({ @@ -504,7 +517,7 @@ bucket_definitions: } ]); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({ is_admin: true }))).toEqual([ + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ is_admin: true })).staticBuckets).toEqual([ { bucket: 'mybucket[1]', priority: 3 } ]); }); @@ -548,8 +561,8 @@ bucket_definitions: PARSE_OPTIONS ); expect( - rules.getStaticBucketDescriptions(normalizeTokenParameters({ int1: 314, float1: 3.14, float2: 314 })) - ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); + rules.getBucketParameterQuerier(normalizeTokenParameters({ int1: 314, float1: 3.14, float2: 314 })) + ).toMatchObject({ staticBuckets: [{ bucket: 'mybucket[314,3.14,314]', priority: 3 }] }); expect( rules.evaluateRow({ sourceTable: ASSETS, record: { id: 'asset1', int1: 314n, float1: 3.14, float2: 314 } }) @@ -577,9 +590,10 @@ bucket_definitions: PARSE_OPTIONS ); expect(rules.errors).toEqual([]); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'test' }))).toEqual([ - { bucket: 'mybucket["TEST"]', priority: 3 } - ]); + expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ user_id: 'test' }))).toMatchObject({ + staticBuckets: [{ bucket: 'mybucket["TEST"]', priority: 3 }], + hasDynamicBuckets: false + }); }); test('custom table and id', () => { @@ -834,10 +848,12 @@ bucket_definitions: expect(rules.errors).toEqual([]); - expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({}))).toEqual([ - { bucket: 'highprio[]', priority: 0 
       },
-      { bucket: 'defaultprio[]', priority: 3 }
-    ]);
+    expect(rules.getBucketParameterQuerier(normalizeTokenParameters({}))).toMatchObject({
+      staticBuckets: [
+        { bucket: 'highprio[]', priority: 0 },
+        { bucket: 'defaultprio[]', priority: 3 }
+      ]
+    });
   });

   test('priorities on bucket', () => {
@@ -857,10 +873,12 @@ bucket_definitions:
     expect(rules.errors).toEqual([]);

-    expect(rules.getStaticBucketDescriptions(normalizeTokenParameters({}))).toEqual([
-      { bucket: 'highprio[]', priority: 0 },
-      { bucket: 'defaultprio[]', priority: 3 }
-    ]);
+    expect(rules.getBucketParameterQuerier(normalizeTokenParameters({}))).toMatchObject({
+      staticBuckets: [
+        { bucket: 'highprio[]', priority: 0 },
+        { bucket: 'defaultprio[]', priority: 3 }
+      ]
+    });
   });

   test(`invalid priority on bucket`, () => {
@@ -893,4 +911,46 @@ bucket_definitions:
       )
     ).toThrowError(/Cannot set priority multiple times/);
   });
+
+  test('dynamic bucket definitions list', () => {
+    const rules = SqlSyncRules.fromYaml(
+      `
+bucket_definitions:
+  mybucket:
+    parameters:
+      - SELECT request.user_id() as user_id
+      - SELECT id as user_id FROM users WHERE id = request.user_id()
+    data: []
+
+  by_list:
+    parameters:
+      - SELECT id as list_id FROM lists WHERE owner_id = request.user_id()
+    data: []
+
+  admin_only:
+    parameters:
+      - SELECT id as list_id FROM lists WHERE (request.jwt() ->> 'is_admin' IS NULL)
+    data: []
+  `,
+      PARSE_OPTIONS
+    );
+    const bucket = rules.bucket_descriptors[0];
+    expect(bucket.bucket_parameters).toEqual(['user_id']);
+
+    expect(rules.getBucketParameterQuerier(normalizeTokenParameters({ user_id: 'user1' }))).toMatchObject({
+      hasDynamicBuckets: true,
+      dynamicBucketDefinitions: new Set([
+        'mybucket',
+        'by_list',
+        // These are not filtered out yet, due to how the lookups are structured internally
+        'admin_only'
+      ]),
+      staticBuckets: [
+        {
+          bucket: 'mybucket["user1"]',
+          priority: 3
+        }
+      ]
+    });
+  });
 });
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 8581c3f16..42d713dfd 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -754,6 +754,9 @@ importers:
       jose:
         specifier: ^4.15.1
         version: 4.15.9
+      ws:
+        specifier: ^8.18.0
+        version: 8.18.0
       yaml:
         specifier: ^2.5.0
         version: 2.5.0
@@ -761,6 +764,9 @@ importers:
       '@types/node':
         specifier: ^22.13.1
         version: 22.13.1
+      '@types/ws':
+        specifier: ~8.2.0
+        version: 8.2.3
       typescript:
         specifier: ^5.7.3
         version: 5.7.3
@@ -4992,7 +4998,7 @@ snapshots:
       '@opentelemetry/semantic-conventions': 1.25.1
       '@prisma/instrumentation': 5.16.1
       '@sentry/core': 8.17.0
-      '@sentry/opentelemetry': 8.17.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)
+      '@sentry/opentelemetry': 8.17.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.6.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)
       '@sentry/types': 8.17.0
       '@sentry/utils': 8.17.0
     optionalDependencies:
@@ -5000,7 +5006,7 @@ snapshots:
     transitivePeerDependencies:
       - supports-color

-  '@sentry/opentelemetry@8.17.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)':
+  '@sentry/opentelemetry@8.17.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.6.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)':
     dependencies:
       '@opentelemetry/api': 1.9.0
       '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
diff --git a/test-client/package.json b/test-client/package.json
index 1bc5918ba..46e4e3fb8 100644
--- a/test-client/package.json
+++ b/test-client/package.json
@@ -10,6 +10,7 @@
   "scripts": {
     "fetch-operations": "tsc -b && node dist/bin.js fetch-operations",
     "generate-token": "tsc -b && node dist/bin.js generate-token",
+    "concurrent-connections": "tsc -b && node dist/bin.js concurrent-connections",
     "build": "tsc -b",
     "clean": "rm -rf ./dist && tsc -b --clean"
   },
@@ -17,10 +18,12 @@
     "@powersync/service-core": "workspace:*",
     "commander": "^12.0.0",
     "jose": "^4.15.1",
+    "ws": "^8.18.0",
     "yaml": "^2.5.0"
   },
   "devDependencies": {
     "@types/node": "^22.13.1",
+    "@types/ws": "~8.2.0",
     "typescript": "^5.7.3"
   }
 }
diff --git a/test-client/src/auth.ts b/test-client/src/auth.ts
index 4b35c747d..bb471daad 100644
--- a/test-client/src/auth.ts
+++ b/test-client/src/auth.ts
@@ -9,7 +9,12 @@ export interface CredentialsOptions {
   sub?: string;
 }

-export async function getCredentials(options: CredentialsOptions): Promise<{ endpoint: string; token: string }> {
+export interface Credentials {
+  endpoint: string;
+  token: string;
+}
+
+export async function getCredentials(options: CredentialsOptions): Promise<Credentials> {
   if (options.token != null) {
     if (options.endpoint != null) {
       return { token: options.token, endpoint: options.endpoint };
diff --git a/test-client/src/bin.ts b/test-client/src/bin.ts
index 08354f376..1b53d18dc 100644
--- a/test-client/src/bin.ts
+++ b/test-client/src/bin.ts
@@ -2,6 +2,7 @@ import { program } from 'commander';
 import { getCheckpointData } from './client.js';
 import { getCredentials } from './auth.js';
 import * as jose from 'jose';
+import { concurrentConnections } from './load-testing/load-test.js';

 program
   .command('fetch-operations')
@@ -30,4 +31,19 @@ program
     console.log(credentials.token);
   });

+program
+  .command('concurrent-connections')
+  .description('Load test the service by connecting a number of concurrent clients')
+  .option('-t, --token [token]', 'JWT to use for authentication')
+  .option('-e, --endpoint [endpoint]', 'endpoint URI')
+  .option('-c, --config [config]', 'path to powersync.yaml, to auto-generate a token from a HS256 key')
+  .option('-u, --sub [sub]', 'sub field for auto-generated token')
+  .option('-n, --num-clients [num-clients]', 'number of clients to connect')
+  .option('-m, --mode [mode]', 'http or websocket')
+  .action(async (options) => {
+    const credentials = await getCredentials(options);
+
+    await concurrentConnections(credentials, options['numClients'] ?? 10, options.mode ?? 'http');
+  });
+
 await program.parseAsync();
diff --git a/test-client/src/load-testing/http-worker.ts b/test-client/src/load-testing/http-worker.ts
new file mode 100644
index 000000000..895289157
--- /dev/null
+++ b/test-client/src/load-testing/http-worker.ts
@@ -0,0 +1,59 @@
+import { ndjsonStream } from '../ndjson.js';
+
+import { parentPort, workerData } from 'worker_threads';
+
+if (parentPort == null) {
+  throw new Error(`Can only run this script in a worker_thread`);
+}
+
+const { i, url, token } = workerData;
+
+const response = await fetch(url + '/sync/stream', {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json',
+    Authorization: `Token ${token}`
+  },
+  body: JSON.stringify({
+    raw_data: true,
+    include_checksums: true
+  })
+});
+if (!response.ok || response.body == null) {
+  throw new Error(response.statusText + '\n' + (await response.text()));
+}
+
+let size = 0;
+let numOperations = 0;
+let lastCheckpointStart = 0;
+
+for await (let chunk of ndjsonStream(response.body)) {
+  size += JSON.stringify(chunk).length;
+  if (chunk?.checkpoint_complete) {
+    const duration = performance.now() - lastCheckpointStart;
+    console.log(
+      new Date().toISOString(),
+      i,
+      `checkpoint_complete op_id: ${chunk.checkpoint_complete.last_op_id}, ops: ${numOperations}, bytes: ${size}, duration: ${duration.toFixed(0)}ms`
+    );
+  } else if (chunk?.data) {
+    numOperations += chunk.data.data.length;
+  } else if (chunk?.checkpoint) {
+    lastCheckpointStart = performance.now();
+    console.log(new Date().toISOString(), i, `checkpoint buckets: ${chunk.checkpoint.buckets.length}`);
+  } else if (chunk?.checkpoint_diff) {
+    lastCheckpointStart = performance.now();
+    console.log(
+      new Date().toISOString(),
+      i,
+      `checkpoint_diff removed_buckets: ${chunk.checkpoint_diff.removed_buckets.length} updated_buckets: ${chunk.checkpoint_diff.updated_buckets.length}`
+    );
+  } else {
+    const key = Object.keys(chunk)[0];
+    if (key != 'token_expires_in' && key != 'data') {
+      console.log(new Date().toISOString(), i, key);
+    }
+  }
+}
+
+parentPort.postMessage({ done: true });
diff --git a/test-client/src/load-testing/load-test.ts b/test-client/src/load-testing/load-test.ts
new file mode 100644
index 000000000..1898b1a15
--- /dev/null
+++ b/test-client/src/load-testing/load-test.ts
@@ -0,0 +1,38 @@
+import { Worker } from 'worker_threads';
+import { Credentials } from '../auth.js';
+
+export type Mode = 'http' | 'websocket';
+
+export async function stream(i: number, credentials: Credentials, mode: Mode) {
+  const worker =
+    mode == 'websocket'
+      ? new Worker(new URL('./rsocket-worker.js', import.meta.url), {
+          workerData: { i, token: credentials.token, url: credentials.endpoint.replace(/^http/, 'ws') }
+        })
+      : new Worker(new URL('./http-worker.js', import.meta.url), {
+          workerData: { i, token: credentials.token, url: credentials.endpoint }
+        });
+  await new Promise((resolve, reject) => {
+    worker.on('message', (event) => resolve(event));
+    worker.on('error', (err) => reject(err));
+  });
+  worker.terminate();
+}
+
+export async function streamForever(i: number, credentials: Credentials, mode: Mode) {
+  while (true) {
+    try {
+      await stream(i, credentials, mode);
+      console.log(new Date().toISOString(), i, 'Stream ended');
+    } catch (e) {
+      console.error(new Date().toISOString(), i, e.message);
+      await new Promise((resolve) => setTimeout(resolve, 1000 + Math.random()));
+    }
+  }
+}
+
+export async function concurrentConnections(credentials: Credentials, numClients: number, mode: Mode) {
+  for (let i = 0; i < numClients; i++) {
+    streamForever(i, credentials, mode);
+  }
+}
diff --git a/test-client/src/load-testing/rsocket-worker.ts b/test-client/src/load-testing/rsocket-worker.ts
new file mode 100644
index 000000000..17b99a773
--- /dev/null
+++ b/test-client/src/load-testing/rsocket-worker.ts
@@ -0,0 +1,84 @@
+import { RSocketConnector } from 'rsocket-core';
+import { serialize, deserialize } from 'bson';
+import WebSocket from 'ws';
+import { WebsocketClientTransport } from 'rsocket-websocket-client';
+
+import { parentPort, workerData } from 'worker_threads';
+
+if (parentPort == null) {
+  throw new Error(`Can only run this script in a worker_thread`);
+}
+
+const { i, url, token } = workerData;
+
+const client = new RSocketConnector({
+  transport: new WebsocketClientTransport({
+    url,
+    wsCreator: (url) => {
+      return new WebSocket(url) as any;
+    }
+  }),
+  setup: {
+    dataMimeType: 'application/bson',
+    metadataMimeType: 'application/bson',
+    payload: {
+      data: null,
+      metadata: Buffer.from(
+        serialize({
+          token: `Token ${token}`
+        })
+      )
+    }
+  }
+});
+
+const rsocket = await client.connect();
+
+const SYNC_QUEUE_REQUEST_N = 2;
+
+let pendingEventsCount = SYNC_QUEUE_REQUEST_N;
+let size = 0;
+
+const stream = rsocket.requestStream(
+  {
+    data: Buffer.from(serialize({})),
+    metadata: Buffer.from(
+      serialize({
+        path: '/sync/stream'
+      })
+    )
+  },
+  SYNC_QUEUE_REQUEST_N, // The initial N amount
+  {
+    onError: (e) => {
+      console.error(e);
+    },
+    onNext: (payload) => {
+      const { data } = payload;
+      // Less events are now pending
+      pendingEventsCount--;
+      if (!data) {
+        return;
+      }
+
+      size += data.byteLength;
+
+      const chunk = deserialize(data);
+      if (chunk?.checkpoint_complete) {
+        console.log(new Date().toISOString(), i, 'checkpoint', chunk.checkpoint_complete.last_op_id, size);
+      } else {
+        console.log(new Date().toISOString(), i, Object.keys(chunk)[0]);
+      }
+
+      const required = SYNC_QUEUE_REQUEST_N - pendingEventsCount;
+      if (required > 0) {
+        stream.request(SYNC_QUEUE_REQUEST_N - pendingEventsCount);
+        pendingEventsCount = SYNC_QUEUE_REQUEST_N;
+      }
+    },
+    onComplete: () => {
+      stream.cancel();
+    },
+    onExtension: () => {}
+  }
+);
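
Note on the HTTP worker above: it relies on an `ndjsonStream` helper imported from `../ndjson.js`, which already exists in the test client and is not shown in this diff. The sketch below is an assumption-based illustration of what such a helper does (split the response body into lines, parse each non-empty line as JSON), not the actual implementation:

```ts
// Sketch only: parse a fetch() response body as newline-delimited JSON.
// The real ../ndjson.js helper in the test client may differ in details.
export async function* ndjsonStream<T = any>(body: ReadableStream<Uint8Array>): AsyncGenerator<T> {
  const decoder = new TextDecoder();
  let buffered = '';
  // Node's web ReadableStream is async-iterable; the cast keeps DOM-typed builds happy.
  for await (const chunk of body as unknown as AsyncIterable<Uint8Array>) {
    buffered += decoder.decode(chunk, { stream: true });
    let newline: number;
    while ((newline = buffered.indexOf('\n')) >= 0) {
      const line = buffered.slice(0, newline).trim();
      buffered = buffered.slice(newline + 1);
      if (line.length > 0) {
        yield JSON.parse(line) as T;
      }
    }
  }
  const rest = buffered.trim();
  if (rest.length > 0) {
    // Flush a trailing line that has no final newline.
    yield JSON.parse(rest) as T;
  }
}
```

With the new package script in place, a typical invocation would be along the lines of `node dist/bin.js concurrent-connections -c path/to/powersync.yaml -n 20 -m websocket`; the argument values here are illustrative.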
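
For readers following the branching in `http-worker.ts` and `rsocket-worker.ts`: the workers only inspect a handful of fields on each streamed line. The shapes below are an approximate TypeScript sketch inferred from the fields accessed in this diff; the authoritative protocol types live in the service packages and carry more detail.

```ts
// Approximate line shapes, inferred from the fields the workers read.
// Field types are deliberately loose; the real protocol types are richer.
interface CheckpointLine {
  checkpoint: { buckets: unknown[] };
}

interface CheckpointDiffLine {
  checkpoint_diff: { removed_buckets: unknown[]; updated_buckets: unknown[] };
}

interface DataLine {
  data: { data: unknown[] };
}

interface CheckpointCompleteLine {
  checkpoint_complete: { last_op_id: string };
}

interface TokenExpiresInLine {
  token_expires_in: number;
}

export type SyncStreamLine =
  | CheckpointLine
  | CheckpointDiffLine
  | DataLine
  | CheckpointCompleteLine
  | TokenExpiresInLine;
```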