diff --git a/.changeset/perfect-ads-sin.md b/.changeset/perfect-ads-sin.md new file mode 100644 index 000000000..36345013a --- /dev/null +++ b/.changeset/perfect-ads-sin.md @@ -0,0 +1,13 @@ +--- +'@powersync/service-module-postgres-storage': minor +'@powersync/service-module-mongodb-storage': minor +'@powersync/service-module-postgres': minor +'@powersync/service-module-mongodb': minor +'@powersync/service-core': minor +'@powersync/lib-services-framework': minor +'@powersync/service-jpgwire': minor +'@powersync/service-errors': patch +'@powersync/service-module-mysql': patch +--- + +Implement resuming of initial replication snapshots. diff --git a/libs/lib-services/src/logger/Logger.ts b/libs/lib-services/src/logger/Logger.ts index f49f5e10e..81f1eacf7 100644 --- a/libs/lib-services/src/logger/Logger.ts +++ b/libs/lib-services/src/logger/Logger.ts @@ -1,8 +1,22 @@ import winston from 'winston'; +const prefixFormat = winston.format((info) => { + if (info.prefix) { + info.message = `${info.prefix}${info.message}`; + } + return { + ...info, + prefix: undefined + }; +}); + export namespace LogFormat { - export const development = winston.format.combine(winston.format.colorize({ level: true }), winston.format.simple()); - export const production = winston.format.combine(winston.format.timestamp(), winston.format.json()); + export const development = winston.format.combine( + prefixFormat(), + winston.format.colorize({ level: true }), + winston.format.simple() + ); + export const production = winston.format.combine(prefixFormat(), winston.format.timestamp(), winston.format.json()); } export const logger = winston.createLogger(); diff --git a/libs/lib-services/src/migrations/AbstractMigrationAgent.ts b/libs/lib-services/src/migrations/AbstractMigrationAgent.ts index 3be96acf3..fa063d360 100644 --- a/libs/lib-services/src/migrations/AbstractMigrationAgent.ts +++ b/libs/lib-services/src/migrations/AbstractMigrationAgent.ts @@ -70,7 +70,7 @@ export abstract class AbstractMigrationAgent implements storage.BucketStorageBatch { + private logger: Logger; + private readonly client: mongo.MongoClient; public readonly db: PowerSyncMongo; public readonly session: mongo.ClientSession; @@ -65,6 +79,7 @@ export class MongoBucketBatch private batch: OperationBatch | null = null; private write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; + private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; /** * Last LSN received associated with a checkpoint. @@ -86,6 +101,7 @@ export class MongoBucketBatch constructor(options: MongoBucketBatchOptions) { super(); + this.logger = options.logger ?? defaultLogger; this.client = options.db.client; this.db = options.db; this.group_id = options.groupId; @@ -96,6 +112,7 @@ export class MongoBucketBatch this.sync_rules = options.syncRules; this.storeCurrentData = options.storeCurrentData; this.skipExistingRows = options.skipExistingRows; + this.markRecordUnavailable = options.markRecordUnavailable; this.batch = new OperationBatch(); this.persisted_op = options.keepaliveOp ?? 
null; @@ -232,7 +249,9 @@ export class MongoBucketBatch current_data_lookup.set(cacheKey(doc._id.t, doc._id.k), doc); } - let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize); + let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize, { + logger: this.logger + }); for (let op of b) { if (resumeBatch) { @@ -311,11 +330,18 @@ export class MongoBucketBatch // Not an error if we re-apply a transaction existing_buckets = []; existing_lookups = []; - // Log to help with debugging if there was a consistency issue if (this.storeCurrentData) { - logger.warn( - `Cannot find previous record for update on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}` - ); + if (this.markRecordUnavailable != null) { + // This will trigger a "resnapshot" of the record. + // This is not relevant if storeCurrentData is false, since we'll get the full row + // directly in the replication stream. + this.markRecordUnavailable(record); + } else { + // Log to help with debugging if there was a consistency issue + this.logger.warn( + `Cannot find previous record for update on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}` + ); + } } } else { existing_buckets = result.buckets; @@ -332,8 +358,8 @@ export class MongoBucketBatch existing_buckets = []; existing_lookups = []; // Log to help with debugging if there was a consistency issue - if (this.storeCurrentData) { - logger.warn( + if (this.storeCurrentData && this.markRecordUnavailable == null) { + this.logger.warn( `Cannot find previous record for delete on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}` ); } @@ -430,7 +456,7 @@ export class MongoBucketBatch } } ); - logger.error( + this.logger.error( `Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}` ); } @@ -470,7 +496,7 @@ export class MongoBucketBatch } } ); - logger.error( + this.logger.error( `Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${after.id}: ${error.error}` ); } @@ -524,7 +550,7 @@ export class MongoBucketBatch if (e instanceof mongo.MongoError && e.hasErrorLabel('TransientTransactionError')) { // Likely write conflict caused by concurrent write stream replicating } else { - logger.warn('Transaction error', e as Error); + this.logger.warn('Transaction error', e as Error); } await timers.setTimeout(Math.random() * 50); throw e; @@ -549,7 +575,7 @@ export class MongoBucketBatch await this.withTransaction(async () => { flushTry += 1; if (flushTry % 10 == 0) { - logger.info(`${this.slot_name} ${description} - try ${flushTry}`); + this.logger.info(`${description} - try ${flushTry}`); } if (flushTry > 20 && Date.now() > lastTry) { throw new ServiceError(ErrorCode.PSYNC_S1402, 'Max transaction tries exceeded'); @@ -619,13 +645,13 @@ export class MongoBucketBatch if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) { // When re-applying transactions, don't create a new checkpoint until // we are past the last transaction. - logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`); + this.logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`); // Cannot create a checkpoint yet - return false return false; } if (lsn < this.no_checkpoint_before_lsn) { if (Date.now() - this.lastWaitingLogThottled > 5_000) { - logger.info( + this.logger.info( `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. 
Persisted op: ${this.persisted_op}` ); this.lastWaitingLogThottled = Date.now(); @@ -677,7 +703,8 @@ export class MongoBucketBatch _id: this.group_id }, { - $set: update + $set: update, + $unset: { snapshot_lsn: 1 } }, { session: this.session } ); @@ -699,7 +726,7 @@ export class MongoBucketBatch if (this.persisted_op != null) { // The commit may have been skipped due to "no_checkpoint_before_lsn". // Apply it now if relevant - logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`); + this.logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`); return await this.commit(lsn); } @@ -713,7 +740,8 @@ export class MongoBucketBatch snapshot_done: true, last_fatal_error: null, last_keepalive_ts: new Date() - } + }, + $unset: { snapshot_lsn: 1 } }, { session: this.session } ); @@ -722,6 +750,22 @@ export class MongoBucketBatch return true; } + async setSnapshotLsn(lsn: string): Promise { + const update: Partial = { + snapshot_lsn: lsn + }; + + await this.db.sync_rules.updateOne( + { + _id: this.group_id + }, + { + $set: update + }, + { session: this.session } + ); + } + async save(record: storage.SaveOptions): Promise { const { after, before, sourceTable, tag } = record; for (const event of this.getTableEvents(sourceTable)) { @@ -746,7 +790,7 @@ export class MongoBucketBatch return null; } - logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); + this.logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); this.batch ??= new OperationBatch(); this.batch.push(new RecordOperation(record)); @@ -817,7 +861,7 @@ export class MongoBucketBatch session: session }); const batch = await cursor.toArray(); - const persistedBatch = new PersistedBatch(this.group_id, 0); + const persistedBatch = new PersistedBatch(this.group_id, 0, { logger: this.logger }); for (let value of batch) { persistedBatch.saveBucketData({ @@ -847,6 +891,37 @@ export class MongoBucketBatch return last_op!; } + async updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + const copy = table.clone(); + const snapshotStatus = { + totalEstimatedCount: progress.totalEstimatedCount ?? copy.snapshotStatus?.totalEstimatedCount ?? 0, + replicatedCount: progress.replicatedCount ?? copy.snapshotStatus?.replicatedCount ?? 0, + lastKey: progress.lastKey ?? copy.snapshotStatus?.lastKey ?? null + }; + copy.snapshotStatus = snapshotStatus; + + await this.withTransaction(async () => { + await this.db.source_tables.updateOne( + { _id: table.id }, + { + $set: { + snapshot_status: { + last_key: snapshotStatus.lastKey == null ? 
null : new bson.Binary(snapshotStatus.lastKey), + total_estimated_count: snapshotStatus.totalEstimatedCount, + replicated_count: snapshotStatus.replicatedCount + } + } + }, + { session: this.session } + ); + }); + + return copy; + } + async markSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn: string) { const session = this.session; const ids = tables.map((table) => table.id); @@ -857,6 +932,9 @@ export class MongoBucketBatch { $set: { snapshot_done: true + }, + $unset: { + snapshot_status: 1 } }, { session } @@ -880,17 +958,8 @@ export class MongoBucketBatch } }); return tables.map((table) => { - const copy = new storage.SourceTable( - table.id, - table.connectionTag, - table.objectId, - table.schema, - table.table, - table.replicaIdColumns, - table.snapshotComplete - ); - copy.syncData = table.syncData; - copy.syncParameters = table.syncParameters; + const copy = table.clone(); + copy.snapshotComplete = true; return copy; }); } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 354b4aab5..e46c2bf17 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -116,9 +116,15 @@ export class MongoSyncBucketStorage const doc = await this.db.sync_rules.findOne( { _id: this.group_id }, { - projection: { last_checkpoint: 1, last_checkpoint_lsn: 1 } + projection: { last_checkpoint: 1, last_checkpoint_lsn: 1, snapshot_done: 1 } } ); + if (!doc?.snapshot_done) { + return { + checkpoint: 0n, + lsn: null + }; + } return { checkpoint: doc?.last_checkpoint ?? 0n, lsn: doc?.last_checkpoint_lsn ?? null @@ -138,6 +144,7 @@ export class MongoSyncBucketStorage const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null; await using batch = new MongoBucketBatch({ + logger: options.logger, db: this.db, syncRules: this.sync_rules.parsed(options).sync_rules, groupId: this.group_id, @@ -146,7 +153,8 @@ export class MongoSyncBucketStorage noCheckpointBeforeLsn: doc?.no_checkpoint_before ?? options.zeroLSN, keepaliveOp: doc?.keepalive_op ? BigInt(doc.keepalive_op) : null, storeCurrentData: options.storeCurrentData, - skipExistingRows: options.skipExistingRows ?? false + skipExistingRows: options.skipExistingRows ?? false, + markRecordUnavailable: options.markRecordUnavailable }); this.iterateListeners((cb) => cb.batchStarted?.(batch)); @@ -193,7 +201,8 @@ export class MongoSyncBucketStorage table_name: table, replica_id_columns: null, replica_id_columns2: columns, - snapshot_done: false + snapshot_done: false, + snapshot_status: undefined }; await col.insertOne(doc, { session }); @@ -210,6 +219,14 @@ export class MongoSyncBucketStorage sourceTable.syncEvent = options.sync_rules.tableTriggersEvent(sourceTable); sourceTable.syncData = options.sync_rules.tableSyncsData(sourceTable); sourceTable.syncParameters = options.sync_rules.tableSyncsParameters(sourceTable); + sourceTable.snapshotStatus = + doc.snapshot_status == null + ? undefined + : { + lastKey: doc.snapshot_status.last_key?.buffer ?? 
null, + totalEstimatedCount: doc.snapshot_status.total_estimated_count, + replicatedCount: doc.snapshot_status.replicated_count + }; let dropTables: storage.SourceTable[] = []; // Detect tables that are either renamed, or have different replica_id_columns @@ -521,7 +538,8 @@ export class MongoSyncBucketStorage projection: { snapshot_done: 1, last_checkpoint_lsn: 1, - state: 1 + state: 1, + snapshot_lsn: 1 } } ); @@ -531,6 +549,7 @@ export class MongoSyncBucketStorage return { snapshot_done: doc.snapshot_done, + snapshot_lsn: doc.snapshot_lsn ?? null, active: doc.state == 'ACTIVE', checkpoint_lsn: doc.last_checkpoint_lsn }; @@ -572,6 +591,9 @@ export class MongoSyncBucketStorage last_checkpoint_lsn: null, last_checkpoint: null, no_checkpoint_before: null + }, + $unset: { + snapshot_lsn: 1 } }, { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index 800308391..8770fae83 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -3,7 +3,7 @@ import { JSONBig } from '@powersync/service-jsonbig'; import { EvaluatedParameters, EvaluatedRow } from '@powersync/service-sync-rules'; import * as bson from 'bson'; -import { logger } from '@powersync/lib-services-framework'; +import { Logger, logger as defaultLogger } from '@powersync/lib-services-framework'; import { InternalOpId, storage, utils } from '@powersync/service-core'; import { currentBucketKey, MAX_ROW_SIZE } from './MongoBucketBatch.js'; import { MongoIdSequence } from './MongoIdSequence.js'; @@ -46,6 +46,7 @@ const MAX_TRANSACTION_DOC_COUNT = 2_000; * multiple transactions. */ export class PersistedBatch { + logger: Logger; bucketData: mongo.AnyBulkWriteOperation[] = []; bucketParameters: mongo.AnyBulkWriteOperation[] = []; currentData: mongo.AnyBulkWriteOperation[] = []; @@ -63,9 +64,11 @@ export class PersistedBatch { constructor( private group_id: number, - writtenSize: number + writtenSize: number, + options?: { logger?: Logger } ) { this.currentSize = writtenSize; + this.logger = options?.logger ?? defaultLogger; } private incrementBucket(bucket: string, op_id: InternalOpId) { @@ -107,7 +110,7 @@ export class PersistedBatch { // the BSON size is small enough, but the JSON size is too large. // In these cases, we can't store the data, so we skip it, or generate a REMOVE operation if the row // was synced previously. - logger.error(`powersync_${this.group_id} Row ${key} too large: ${recordData.length} bytes. Removing.`); + this.logger.error(`Row ${key} too large: ${recordData.length} bytes. Removing.`); continue; } @@ -312,8 +315,8 @@ export class PersistedBatch { if (options?.oldestUncommittedChange != null) { const replicationLag = Math.round((Date.now() - options.oldestUncommittedChange.getTime()) / 1000); - logger.info( - `powersync_${this.group_id} Flushed ${this.bucketData.length} + ${this.bucketParameters.length} + ${ + this.logger.info( + `Flushed ${this.bucketData.length} + ${this.bucketParameters.length} + ${ this.currentData.length } updates, ${Math.round(this.currentSize / 1024)}kb in ${duration}ms. Last op_id: ${this.debugLastOpId}. 
Replication lag: ${replicationLag}s`, { @@ -328,8 +331,8 @@ export class PersistedBatch { } ); } else { - logger.info( - `powersync_${this.group_id} Flushed ${this.bucketData.length} + ${this.bucketParameters.length} + ${ + this.logger.info( + `Flushed ${this.bucketData.length} + ${this.bucketParameters.length} + ${ this.currentData.length } updates, ${Math.round(this.currentSize / 1024)}kb in ${duration}ms. Last op_id: ${this.debugLastOpId}`, { diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index de36aaba5..00d95236d 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -1,4 +1,4 @@ -import { storage } from '@powersync/service-core'; +import { storage, TableSnapshotStatus } from '@powersync/service-core'; import { SqliteJsonValue } from '@powersync/service-sync-rules'; import * as bson from 'bson'; @@ -73,6 +73,13 @@ export interface SourceTableDocument { replica_id_columns: string[] | null; replica_id_columns2: { name: string; type_oid?: number; type?: string }[] | undefined; snapshot_done: boolean | undefined; + snapshot_status: SourceTableDocumentSnapshotStatus | undefined; +} + +export interface SourceTableDocumentSnapshotStatus { + total_estimated_count: number; + replicated_count: number; + last_key: bson.Binary | null; } /** @@ -110,6 +117,13 @@ export interface SyncRuleDocument { */ snapshot_done: boolean; + /** + * If snapshot_done = false, this may be the lsn at which we started the snapshot. + * + * This can be used for resuming the snapshot after a restart. + */ + snapshot_lsn: string | undefined; + /** * The last consistent checkpoint. * diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 4e6e26eae..a1cbc1ebf 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -2,14 +2,21 @@ import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersyn import { container, DatabaseConnectionError, + logger as defaultLogger, ErrorCode, - logger, + Logger, ReplicationAbortedError, ReplicationAssertionError, - ServiceAssertionError, ServiceError } from '@powersync/lib-services-framework'; -import { MetricsEngine, SaveOperationTag, SourceEntityDescriptor, SourceTable, storage } from '@powersync/service-core'; +import { + MetricsEngine, + RelationCache, + SaveOperationTag, + SourceEntityDescriptor, + SourceTable, + storage +} from '@powersync/service-core'; import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; import { MongoLSN } from '../common/MongoLSN.js'; @@ -23,6 +30,7 @@ import { getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js'; +import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js'; import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; export interface ChangeStreamOptions { @@ -37,10 +45,18 @@ export interface ChangeStreamOptions { * in closing the stream. To cover that case, reduce the timeout for tests. */ maxAwaitTimeMS?: number; + + /** + * Override snapshotChunkLength for testing. 
+ */ + snapshotChunkLength?: number; + + logger?: Logger; } interface InitResult { needsInitialSync: boolean; + snapshotLsn: string | null; } /** @@ -74,7 +90,7 @@ export class ChangeStream { private abort_signal: AbortSignal; - private relation_cache = new Map(); + private relationCache = new RelationCache(getCacheIdentifier); /** * Time of the oldest uncommitted change, according to the source db. @@ -89,12 +105,17 @@ export class ChangeStream { private checkpointStreamId = new mongo.ObjectId(); + private logger: Logger; + + private snapshotChunkLength: number; + constructor(options: ChangeStreamOptions) { this.storage = options.storage; this.metrics = options.metrics; this.group_id = options.storage.group_id; this.connections = options.connections; this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; + this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000; this.client = this.connections.client; this.defaultDb = this.connections.db; this.sync_rules = options.storage.getParsedSyncRules({ @@ -109,6 +130,8 @@ export class ChangeStream { }, { once: true } ); + + this.logger = options.logger ?? defaultLogger; } get stopped() { @@ -123,10 +146,6 @@ export class ChangeStream { return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; } - private get logPrefix() { - return `[powersync_${this.group_id}]`; - } - /** * This resolves a pattern, persists the related metadata, and returns * the resulting SourceTables. @@ -162,7 +181,7 @@ export class ChangeStream { .toArray(); if (!tablePattern.isWildcard && collections.length == 0) { - logger.warn(`${this.logPrefix} Collection ${schema}.${tablePattern.name} not found`); + this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`); } for (let collection of collections) { @@ -182,37 +201,26 @@ export class ChangeStream { async initSlot(): Promise { const status = await this.storage.getStatus(); if (status.snapshot_done && status.checkpoint_lsn) { - logger.info(`${this.logPrefix} Initial replication already done`); - return { needsInitialSync: false }; + this.logger.info(`Initial replication already done`); + return { needsInitialSync: false, snapshotLsn: null }; } - return { needsInitialSync: true }; + return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn }; } async estimatedCount(table: storage.SourceTable): Promise { - const db = this.client.db(table.schema); - const count = await db.collection(table.table).estimatedDocumentCount(); + const count = await this.estimatedCountNumber(table); return `~${count}`; } - /** - * Start initial replication. - * - * If (partial) replication was done before on this slot, this clears the state - * and starts again from scratch. - */ - async startInitialReplication() { - await this.storage.clear(); - await this.initialReplication(); + async estimatedCountNumber(table: storage.SourceTable): Promise { + const db = this.client.db(table.schema); + return await db.collection(table.table).estimatedDocumentCount(); } - async initialReplication() { - const sourceTables = this.sync_rules.getSourceTables(); - await this.client.connect(); - - // We need to get the snapshot time before taking the initial snapshot. 
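// Aside — a minimal sketch (not part of this patch) of how the `logger` option introduced above
// combines with the prefixFormat added in Logger.ts: a winston child logger carries a `prefix`
// field, and the custom format folds it into every message. The group id below is a made-up value.
import winston from 'winston';

const prefixFormat = winston.format((info) => {
  if (info.prefix) {
    info.message = `${info.prefix}${info.message}`;
  }
  return { ...info, prefix: undefined };
});

const baseLogger = winston.createLogger({
  format: winston.format.combine(prefixFormat(), winston.format.simple()),
  transports: [new winston.transports.Console()]
});

// Same shape as ChangeStreamReplicationJob's `defaultLogger.child({ prefix: ... })`.
const jobLogger = baseLogger.child({ prefix: '[powersync_1] ' });
jobLogger.info('Initial replication already done'); // message is emitted with the prefix folded in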
+ private async getSnapshotLsn(): Promise { const hello = await this.defaultDb.command({ hello: 1 }); - const snapshotTime = hello.lastWrite?.majorityOpTime?.ts as mongo.Timestamp; + // Basic sanity check if (hello.msg == 'isdbgrid') { throw new ServiceError( ErrorCode.PSYNC_S1341, @@ -223,33 +231,139 @@ export class ChangeStream { ErrorCode.PSYNC_S1342, 'Standalone MongoDB instances are not supported - use a replicaset.' ); - } else if (snapshotTime == null) { - // Not known where this would happen apart from the above cases - throw new ReplicationAssertionError('MongoDB lastWrite timestamp not found.'); } + // Open a change stream just to get a resume token for later use. + // We could use clusterTime from the hello command, but that won't tell us if the + // snapshot isn't valid anymore. + // If we just use the first resumeToken from the stream, we get two potential issues: + // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes + // in source db or other stream issues. + // 2. The first actual change we get may have the same clusterTime, causing us to incorrect + // skip that event. + // Instead, we create a new checkpoint document, and wait until we get that document back in the stream. + // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document + // periodically until the timeout is reached. + + const LSN_TIMEOUT_SECONDS = 60; + const LSN_CREATE_INTERVAL_SECONDS = 1; + + await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 }); + const { stream } = streamManager; + const startTime = performance.now(); + let lastCheckpointCreated = -10_000; + let eventsSeen = 0; + + while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) { + if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) { + await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); + lastCheckpointCreated = performance.now(); + } + + // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream + const changeDocument = await stream.tryNext().catch((e) => { + throw mapChangeStreamError(e); + }); + if (changeDocument == null) { + continue; + } + + const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; + + if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) { + const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; + if (!this.checkpointStreamId.equals(checkpointId)) { + continue; + } + const { comparable: lsn } = new MongoLSN({ + timestamp: changeDocument.clusterTime!, + resume_token: changeDocument._id + }); + return lsn; + } + + eventsSeen += 1; + } + + // Could happen if there is a very large replication lag? + throw new ServiceError( + ErrorCode.PSYNC_S1301, + `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}` + ); + } + + private async validateSnapshotLsn(lsn: string) { + await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 }); + const { stream } = streamManager; + try { + // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream + await stream.tryNext(); + } catch (e) { + // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though + // we possibly cannot recover from it. 
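// Aside — a stripped-down sketch of the probe that validateSnapshotLsn relies on, using only the
// plain MongoDB driver (the function name and dbName parameter are illustrative): resuming a
// change stream from a stored token fails immediately if the oplog no longer covers it, so a
// single tryNext() call is enough to check whether a stored snapshot LSN is still usable.
import { MongoClient, ResumeToken } from 'mongodb';

async function resumeTokenStillValid(client: MongoClient, dbName: string, token: ResumeToken): Promise<boolean> {
  const stream = client.db(dbName).watch([], { resumeAfter: token, maxAwaitTimeMS: 0 });
  try {
    // Throws (e.g. ChangeStreamHistoryLost) when the token points before the oldest oplog entry.
    await stream.tryNext();
    return true;
  } catch (e) {
    return false;
  } finally {
    await stream.close();
  }
}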
+ throw mapChangeStreamError(e); + } + } + + async initialReplication(snapshotLsn: string | null) { + const sourceTables = this.sync_rules.getSourceTables(); + await this.client.connect(); + await this.storage.startBatch( - { zeroLSN: MongoLSN.ZERO.comparable, defaultSchema: this.defaultDb.databaseName, storeCurrentData: false }, + { + logger: this.logger, + zeroLSN: MongoLSN.ZERO.comparable, + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + skipExistingRows: true + }, async (batch) => { + if (snapshotLsn == null) { + // First replication attempt - get a snapshot and store the timestamp + snapshotLsn = await this.getSnapshotLsn(); + await batch.setSnapshotLsn(snapshotLsn); + this.logger.info(`Marking snapshot at ${snapshotLsn}`); + } else { + this.logger.info(`Resuming snapshot at ${snapshotLsn}`); + // Check that the snapshot is still valid. + await this.validateSnapshotLsn(snapshotLsn); + } + // Start by resolving all tables. // This checks postImage configuration, and that should fail as - // earlier as possible. + // early as possible. let allSourceTables: SourceTable[] = []; for (let tablePattern of sourceTables) { const tables = await this.resolveQualifiedTableNames(batch, tablePattern); allSourceTables.push(...tables); } + let tablesWithStatus: SourceTable[] = []; for (let table of allSourceTables) { + if (table.snapshotComplete) { + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); + continue; + } + let count = await this.estimatedCountNumber(table); + const updated = await batch.updateTableProgress(table, { + totalEstimatedCount: count + }); + tablesWithStatus.push(updated); + this.relationCache.update(updated); + this.logger.info( + `To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}` + ); + } + + for (let table of tablesWithStatus) { await this.snapshotTable(batch, table); await batch.markSnapshotDone([table], MongoLSN.ZERO.comparable); await touch(); } - const { comparable: lsn } = new MongoLSN({ timestamp: snapshotTime }); - logger.info(`${this.logPrefix} Snapshot commit at ${snapshotTime.inspect()} / ${lsn}`); - await batch.commit(lsn); + this.logger.info(`Snapshot commit at ${snapshotLsn}`); + await batch.commit(snapshotLsn); } ); } @@ -315,27 +429,38 @@ export class ChangeStream { } private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) { - logger.info(`${this.logPrefix} Replicating ${table.qualifiedName}`); - const estimatedCount = await this.estimatedCount(table); - let at = 0; + const totalEstimatedCount = await this.estimatedCountNumber(table); + let at = table.snapshotStatus?.replicatedCount ?? 0; const db = this.client.db(table.schema); const collection = db.collection(table.table); - const cursor = collection.find({}, { batchSize: 6_000, readConcern: 'majority' }); + await using query = new ChunkedSnapshotQuery({ + collection, + key: table.snapshotStatus?.lastKey, + batchSize: this.snapshotChunkLength + }); + if (query.lastKey != null) { + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}` + ); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + } let lastBatch = performance.now(); - // hasNext() is the call that triggers fetching of the next batch, - // then we read it with readBufferedDocuments(). 
This gives us semi-explicit - // control over the fetching of each batch, and avoids a separate promise per document - let hasNextPromise = cursor.hasNext(); - while (await hasNextPromise) { - const docBatch = cursor.readBufferedDocuments(); + let nextChunkPromise = query.nextChunk(); + while (true) { + const { docs: docBatch, lastKey } = await nextChunkPromise; + if (docBatch.length == 0) { + break; + } + + if (this.abort_signal.aborted) { + throw new ReplicationAbortedError(`Aborted initial replication`); + } + // Pre-fetch next batch, so that we can read and write concurrently - hasNextPromise = cursor.hasNext(); + nextChunkPromise = query.nextChunk(); for (let document of docBatch) { - if (this.abort_signal.aborted) { - throw new ReplicationAbortedError(`Aborted initial replication`); - } - const record = constructAfterRecord(document); // This auto-flushes when the batch reaches its size limit @@ -349,20 +474,27 @@ export class ChangeStream { }); } + // Important: flush before marking progress + await batch.flush(); at += docBatch.length; this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length); + + table = await batch.updateTableProgress(table, { + lastKey, + replicatedCount: at, + totalEstimatedCount: totalEstimatedCount + }); + this.relationCache.update(table); + const duration = performance.now() - lastBatch; lastBatch = performance.now(); - logger.info( - `${this.logPrefix} Replicating ${table.qualifiedName} ${at}/${estimatedCount} in ${duration.toFixed(0)}ms` + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms` ); await touch(); } // In case the loop was interrupted, make sure we await the last promise. - await hasNextPromise; - - await batch.flush(); - logger.info(`${this.logPrefix} Replicated ${at} documents for ${table.qualifiedName}`); + await nextChunkPromise; } private async getRelation( @@ -370,8 +502,7 @@ export class ChangeStream { descriptor: SourceEntityDescriptor, options: { snapshot: boolean } ): Promise { - const cacheId = getCacheIdentifier(descriptor); - const existing = this.relation_cache.get(cacheId); + const existing = this.relationCache.get(descriptor); if (existing != null) { return existing; } @@ -411,7 +542,7 @@ export class ChangeStream { collMod: collectionInfo.name, changeStreamPreAndPostImages: { enabled: true } }); - logger.info(`${this.logPrefix} Enabled postImages on ${db}.${collectionInfo.name}`); + this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`); } else if (!enabled) { throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`); } @@ -437,12 +568,12 @@ export class ChangeStream { entity_descriptor: descriptor, sync_rules: this.sync_rules }); - this.relation_cache.set(getCacheIdentifier(descriptor), result.table); + this.relationCache.update(result.table); // Drop conflicting collections. // This is generally not expected for MongoDB source dbs, so we log an error. if (result.dropTables.length > 0) { - logger.error( + this.logger.error( `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` ); await batch.drop(result.dropTables); @@ -454,7 +585,7 @@ export class ChangeStream { // 3. The table is used in sync rules. 
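// Aside — the essence of the chunked snapshot loop used in snapshotTable above, as a stand-alone
// sketch (an assumed helper, not the PR's ChunkedSnapshotQuery): order by _id, fetch a bounded
// chunk, and remember the last key so the scan can resume from that point after a restart.
import { Collection, Document, Filter } from 'mongodb';

async function* chunkedScan(
  collection: Collection,
  chunkLength: number,
  resumeAfterKey?: unknown
): AsyncGenerator<{ docs: Document[]; lastKey: unknown }> {
  let lastKey = resumeAfterKey;
  while (true) {
    const filter: Filter<Document> = lastKey === undefined ? {} : { _id: { $gt: lastKey as any } };
    const docs = await collection
      .find(filter, { sort: { _id: 1 }, limit: chunkLength, readConcern: 'majority' })
      .toArray();
    if (docs.length === 0) return;
    lastKey = docs[docs.length - 1]._id;
    yield { docs, lastKey }; // caller flushes the chunk, then persists lastKey as progress
  }
}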
const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; if (shouldSnapshot) { - logger.info(`${this.logPrefix} New collection: ${descriptor.schema}.${descriptor.name}`); + this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); // Truncate this table, in case a previous snapshot was interrupted. await batch.truncate([result.table]); @@ -474,7 +605,7 @@ export class ChangeStream { change: mongo.ChangeStreamDocument ): Promise { if (!table.syncAny) { - logger.debug(`${this.logPrefix} Collection ${table.qualifiedName} not used in sync rules - skipping`); + this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`); return null; } @@ -537,7 +668,11 @@ export class ChangeStream { const result = await this.initSlot(); await this.setupCheckpointsCollection(); if (result.needsInitialSync) { - await this.startInitialReplication(); + if (result.snapshotLsn == null) { + // Snapshot LSN is not present, so we need to start replication from scratch. + await this.storage.clear(); + } + await this.initialReplication(result.snapshotLsn); } } @@ -556,81 +691,99 @@ export class ChangeStream { } } + private openChangeStream(options: { lsn: string | null; maxAwaitTimeMs?: number }) { + const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null; + const startAfter = lastLsn?.timestamp; + const resumeAfter = lastLsn?.resumeToken; + + const filters = this.getSourceNamespaceFilters(); + + const pipeline: mongo.Document[] = [ + { + $match: filters.$match + }, + { $changeStreamSplitLargeEvent: {} } + ]; + + let fullDocument: 'required' | 'updateLookup'; + + if (this.usePostImages) { + // 'read_only' or 'auto_configure' + // Configuration happens during snapshot, or when we see new + // collections. + fullDocument = 'required'; + } else { + fullDocument = 'updateLookup'; + } + + const streamOptions: mongo.ChangeStreamOptions = { + showExpandedEvents: true, + maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS, + fullDocument: fullDocument + }; + + /** + * Only one of these options can be supplied at a time. + */ + if (resumeAfter) { + streamOptions.resumeAfter = resumeAfter; + } else { + // Legacy: We don't persist lsns without resumeTokens anymore, but we do still handle the + // case if we have an old one. + streamOptions.startAtOperationTime = startAfter; + } + + let stream: mongo.ChangeStream; + if (filters.multipleDatabases) { + // Requires readAnyDatabase@admin on Atlas + stream = this.client.watch(pipeline, streamOptions); + } else { + // Same general result, but requires less permissions than the above + stream = this.defaultDb.watch(pipeline, streamOptions); + } + + this.abort_signal.addEventListener('abort', () => { + stream.close(); + }); + + return { + stream, + filters, + [Symbol.asyncDispose]: async () => { + return stream.close(); + } + }; + } + async streamChangesInternal() { // Auto-activate as soon as initial replication is done await this.storage.autoActivate(); await this.storage.startBatch( - { zeroLSN: MongoLSN.ZERO.comparable, defaultSchema: this.defaultDb.databaseName, storeCurrentData: false }, + { + logger: this.logger, + zeroLSN: MongoLSN.ZERO.comparable, + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false + }, async (batch) => { const { lastCheckpointLsn } = batch; - const lastLsn = lastCheckpointLsn ? 
MongoLSN.fromSerialized(lastCheckpointLsn) : null; - if (lastLsn == null) { - throw new ServiceAssertionError(`No resume timestamp found`); - } - const startAfter = lastLsn.timestamp; - const resumeAfter = lastLsn.resumeToken; + const lastLsn = MongoLSN.fromSerialized(lastCheckpointLsn!); + const startAfter = lastLsn?.timestamp; + // It is normal for this to be a minute or two old when there is a low volume // of ChangeStream events. const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000); - logger.info( - `${this.logPrefix} Resume streaming at ${startAfter.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s` - ); - - const filters = this.getSourceNamespaceFilters(); - - const pipeline: mongo.Document[] = [ - { - $match: filters.$match - }, - { $changeStreamSplitLargeEvent: {} } - ]; - - let fullDocument: 'required' | 'updateLookup'; - - if (this.usePostImages) { - // 'read_only' or 'auto_configure' - // Configuration happens during snapshot, or when we see new - // collections. - fullDocument = 'required'; - } else { - fullDocument = 'updateLookup'; - } - - const streamOptions: mongo.ChangeStreamOptions = { - showExpandedEvents: true, - maxAwaitTimeMS: this.maxAwaitTimeMS, - fullDocument: fullDocument - }; - - /** - * Only one of these options can be supplied at a time. - */ - if (resumeAfter) { - streamOptions.resumeAfter = resumeAfter; - } else { - streamOptions.startAtOperationTime = startAfter; - } - - let stream: mongo.ChangeStream; - if (filters.multipleDatabases) { - // Requires readAnyDatabase@admin on Atlas - stream = this.client.watch(pipeline, streamOptions); - } else { - // Same general result, but requires less permissions than the above - stream = this.defaultDb.watch(pipeline, streamOptions); - } + this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`); + await using streamManager = this.openChangeStream({ lsn: lastCheckpointLsn }); + const { stream, filters } = streamManager; if (this.abort_signal.aborted) { - stream.close(); + await stream.close(); return; } - this.abort_signal.addEventListener('abort', () => { - stream.close(); - }); - // Always start with a checkpoint. // This helps us to clear errors when restarting, even if there is // no data to replicate. @@ -681,8 +834,8 @@ export class ChangeStream { lastEmptyResume = performance.now(); // Log the token update. This helps as a general "replication is still active" message in the logs. // This token would typically be around 10s behind. - logger.info( - `${this.logPrefix} Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}` + this.logger.info( + `Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}` ); this.isStartingReplication = false; } @@ -737,8 +890,8 @@ export class ChangeStream { if (!flexDbNameWorkaroundLogged) { flexDbNameWorkaroundLogged = true; - logger.warn( - `${this.logPrefix} Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.` + this.logger.warn( + `Incorrect DB name in change stream: ${changeDocument.ns.db}. 
Changed to ${this.defaultDb.databaseName}.` ); } } @@ -834,7 +987,7 @@ export class ChangeStream { }); if (table.syncAny) { await batch.drop([table]); - this.relation_cache.delete(getCacheIdentifier(rel)); + this.relationCache.delete(table); } } else if (changeDocument.operationType == 'rename') { const relFrom = getMongoRelation(changeDocument.ns); @@ -845,7 +998,7 @@ export class ChangeStream { }); if (tableFrom.syncAny) { await batch.drop([tableFrom]); - this.relation_cache.delete(getCacheIdentifier(relFrom)); + this.relationCache.delete(relFrom); } // Here we do need to snapshot the new table const collection = await this.getCollectionInfo(relTo.schema, relTo.name); diff --git a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts index 9498cfdf5..ff47c159b 100644 --- a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts +++ b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts @@ -1,4 +1,4 @@ -import { container } from '@powersync/lib-services-framework'; +import { container, logger as defaultLogger } from '@powersync/lib-services-framework'; import { replication } from '@powersync/service-core'; import { ChangeStream, ChangeStreamInvalidatedError } from './ChangeStream.js'; @@ -15,6 +15,8 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ constructor(options: ChangeStreamReplicationJobOptions) { super(options); this.connectionFactory = options.connectionFactory; + // We use a custom formatter to process the prefix + this.logger = defaultLogger.child({ prefix: `[powersync_${this.storage.group_id}] ` }); } async cleanUp(): Promise { @@ -72,7 +74,8 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ abort_signal: this.abortController.signal, storage: this.options.storage, metrics: this.options.metrics, - connections: connectionManager + connections: connectionManager, + logger: this.logger }); this.lastStream = stream; await stream.replicate(); @@ -80,7 +83,7 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ if (this.abortController.signal.aborted) { return; } - this.logger.error(`${this.slotName} Replication error`, e); + this.logger.error(`Replication error`, e); if (e.cause != null) { // Without this additional log, the cause may not be visible in the logs. this.logger.error(`cause`, e.cause); diff --git a/modules/module-mongodb/src/replication/MongoRelation.ts b/modules/module-mongodb/src/replication/MongoRelation.ts index 736ff149d..afaa09cff 100644 --- a/modules/module-mongodb/src/replication/MongoRelation.ts +++ b/modules/module-mongodb/src/replication/MongoRelation.ts @@ -20,7 +20,10 @@ export function getMongoRelation(source: mongo.ChangeStreamNameSpace): storage.S /** * For in-memory cache only. 
*/ -export function getCacheIdentifier(source: storage.SourceEntityDescriptor): string { +export function getCacheIdentifier(source: storage.SourceEntityDescriptor | storage.SourceTable): string { + if (source instanceof storage.SourceTable) { + return `${source.schema}.${source.table}`; + } return `${source.schema}.${source.name}`; } diff --git a/modules/module-mongodb/src/replication/MongoSnapshotQuery.ts b/modules/module-mongodb/src/replication/MongoSnapshotQuery.ts new file mode 100644 index 000000000..c0ff8dc95 --- /dev/null +++ b/modules/module-mongodb/src/replication/MongoSnapshotQuery.ts @@ -0,0 +1,59 @@ +import { mongo } from '@powersync/lib-service-mongodb'; +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { bson } from '@powersync/service-core'; + +/** + * Performs a collection snapshot query, chunking by ranges of _id. + * + * This may miss some rows if they are modified during the snapshot query. + * In that case, the change stream replication will pick up those rows afterwards. + */ +export class ChunkedSnapshotQuery implements AsyncDisposable { + lastKey: any = null; + private lastCursor: mongo.FindCursor | null = null; + private collection: mongo.Collection; + private batchSize: number; + + public constructor(options: { collection: mongo.Collection; batchSize: number; key?: Uint8Array | null }) { + this.lastKey = options.key ? bson.deserialize(options.key, { useBigInt64: true })._id : null; + this.lastCursor = null; + this.collection = options.collection; + this.batchSize = options.batchSize; + } + + async nextChunk(): Promise<{ docs: mongo.Document[]; lastKey: Uint8Array } | { docs: []; lastKey: null }> { + let cursor = this.lastCursor; + let newCursor = false; + if (cursor == null || cursor.closed) { + const filter: mongo.Filter = this.lastKey == null ? 
{} : { _id: { $gt: this.lastKey as any } }; + cursor = this.collection.find(filter, { + batchSize: this.batchSize, + readConcern: 'majority', + limit: this.batchSize, + sort: { _id: 1 } + }); + newCursor = true; + } + const hasNext = await cursor.hasNext(); + if (!hasNext) { + this.lastCursor = null; + if (newCursor) { + return { docs: [], lastKey: null }; + } else { + return this.nextChunk(); + } + } + const docBatch = cursor.readBufferedDocuments(); + this.lastCursor = cursor; + if (docBatch.length == 0) { + throw new ReplicationAssertionError(`MongoDB snapshot query returned an empty batch, but hasNext() was true.`); + } + const lastKey = docBatch[docBatch.length - 1]._id; + this.lastKey = lastKey; + return { docs: docBatch, lastKey: bson.serialize({ _id: lastKey }) }; + } + + async [Symbol.asyncDispose](): Promise { + await this.lastCursor?.close(); + } +} diff --git a/modules/module-mongodb/test/src/change_stream.test.ts b/modules/module-mongodb/test/src/change_stream.test.ts index e363e8976..2aa524e77 100644 --- a/modules/module-mongodb/test/src/change_stream.test.ts +++ b/modules/module-mongodb/test/src/change_stream.test.ts @@ -8,8 +8,7 @@ import { test_utils } from '@powersync/service-core-tests'; import { PostImagesOption } from '@module/types/types.js'; import { ChangeStreamTestContext } from './change_stream_utils.js'; -import { env } from './env.js'; -import { INITIALIZED_MONGO_STORAGE_FACTORY, INITIALIZED_POSTGRES_STORAGE_FACTORY } from './util.js'; +import { describeWithStorage } from './util.js'; const BASIC_SYNC_RULES = ` bucket_definitions: @@ -18,12 +17,8 @@ bucket_definitions: - SELECT _id as id, description FROM "test_data" `; -describe.skipIf(!env.TEST_MONGO_STORAGE)('change stream - mongodb', { timeout: 20_000 }, function () { - defineChangeStreamTests(INITIALIZED_MONGO_STORAGE_FACTORY); -}); - -describe.skipIf(!env.TEST_POSTGRES_STORAGE)('change stream - postgres', { timeout: 20_000 }, function () { - defineChangeStreamTests(INITIALIZED_POSTGRES_STORAGE_FACTORY); +describe('change stream', () => { + describeWithStorage({ timeout: 20_000 }, defineChangeStreamTests); }); function defineChangeStreamTests(factory: storage.TestStorageFactory) { @@ -97,7 +92,9 @@ bucket_definitions: }); test('updateLookup - no fullDocument available', async () => { - await using context = await ChangeStreamTestContext.open(factory, { postImages: PostImagesOption.OFF }); + await using context = await ChangeStreamTestContext.open(factory, { + mongoOptions: { postImages: PostImagesOption.OFF } + }); const { db, client } = context; await context.updateSyncRules(` bucket_definitions: @@ -141,7 +138,9 @@ bucket_definitions: test('postImages - autoConfigure', async () => { // Similar to the above test, but with postImages enabled. // This resolves the consistency issue. - await using context = await ChangeStreamTestContext.open(factory, { postImages: PostImagesOption.AUTO_CONFIGURE }); + await using context = await ChangeStreamTestContext.open(factory, { + mongoOptions: { postImages: PostImagesOption.AUTO_CONFIGURE } + }); const { db, client } = context; await context.updateSyncRules(` bucket_definitions: @@ -187,7 +186,9 @@ bucket_definitions: test('postImages - on', async () => { // Similar to postImages - autoConfigure, but does not auto-configure. // changeStreamPreAndPostImages must be manually configured. 
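// Aside — a small sketch of the _id round-trip used by ChunkedSnapshotQuery above: wrapping the
// last key in a { _id } document and BSON-serializing it lets any _id type (ObjectId, Timestamp,
// numbers, compound documents) be persisted as bytes and restored later without knowing its type
// up front. Variable names here are illustrative only.
import { serialize, deserialize, ObjectId } from 'bson';

const lastKeyBytes: Uint8Array = serialize({ _id: new ObjectId() });
// ...stored between batches, e.g. as source_tables.snapshot_status.last_key...
const resumeKey = deserialize(lastKeyBytes, { useBigInt64: true })._id;
// resumeKey can be used directly in a { _id: { $gt: resumeKey } } filter.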
- await using context = await ChangeStreamTestContext.open(factory, { postImages: PostImagesOption.READ_ONLY }); + await using context = await ChangeStreamTestContext.open(factory, { + mongoOptions: { postImages: PostImagesOption.READ_ONLY } + }); const { db, client } = context; await context.updateSyncRules(` bucket_definitions: @@ -432,7 +433,9 @@ bucket_definitions: }); test('postImages - new collection with postImages enabled', async () => { - await using context = await ChangeStreamTestContext.open(factory, { postImages: PostImagesOption.AUTO_CONFIGURE }); + await using context = await ChangeStreamTestContext.open(factory, { + mongoOptions: { postImages: PostImagesOption.AUTO_CONFIGURE } + }); const { db } = context; await context.updateSyncRules(` bucket_definitions: @@ -463,7 +466,9 @@ bucket_definitions: }); test('postImages - new collection with postImages disabled', async () => { - await using context = await ChangeStreamTestContext.open(factory, { postImages: PostImagesOption.AUTO_CONFIGURE }); + await using context = await ChangeStreamTestContext.open(factory, { + mongoOptions: { postImages: PostImagesOption.AUTO_CONFIGURE } + }); const { db } = context; await context.updateSyncRules(` bucket_definitions: diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index a7a662258..cc8d17732 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -4,9 +4,11 @@ import { createCoreReplicationMetrics, initializeCoreReplicationMetrics, InternalOpId, + OplogEntry, ProtocolOpId, ReplicationCheckpoint, - SyncRulesBucketStorage + SyncRulesBucketStorage, + TestStorageOptions } from '@powersync/service-core'; import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; @@ -29,17 +31,27 @@ export class ChangeStreamTestContext { * * This configures all the context, and tears it down afterwards. 
*/ - static async open(factory: () => Promise, options?: Partial) { - const f = await factory(); - const connectionManager = new MongoManager({ ...TEST_CONNECTION_OPTIONS, ...options }); + static async open( + factory: (options: TestStorageOptions) => Promise, + options?: { + doNotClear?: boolean; + mongoOptions?: Partial; + streamOptions?: Partial; + } + ) { + const f = await factory({ doNotClear: options?.doNotClear }); + const connectionManager = new MongoManager({ ...TEST_CONNECTION_OPTIONS, ...options?.mongoOptions }); - await clearTestDb(connectionManager.db); - return new ChangeStreamTestContext(f, connectionManager); + if (!options?.doNotClear) { + await clearTestDb(connectionManager.db); + } + return new ChangeStreamTestContext(f, connectionManager, options?.streamOptions); } constructor( public factory: BucketStorageFactory, - public connectionManager: MongoManager + public connectionManager: MongoManager, + private streamOptions?: Partial ) { createCoreReplicationMetrics(METRICS_HELPER.metricsEngine); initializeCoreReplicationMetrics(METRICS_HELPER.metricsEngine); @@ -74,6 +86,16 @@ export class ChangeStreamTestContext { return this.storage!; } + async loadNextSyncRules() { + const syncRules = await this.factory.getNextSyncRulesContent(); + if (syncRules == null) { + throw new Error(`Next sync rules not available`); + } + + this.storage = this.factory.getInstance(syncRules); + return this.storage!; + } + get walStream() { if (this.storage == null) { throw new Error('updateSyncRules() first'); @@ -88,7 +110,8 @@ export class ChangeStreamTestContext { abort_signal: this.abortController.signal, // Specifically reduce this from the default for tests on MongoDB <= 6.0, otherwise it can take // a long time to abort the stream. - maxAwaitTimeMS: 200 + maxAwaitTimeMS: this.streamOptions?.maxAwaitTimeMS ?? 200, + snapshotChunkLength: this.streamOptions?.snapshotChunkLength }; this._walStream = new ChangeStream(options); return this._walStream!; @@ -122,23 +145,25 @@ export class ChangeStreamTestContext { return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } - async getBucketData( - bucket: string, - start?: ProtocolOpId | InternalOpId | undefined, - options?: { timeout?: number; limit?: number; chunkLimitBytes?: number } - ) { + async getBucketData(bucket: string, start?: ProtocolOpId | InternalOpId | undefined, options?: { timeout?: number }) { start ??= 0n; if (typeof start == 'string') { start = BigInt(start); } - let checkpoint = await this.getCheckpoint(options); + const checkpoint = await this.getCheckpoint(options); const map = new Map([[bucket, start]]); - const batch = this.storage!.getBucketDataBatch(checkpoint, map, { - limit: options?.limit, - chunkLimitBytes: options?.chunkLimitBytes - }); - const batches = await test_utils.fromAsync(batch); - return batches[0]?.chunkData.data ?? []; + let data: OplogEntry[] = []; + while (true) { + const batch = this.storage!.getBucketDataBatch(checkpoint, map); + + const batches = await test_utils.fromAsync(batch); + data = data.concat(batches[0]?.chunkData.data ?? 
[]); + if (batches.length == 0 || !batches[0]!.chunkData.has_more) { + break; + } + map.set(bucket, BigInt(batches[0]!.chunkData.next_after)); + } + return data; } async getChecksums(buckets: string[], options?: { timeout?: number }) { diff --git a/modules/module-mongodb/test/src/chunked_snapshot.test.ts b/modules/module-mongodb/test/src/chunked_snapshot.test.ts new file mode 100644 index 000000000..a3cd937db --- /dev/null +++ b/modules/module-mongodb/test/src/chunked_snapshot.test.ts @@ -0,0 +1,153 @@ +import { mongo } from '@powersync/lib-service-mongodb'; +import { reduceBucket, TestStorageFactory } from '@powersync/service-core'; +import { METRICS_HELPER } from '@powersync/service-core-tests'; +import { JSONBig } from '@powersync/service-jsonbig'; +import { SqliteJsonValue } from '@powersync/service-sync-rules'; +import * as timers from 'timers/promises'; +import { describe, expect, test } from 'vitest'; +import { ChangeStreamTestContext } from './change_stream_utils.js'; +import { describeWithStorage } from './util.js'; + +describe('chunked snapshots', () => { + describeWithStorage({ timeout: 120_000 }, defineBatchTests); +}); + +function defineBatchTests(factory: TestStorageFactory) { + // This is not as sensitive to the id type as postgres, but we still test a couple of cases + test('chunked snapshot (int32)', async () => { + await testChunkedSnapshot({ + generateId(i) { + return i; + }, + idToSqlite(id: number) { + return BigInt(id); + } + }); + }); + + test('chunked snapshot (Timestamp)', async () => { + await testChunkedSnapshot({ + generateId(i) { + return mongo.Timestamp.fromBits(Math.floor(i / 1000), i % 1000); + }, + idToSqlite(id: mongo.Timestamp) { + return id.toBigInt(); + } + }); + }); + + test('chunked snapshot (compound)', async () => { + await testChunkedSnapshot({ + generateId(i) { + return { a: Math.floor(i / 100), b: i % 100 }; + }, + idToSqlite(id: any) { + return JSON.stringify(id); + } + }); + }); + + test('chunked snapshot (float)', async () => { + await testChunkedSnapshot({ + generateId(i) { + // Floating-point operations are not exact, but it should be consistent at least + return i / Math.PI; + }, + idToSqlite(id: any) { + return id; + } + }); + }); + + async function testChunkedSnapshot(options: { + generateId: (i: number) => any; + idToSqlite?: (id: any) => SqliteJsonValue; + }) { + // This is not quite as much of an edge cases as with Postgres. We do still test that + // updates applied while replicating are applied correctly. + const idToSqlite = options.idToSqlite ?? ((n) => n); + const idToString = (id: any) => String(idToSqlite(id)); + + await using context = await ChangeStreamTestContext.open(factory, { + // We need to use a smaller chunk size here, so that we can run a query in between chunks + streamOptions: { snapshotChunkLength: 100 } + }); + + await context.updateSyncRules(`bucket_definitions: + global: + data: + - SELECT _id as id, description FROM test_data`); + const { db } = context; + + let batch = db.collection('test_data').initializeUnorderedBulkOp(); + + // 1. Start with 2k rows... + for (let i = 1; i <= 2000; i++) { + batch.insert({ _id: options.generateId(i), description: 'foo' }); + } + await batch.execute(); + + // 2. Replicate one batch of rows + // Our "stopping point" here is not quite deterministic. + const p = context.replicateSnapshot(); + + const stopAfter = 100; + const startRowCount = (await METRICS_HELPER.getMetricValueForTests('powersync_rows_replicated_total')) ?? 
0; + + while (true) { + const count = + ((await METRICS_HELPER.getMetricValueForTests('powersync_rows_replicated_total')) ?? 0) - startRowCount; + + if (count >= stopAfter) { + break; + } + await timers.setTimeout(1); + } + + // 3. Update some records + const idA = options.generateId(2000); + const idB = options.generateId(1); + await db.collection('test_data').updateOne({ _id: idA }, { $set: { description: 'bar' } }); + await db.collection('test_data').updateOne({ _id: idB }, { $set: { description: 'baz' } }); + + // 4. Delete + const idC = options.generateId(1999); + await db.collection('test_data').deleteOne({ _id: idC }); + + // 5. Insert + const idD = options.generateId(2001); + await db.collection('test_data').insertOne({ _id: idD, description: 'new' }); + + // 4. Replicate the rest of the table. + await p; + + context.startStreaming(); + + const data = await context.getBucketData('global[]'); + const reduced = reduceBucket(data); + + expect(reduced.find((row) => row.object_id == idToString(idA))?.data).toEqual( + JSONBig.stringify({ + id: idToSqlite(idA), + description: 'bar' + }) + ); + + expect(reduced.find((row) => row.object_id == idToString(idB))?.data).toEqual( + JSONBig.stringify({ + id: idToSqlite(idB), + description: 'baz' + }) + ); + + expect(reduced.find((row) => row.object_id == idToString(idC))).toBeUndefined(); + + expect(reduced.find((row) => row.object_id == idToString(idD))?.data).toEqual( + JSONBig.stringify({ + id: idToSqlite(idD), + description: 'new' + }) + ); + expect(reduced.length).toEqual(2001); + } +} diff --git a/modules/module-mongodb/test/src/resume.test.ts b/modules/module-mongodb/test/src/resume.test.ts index b89622be6..68e9b2540 100644 --- a/modules/module-mongodb/test/src/resume.test.ts +++ b/modules/module-mongodb/test/src/resume.test.ts @@ -1,96 +1,14 @@ -import { MongoLSN, ZERO_LSN } from '@module/common/MongoLSN.js'; - +import { ChangeStreamInvalidatedError } from '@module/replication/ChangeStream.js'; import { MongoManager } from '@module/replication/MongoManager.js'; import { normalizeConnectionConfig } from '@module/types/types.js'; -import { isMongoServerError, mongo } from '@powersync/lib-service-mongodb'; import { BucketStorageFactory, TestStorageOptions } from '@powersync/service-core'; -import { describe, expect, test, vi } from 'vitest'; +import { describe, expect, test } from 'vitest'; import { ChangeStreamTestContext } from './change_stream_utils.js'; import { env } from './env.js'; -import { INITIALIZED_MONGO_STORAGE_FACTORY, INITIALIZED_POSTGRES_STORAGE_FACTORY } from './util.js'; -import { ChangeStreamInvalidatedError } from '@module/replication/ChangeStream.js'; - -describe('mongo lsn', () => { - test('LSN with resume tokens should be comparable', () => { - // Values without a resume token should be comparable - expect( - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(1) - }).comparable < - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(10) - }).comparable - ).true; - - // Values with resume tokens should correctly compare - expect( - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(1), - resume_token: { _data: 'resume1' } - }).comparable < - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(10), - resume_token: { _data: 'resume2' } - }).comparable - ).true; - - // The resume token should not affect comparison - expect( - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(1), - resume_token: { _data: '2' } - }).comparable < - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(10), - resume_token: 
{ _data: '1' } - }).comparable - ).true; - - // Resume token should not be required for comparison - expect( - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(10), - resume_token: { _data: '2' } - }).comparable > // Switching the order to test this case - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(9) - }).comparable - ).true; +import { describeWithStorage } from './util.js'; - // Comparison should be backwards compatible with old LSNs - expect( - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(10), - resume_token: { _data: '2' } - }).comparable > ZERO_LSN - ).true; - expect( - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(10), - resume_token: { _data: '2' } - }).comparable > - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(1) - }).comparable.split('|')[0] // Simulate an old LSN - ).true; - expect( - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(1), - resume_token: { _data: '2' } - }).comparable < - new MongoLSN({ - timestamp: mongo.Timestamp.fromNumber(10) - }).comparable.split('|')[0] // Simulate an old LSN - ).true; - }); -}); - -describe.skipIf(!env.TEST_MONGO_STORAGE)('MongoDB resume - mongo storage', () => { - defineResumeTest(INITIALIZED_MONGO_STORAGE_FACTORY); -}); - -describe.skipIf(!env.TEST_POSTGRES_STORAGE)('MongoDB resume - postgres storage', () => { - defineResumeTest(INITIALIZED_POSTGRES_STORAGE_FACTORY); +describe('mongodb resuming replication', () => { + describeWithStorage({}, defineResumeTest); }); function defineResumeTest(factoryGenerator: (options?: TestStorageOptions) => Promise) { @@ -112,13 +30,7 @@ function defineResumeTest(factoryGenerator: (options?: TestStorageOptions) => Pr await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); // Wait for the item above to be replicated. The commit should store a resume token. - await vi.waitFor( - async () => { - const checkpoint = await context.storage?.getCheckpoint(); - expect(MongoLSN.fromSerialized(checkpoint!.lsn!).resumeToken).exist; - }, - { timeout: 5000 } - ); + await context.getCheckpoint(); // Done with this context for now await context.dispose(); @@ -145,6 +57,7 @@ function defineResumeTest(factoryGenerator: (options?: TestStorageOptions) => Pr const activeContent = await factory.getActiveSyncRulesContent(); context2.storage = factory.getInstance(activeContent!); + // If this test times out, it likely didn't throw the expected error here. 
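For reference, the describeWithStorage helper used above (defined in the test util changes later in this diff) is what lets a single suite run against both storage backends. A minimal usage sketch, not part of this diff; the suite body and test names are illustrative only:

import { describe, expect, test } from 'vitest';
import { TestStorageFactory } from '@powersync/service-core';
import { describeWithStorage } from './util.js';

describe('example feature', () => {
  // Runs defineTests once per enabled storage backend (MongoDB / Postgres),
  // skipping any backend not enabled via the TEST_* env flags.
  describeWithStorage({ timeout: 30_000 }, defineTests);
});

function defineTests(factory: TestStorageFactory) {
  test('opens a storage factory', async () => {
    const storageFactory = await factory();
    expect(storageFactory).toBeDefined();
  });
}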
const error = await context2.startStreaming().catch((ex) => ex); // The ChangeStreamReplicationJob will detect this and throw a ChangeStreamInvalidatedError expect(error).toBeInstanceOf(ChangeStreamInvalidatedError); diff --git a/modules/module-mongodb/test/src/resume_token.test.ts b/modules/module-mongodb/test/src/resume_token.test.ts index aa7a85858..19d0fdccf 100644 --- a/modules/module-mongodb/test/src/resume_token.test.ts +++ b/modules/module-mongodb/test/src/resume_token.test.ts @@ -1,5 +1,6 @@ -import { parseResumeTokenTimestamp } from '@module/common/MongoLSN.js'; -import { describe, expect, it } from 'vitest'; +import { MongoLSN, parseResumeTokenTimestamp, ZERO_LSN } from '@module/common/MongoLSN.js'; +import { mongo } from '@powersync/lib-service-mongodb'; +import { describe, expect, it, test } from 'vitest'; describe('parseResumeTokenTimestamp', () => { it('parses a valid resume token (1)', () => { @@ -33,3 +34,78 @@ describe('parseResumeTokenTimestamp', () => { expect(() => parseResumeTokenTimestamp(resumeToken)).toThrowError(/^Invalid resume token/); }); }); + +describe('mongo lsn', () => { + test('LSN with resume tokens should be comparable', () => { + // Values without a resume token should be comparable + expect( + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(1) + }).comparable < + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(10) + }).comparable + ).toBe(true); + + // Values with resume tokens should correctly compare + expect( + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(1), + resume_token: { _data: 'resume1' } + }).comparable < + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(10), + resume_token: { _data: 'resume2' } + }).comparable + ).toBe(true); + + // The resume token should not affect comparison + expect( + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(1), + resume_token: { _data: '2' } + }).comparable < + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(10), + resume_token: { _data: '1' } + }).comparable + ).toBe(true); + + // Resume token should not be required for comparison + expect( + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(10), + resume_token: { _data: '2' } + }).comparable > // Switching the order to test this case + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(9) + }).comparable + ).toBe(true); + + // Comparison should be backwards compatible with old LSNs + expect( + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(10), + resume_token: { _data: '2' } + }).comparable > ZERO_LSN + ).toBe(true); + expect( + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(10), + resume_token: { _data: '2' } + }).comparable > + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(1) + }).comparable.split('|')[0] // Simulate an old LSN + ).toBe(true); + expect( + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(1), + resume_token: { _data: '2' } + }).comparable < + new MongoLSN({ + timestamp: mongo.Timestamp.fromNumber(10) + }).comparable.split('|')[0] // Simulate an old LSN + ).toBe(true); + }); +}); diff --git a/modules/module-mongodb/test/src/resuming_snapshots.test.ts b/modules/module-mongodb/test/src/resuming_snapshots.test.ts new file mode 100644 index 000000000..589c2a55d --- /dev/null +++ b/modules/module-mongodb/test/src/resuming_snapshots.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, test } from 'vitest'; +import { env } from './env.js'; +import { describeWithStorage } from './util.js'; +import { TestStorageFactory } from '@powersync/service-core'; +import { 
METRICS_HELPER } from '@powersync/service-core-tests'; +import { ReplicationMetric } from '@powersync/service-types'; +import * as timers from 'node:timers/promises'; +import { ChangeStreamTestContext } from './change_stream_utils.js'; + +describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { + describeWithStorage({ timeout: 240_000 }, function (factory) { + test('resuming initial replication (1)', async () => { + // Stop early - likely to not include deleted row in first replication attempt. + await testResumingReplication(factory, 2000); + }); + test('resuming initial replication (2)', async () => { + // Stop late - likely to include deleted row in first replication attempt. + await testResumingReplication(factory, 8000); + }); + }); +}); + +async function testResumingReplication(factory: TestStorageFactory, stopAfter: number) { + // This tests interrupting and then resuming initial replication. + // We interrupt replication after test_data1 has fully replicated, and + // test_data2 has partially replicated. + // This test relies on interval behavior that is not 100% deterministic: + // 1. We attempt to abort initial replication once a certain number of + // rows have been replicated, but this is not exact. Our only requirement + // is that we have not fully replicated test_data2 yet. + // 2. Order of replication is not deterministic, so which specific rows + // have been / have not been replicated at that point is not deterministic. + // We do allow for some variation in the test results to account for this. + + await using context = await ChangeStreamTestContext.open(factory, { streamOptions: { snapshotChunkLength: 1000 } }); + + await context.updateSyncRules(`bucket_definitions: + global: + data: + - SELECT _id as id, description FROM test_data1 + - SELECT _id as id, description FROM test_data2`); + const { db } = context; + + let batch = db.collection('test_data1').initializeUnorderedBulkOp(); + for (let i = 1; i <= 1000; i++) { + batch.insert({ _id: i, description: 'foo' }); + } + await batch.execute(); + batch = db.collection('test_data2').initializeUnorderedBulkOp(); + for (let i = 1; i <= 10000; i++) { + batch.insert({ _id: i, description: 'foo' }); + } + await batch.execute(); + + const p = context.replicateSnapshot(); + + let done = false; + + const startRowCount = (await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0; + try { + (async () => { + while (!done) { + const count = + ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; + + if (count >= stopAfter) { + break; + } + await timers.setTimeout(1); + } + // This interrupts initial replication + await context.dispose(); + })(); + // This confirms that initial replication was interrupted + await expect(p).rejects.toThrowError(); + done = true; + } finally { + done = true; + } + + // Bypass the usual "clear db on factory open" step. 
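The interrupt logic above boils down to polling the rows-replicated counter and disposing the context once enough rows have been replicated. As a sketch, with waitForReplicatedRowCount being a hypothetical helper name and the metric helpers the same ones this test already imports:

import * as timers from 'node:timers/promises';
import { METRICS_HELPER } from '@powersync/service-core-tests';
import { ReplicationMetric } from '@powersync/service-types';

// Resolves once at least stopAfter rows have been replicated since startRowCount.
async function waitForReplicatedRowCount(startRowCount: number, stopAfter: number): Promise<void> {
  while (true) {
    const count =
      ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount;
    if (count >= stopAfter) {
      return;
    }
    await timers.setTimeout(1);
  }
}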
+ await using context2 = await ChangeStreamTestContext.open(factory, { + doNotClear: true, + streamOptions: { snapshotChunkLength: 1000 } + }); + + // This delete should be using one of the ids already replicated + await db.collection('test_data2').deleteOne({ _id: 1 as any }); + await db.collection('test_data2').updateOne({ _id: 2 as any }, { $set: { description: 'update1' } }); + await db.collection('test_data2').insertOne({ _id: 10001 as any, description: 'insert1' }); + + await context2.loadNextSyncRules(); + await context2.replicateSnapshot(); + + context2.startStreaming(); + const data = await context2.getBucketData('global[]', undefined, {}); + + const deletedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === '1'); + const updatedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === '2'); + const insertedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === '10001'); + + if (deletedRowOps.length != 0) { + // The deleted row was part of the first replication batch, + // so it is removed by streaming replication. + expect(deletedRowOps.length).toEqual(2); + expect(deletedRowOps[1].op).toEqual('REMOVE'); + } else { + // The deleted row was not part of the first replication batch, + // so it's not in the resulting ops at all. + } + + expect(updatedRowOps.length).toEqual(2); + // description for the first op could be 'foo' or 'update1'. + // We only test the final version. + expect(JSON.parse(updatedRowOps[1].data as string).description).toEqual('update1'); + + expect(insertedRowOps.length).toEqual(2); + expect(JSON.parse(insertedRowOps[0].data as string).description).toEqual('insert1'); + expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); + + // 1000 of test_data1 during first replication attempt. + // N >= 1000 of test_data2 during first replication attempt. + // 10000 - N - 1 + 1 of test_data2 during second replication attempt. + // An additional update during streaming replication (2x total for this row). + // An additional insert during streaming replication (2x total for this row). + // If the deleted row was part of the first replication batch, it's removed by streaming replication. + // This adds 2 ops. + // We expect this to be 11002 for stopAfter: 2000, and 11004 for stopAfter: 8000. + // However, this is not deterministic. + const expectedCount = 11002 + deletedRowOps.length; + expect(data.length).toEqual(expectedCount); + + const replicatedCount = + ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; + + // With resumable replication, there should be no need to re-replicate anything. 
+ expect(replicatedCount).toEqual(expectedCount); +} diff --git a/modules/module-mongodb/test/src/slow_tests.test.ts b/modules/module-mongodb/test/src/slow_tests.test.ts index 7e365b15c..9e21aaf47 100644 --- a/modules/module-mongodb/test/src/slow_tests.test.ts +++ b/modules/module-mongodb/test/src/slow_tests.test.ts @@ -6,24 +6,10 @@ import { storage } from '@powersync/service-core'; import { ChangeStreamTestContext, setSnapshotHistorySeconds } from './change_stream_utils.js'; import { env } from './env.js'; -import { INITIALIZED_MONGO_STORAGE_FACTORY, INITIALIZED_POSTGRES_STORAGE_FACTORY } from './util.js'; - -describe.skipIf(!env.TEST_MONGO_STORAGE)('change stream slow tests - mongodb', { timeout: 60_000 }, function () { - if (env.CI || env.SLOW_TESTS) { - defineSlowTests(INITIALIZED_MONGO_STORAGE_FACTORY); - } else { - // Need something in this file. - test('no-op', () => {}); - } -}); +import { describeWithStorage } from './util.js'; -describe.skipIf(!env.TEST_POSTGRES_STORAGE)('change stream slow tests - postgres', { timeout: 60_000 }, function () { - if (env.CI || env.SLOW_TESTS) { - defineSlowTests(INITIALIZED_POSTGRES_STORAGE_FACTORY); - } else { - // Need something in this file. - test('no-op', () => {}); - } +describe.runIf(env.CI || env.SLOW_TESTS)('change stream slow tests', { timeout: 60_000 }, function () { + describeWithStorage({}, defineSlowTests); }); function defineSlowTests(factory: storage.TestStorageFactory) { @@ -96,7 +82,7 @@ bucket_definitions: await snapshotPromise; context.startStreaming(); - const data = await context.getBucketData('global[]', undefined, { limit: 50_000, chunkLimitBytes: 60_000_000 }); + const data = await context.getBucketData('global[]'); const preDocuments = data.filter((d: any) => JSON.parse(d.data! as string).description.startsWith('pre')).length; const updatedDocuments = data.filter((d: any) => diff --git a/modules/module-mongodb/test/src/util.ts b/modules/module-mongodb/test/src/util.ts index c6aa6cc22..db02a5158 100644 --- a/modules/module-mongodb/test/src/util.ts +++ b/modules/module-mongodb/test/src/util.ts @@ -4,7 +4,8 @@ import * as postgres_storage from '@powersync/service-module-postgres-storage'; import * as types from '@module/types/types.js'; import { env } from './env.js'; -import { BSON_DESERIALIZE_DATA_OPTIONS } from '@powersync/service-core'; +import { BSON_DESERIALIZE_DATA_OPTIONS, TestStorageFactory } from '@powersync/service-core'; +import { describe, TestOptions } from 'vitest'; export const TEST_URI = env.MONGO_TEST_DATA_URL; @@ -22,6 +23,16 @@ export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.PostgresTes url: env.PG_STORAGE_TEST_URL }); +export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageFactory) => void) { + describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, options, function () { + fn(INITIALIZED_MONGO_STORAGE_FACTORY); + }); + + describe.skipIf(!env.TEST_POSTGRES_STORAGE)(`postgres storage`, options, function () { + fn(INITIALIZED_POSTGRES_STORAGE_FACTORY); + }); +} + export async function clearTestDb(db: mongo.Db) { await db.dropDatabase(); } diff --git a/modules/module-mysql/src/replication/BinLogReplicationJob.ts b/modules/module-mysql/src/replication/BinLogReplicationJob.ts index c02f8a0f6..49280c9e3 100644 --- a/modules/module-mysql/src/replication/BinLogReplicationJob.ts +++ b/modules/module-mysql/src/replication/BinLogReplicationJob.ts @@ -1,4 +1,4 @@ -import { container } from '@powersync/lib-services-framework'; +import { container, logger as 
defaultLogger } from '@powersync/lib-services-framework'; import { replication } from '@powersync/service-core'; import { BinlogConfigurationError, BinLogStream } from './BinLogStream.js'; import { MySQLConnectionManagerFactory } from './MySQLConnectionManagerFactory.js'; @@ -13,6 +13,7 @@ export class BinLogReplicationJob extends replication.AbstractReplicationJob { constructor(options: BinLogReplicationJobOptions) { super(options); + this.logger = defaultLogger.child({ prefix: `[powersync_${this.options.storage.group_id}] ` }); this.connectionFactory = options.connectionFactory; } @@ -32,7 +33,7 @@ export class BinLogReplicationJob extends replication.AbstractReplicationJob { replication_slot: this.slot_name } }); - this.logger.error(`Replication failed on ${this.slot_name}`, e); + this.logger.error(`Replication failed`, e); } finally { this.abortController.abort(); } @@ -62,6 +63,7 @@ export class BinLogReplicationJob extends replication.AbstractReplicationJob { return; } const stream = new BinLogStream({ + logger: this.logger, abortSignal: this.abortController.signal, storage: this.options.storage, metrics: this.options.metrics, @@ -73,7 +75,7 @@ export class BinLogReplicationJob extends replication.AbstractReplicationJob { if (this.abortController.signal.aborted) { return; } - this.logger.error(`Sync rules ${this.id} Replication error`, e); + this.logger.error(`Replication error`, e); if (e.cause != null) { this.logger.error(`cause`, e.cause); } diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 4272eff11..edb516b0c 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -1,4 +1,9 @@ -import { logger, ReplicationAbortedError, ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { + Logger, + logger as defaultLogger, + ReplicationAbortedError, + ReplicationAssertionError +} from '@powersync/lib-services-framework'; import * as sync_rules from '@powersync/service-sync-rules'; import { @@ -23,6 +28,7 @@ export interface BinLogStreamOptions { storage: storage.SyncRulesBucketStorage; metrics: MetricsEngine; abortSignal: AbortSignal; + logger?: Logger; } interface MysqlRelId { @@ -66,6 +72,8 @@ export class BinLogStream { private tableCache = new Map(); + private logger: Logger; + /** * Time of the oldest uncommitted change, according to the source db. * This is used to determine the replication lag. @@ -78,6 +86,7 @@ export class BinLogStream { private isStartingReplication = true; constructor(private options: BinLogStreamOptions) { + this.logger = options.logger ?? 
defaultLogger; this.storage = options.storage; this.connections = options.connections; this.syncRules = options.storage.getParsedSyncRules({ defaultSchema: this.defaultSchema }); @@ -155,7 +164,7 @@ export class BinLogStream { await this.snapshotTable(connection.connection, batch, result.table); await promiseConnection.query('COMMIT'); } catch (e) { - await tryRollback(promiseConnection); + await this.tryRollback(promiseConnection); throw e; } } finally { @@ -213,7 +222,7 @@ AND table_type = 'BASE TABLE';`, [tablePattern.schema, tablePattern.name] ); if (result[0].length == 0) { - logger.info(`Skipping ${tablePattern.schema}.${name} - no table exists/is not a base table`); + this.logger.info(`Skipping ${tablePattern.schema}.${name} - no table exists/is not a base table`); continue; } @@ -248,7 +257,7 @@ AND table_type = 'BASE TABLE';`, const status = await this.storage.getStatus(); const lastKnowGTID = status.checkpoint_lsn ? common.ReplicatedGTID.fromSerialized(status.checkpoint_lsn) : null; if (status.snapshot_done && status.checkpoint_lsn) { - logger.info(`Initial replication already done.`); + this.logger.info(`Initial replication already done.`); if (lastKnowGTID) { // Check if the binlog is still available. If it isn't we need to snapshot again. @@ -256,7 +265,7 @@ AND table_type = 'BASE TABLE';`, try { const isAvailable = await common.isBinlogStillAvailable(connection, lastKnowGTID.position.filename); if (!isAvailable) { - logger.info( + this.logger.info( `BinLog file ${lastKnowGTID.position.filename} is no longer available, starting initial replication again.` ); } @@ -284,9 +293,9 @@ AND table_type = 'BASE TABLE';`, const connection = await this.connections.getStreamingConnection(); const promiseConnection = (connection as mysql.Connection).promise(); const headGTID = await common.readExecutedGtid(promiseConnection); - logger.info(`Using snapshot checkpoint GTID: '${headGTID}'`); + this.logger.info(`Using snapshot checkpoint GTID: '${headGTID}'`); try { - logger.info(`Starting initial replication`); + this.logger.info(`Starting initial replication`); await promiseConnection.query( 'SET TRANSACTION ISOLATION LEVEL REPEATABLE READ, READ ONLY' ); @@ -295,7 +304,12 @@ AND table_type = 'BASE TABLE';`, const sourceTables = this.syncRules.getSourceTables(); await this.storage.startBatch( - { zeroLSN: common.ReplicatedGTID.ZERO.comparable, defaultSchema: this.defaultSchema, storeCurrentData: true }, + { + logger: this.logger, + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }, async (batch) => { for (let tablePattern of sourceTables) { const tables = await this.getQualifiedTableNames(batch, tablePattern); @@ -308,10 +322,10 @@ AND table_type = 'BASE TABLE';`, await batch.commit(headGTID.comparable); } ); - logger.info(`Initial replication done`); + this.logger.info(`Initial replication done`); await promiseConnection.query('COMMIT'); } catch (e) { - await tryRollback(promiseConnection); + await this.tryRollback(promiseConnection); throw e; } finally { connection.release(); @@ -323,7 +337,7 @@ AND table_type = 'BASE TABLE';`, batch: storage.BucketStorageBatch, table: storage.SourceTable ) { - logger.info(`Replicating ${table.qualifiedName}`); + this.logger.info(`Replicating ${table.qualifiedName}`); // TODO count rows and log progress at certain batch sizes // MAX_EXECUTION_TIME(0) hint disables execution timeout for this query @@ -366,7 +380,7 @@ AND table_type = 'BASE TABLE';`, // all connections automatically closed, 
including this one. await this.initReplication(); await this.streamChanges(); - logger.info('BinLogStream has been shut down.'); + this.logger.info('BinLogStream has been shut down'); } catch (e) { await this.storage.reportError(e); throw e; @@ -390,7 +404,12 @@ AND table_type = 'BASE TABLE';`, // This is needed for includeSchema to work correctly. const sourceTables = this.syncRules.getSourceTables(); await this.storage.startBatch( - { zeroLSN: common.ReplicatedGTID.ZERO.comparable, defaultSchema: this.defaultSchema, storeCurrentData: true }, + { + logger: this.logger, + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }, async (batch) => { for (let tablePattern of sourceTables) { await this.getQualifiedTableNames(batch, tablePattern); @@ -418,7 +437,7 @@ AND table_type = 'BASE TABLE';`, const connection = await this.connections.getConnection(); const { checkpoint_lsn } = await this.storage.getStatus(); if (checkpoint_lsn) { - logger.info(`Existing checkpoint found: ${checkpoint_lsn}`); + this.logger.info(`Existing checkpoint found: ${checkpoint_lsn}`); } const fromGTID = checkpoint_lsn ? common.ReplicatedGTID.fromSerialized(checkpoint_lsn) @@ -434,6 +453,7 @@ AND table_type = 'BASE TABLE';`, // Only listen for changes to tables in the sync rules const includedTables = [...this.tableCache.values()].map((table) => table.table); const binlogListener = new BinLogListener({ + logger: this.logger, includedTables: includedTables, startPosition: binLogPositionState, connectionManager: this.connections, @@ -444,7 +464,7 @@ AND table_type = 'BASE TABLE';`, this.abortSignal.addEventListener( 'abort', () => { - logger.info('Abort signal received, stopping replication...'); + this.logger.info('Abort signal received, stopping replication...'); binlogListener.stop(); }, { once: true } @@ -596,12 +616,12 @@ AND table_type = 'BASE TABLE';`, } return Date.now() - this.oldestUncommittedChange.getTime(); } -} -async function tryRollback(promiseConnection: mysqlPromise.Connection) { - try { - await promiseConnection.query('ROLLBACK'); - } catch (e) { - logger.error('Failed to rollback transaction', e); + async tryRollback(promiseConnection: mysqlPromise.Connection) { + try { + await promiseConnection.query('ROLLBACK'); + } catch (e) { + this.logger.error('Failed to rollback transaction', e); + } } } diff --git a/modules/module-mysql/src/replication/zongji/BinLogListener.ts b/modules/module-mysql/src/replication/zongji/BinLogListener.ts index bbd9c8b29..47432b2a9 100644 --- a/modules/module-mysql/src/replication/zongji/BinLogListener.ts +++ b/modules/module-mysql/src/replication/zongji/BinLogListener.ts @@ -2,7 +2,7 @@ import * as common from '../../common/common-index.js'; import async from 'async'; import { BinLogEvent, StartOptions, TableMapEntry, ZongJi } from '@powersync/mysql-zongji'; import * as zongji_utils from './zongji-utils.js'; -import { logger } from '@powersync/lib-services-framework'; +import { Logger, logger as defaultLogger } from '@powersync/lib-services-framework'; import { MySQLConnectionManager } from '../MySQLConnectionManager.js'; // Maximum time the processing queue can be paused before resuming automatically @@ -26,6 +26,7 @@ export interface BinLogListenerOptions { includedTables: string[]; serverId: number; startPosition: common.BinLogPosition; + logger?: Logger; } /** @@ -37,6 +38,7 @@ export class BinLogListener { private eventHandler: BinLogEventHandler; private binLogPosition: common.BinLogPosition; private 
currentGTID: common.ReplicatedGTID | null; + private logger: Logger; zongji: ZongJi; processingQueue: async.QueueObject; @@ -46,6 +48,7 @@ export class BinLogListener { queueMemoryUsage: number; constructor(public options: BinLogListenerOptions) { + this.logger = options.logger ?? defaultLogger; this.connectionManager = options.connectionManager; this.eventHandler = options.eventHandler; this.binLogPosition = options.startPosition; @@ -68,7 +71,7 @@ export class BinLogListener { if (this.isStopped) { return; } - logger.info(`Starting replication. Created replica client with serverId:${this.options.serverId}`); + this.logger.info(`Starting replication. Created replica client with serverId:${this.options.serverId}`); this.zongji.start({ // We ignore the unknown/heartbeat event since it currently serves no purpose other than to keep the connection alive @@ -83,33 +86,33 @@ export class BinLogListener { return new Promise((resolve, reject) => { // Handle an edge case where the listener has already been stopped before completing startup if (this.isStopped) { - logger.info('BinLog listener was stopped before startup completed.'); + this.logger.info('BinLog listener was stopped before startup completed.'); resolve(); } this.zongji.on('error', (error) => { if (!this.isStopped) { - logger.error('Binlog listener error:', error); + this.logger.error('Binlog listener error:', error); this.stop(); reject(error); } else { - logger.warn('Binlog listener error during shutdown:', error); + this.logger.warn('Binlog listener error during shutdown:', error); } }); this.processingQueue.error((error) => { if (!this.isStopped) { - logger.error('BinlogEvent processing error:', error); + this.logger.error('BinlogEvent processing error:', error); this.stop(); reject(error); } else { - logger.warn('BinlogEvent processing error during shutdown:', error); + this.logger.warn('BinlogEvent processing error during shutdown:', error); } }); this.zongji.on('stopped', () => { resolve(); - logger.info('BinLog listener stopped. Replication ended.'); + this.logger.info('BinLog listener stopped. Replication ended.'); }); }); } @@ -129,13 +132,13 @@ export class BinLogListener { const zongji = this.connectionManager.createBinlogListener(); zongji.on('binlog', async (evt) => { - logger.info(`Received Binlog event:${evt.getEventName()}`); + this.logger.info(`Received Binlog event:${evt.getEventName()}`); this.processingQueue.push(evt); this.queueMemoryUsage += evt.size; // When the processing queue grows past the threshold, we pause the binlog listener if (this.isQueueOverCapacity()) { - logger.info( + this.logger.info( `Binlog processing queue has reached its memory limit of [${this.connectionManager.options.binlog_queue_memory_limit}MB]. Pausing Binlog listener.` ); zongji.pause(); @@ -145,7 +148,7 @@ export class BinLogListener { await Promise.race([this.processingQueue.empty(), resumeTimeoutPromise]); - logger.info(`Binlog processing queue backlog cleared. Resuming Binlog listener.`); + this.logger.info(`Binlog processing queue backlog cleared. 
Resuming Binlog listener.`); zongji.resume(); } }); @@ -158,11 +161,11 @@ export class BinLogListener { this.zongji.connection.query( // In nanoseconds, 10^9 = 1s 'set @master_heartbeat_period=28*1000000000', - function (error: any, results: any, fields: any) { + (error: any, results: any, fields: any) => { if (error) { reject(error); } else { - logger.info('Successfully set up replication connection heartbeat...'); + this.logger.info('Successfully set up replication connection heartbeat...'); resolve(results); } } @@ -173,10 +176,10 @@ export class BinLogListener { // The timeout here must be greater than the master_heartbeat_period. const socket = this.zongji.connection._socket!; socket.setTimeout(60_000, () => { - logger.info('Destroying socket due to replication connection timeout.'); + this.logger.info('Destroying socket due to replication connection timeout.'); socket.destroy(new Error('Replication connection timeout.')); }); - logger.info( + this.logger.info( `BinLog listener setup complete. Reading binlog from: ${this.binLogPosition.filename}:${this.binLogPosition.offset}` ); }); diff --git a/modules/module-mysql/test/src/BinLogStream.test.ts b/modules/module-mysql/test/src/BinLogStream.test.ts index a0db934cf..1e7708eb9 100644 --- a/modules/module-mysql/test/src/BinLogStream.test.ts +++ b/modules/module-mysql/test/src/BinLogStream.test.ts @@ -1,11 +1,10 @@ import { storage } from '@powersync/service-core'; import { METRICS_HELPER, putOp, removeOp } from '@powersync/service-core-tests'; +import { ReplicationMetric } from '@powersync/service-types'; import { v4 as uuid } from 'uuid'; import { describe, expect, test } from 'vitest'; import { BinlogStreamTestContext } from './BinlogStreamUtils.js'; -import { env } from './env.js'; -import { INITIALIZED_MONGO_STORAGE_FACTORY, INITIALIZED_POSTGRES_STORAGE_FACTORY } from './util.js'; -import { ReplicationMetric } from '@powersync/service-types'; +import { describeWithStorage } from './util.js'; const BASIC_SYNC_RULES = ` bucket_definitions: @@ -14,12 +13,8 @@ bucket_definitions: - SELECT id, description FROM "test_data" `; -describe.skipIf(!env.TEST_MONGO_STORAGE)(' Binlog stream - mongodb', { timeout: 20_000 }, function () { - defineBinlogStreamTests(INITIALIZED_MONGO_STORAGE_FACTORY); -}); - -describe.skipIf(!env.TEST_POSTGRES_STORAGE)(' Binlog stream - postgres', { timeout: 20_000 }, function () { - defineBinlogStreamTests(INITIALIZED_POSTGRES_STORAGE_FACTORY); +describe('BigLog stream', () => { + describeWithStorage({ timeout: 20_000 }, defineBinlogStreamTests); }); function defineBinlogStreamTests(factory: storage.TestStorageFactory) { diff --git a/modules/module-mysql/test/src/util.ts b/modules/module-mysql/test/src/util.ts index ffb797032..9126f7447 100644 --- a/modules/module-mysql/test/src/util.ts +++ b/modules/module-mysql/test/src/util.ts @@ -4,6 +4,8 @@ import * as mongo_storage from '@powersync/service-module-mongodb-storage'; import * as postgres_storage from '@powersync/service-module-postgres-storage'; import mysqlPromise from 'mysql2/promise'; import { env } from './env.js'; +import { describe, TestOptions } from 'vitest'; +import { TestStorageFactory } from '@powersync/service-core'; export const TEST_URI = env.MYSQL_TEST_URI; @@ -21,6 +23,16 @@ export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.PostgresTes url: env.PG_STORAGE_TEST_URL }); +export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageFactory) => void) { + describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, 
options, function () { + fn(INITIALIZED_MONGO_STORAGE_FACTORY); + }); + + describe.skipIf(!env.TEST_POSTGRES_STORAGE)(`postgres storage`, options, function () { + fn(INITIALIZED_POSTGRES_STORAGE_FACTORY); + }); +} + export async function clearTestDb(connection: mysqlPromise.Connection) { const version = await getMySQLVersion(connection); if (isVersionAtLeast(version, '8.4.0')) { diff --git a/modules/module-postgres-storage/src/migrations/scripts/1684951997326-init.ts b/modules/module-postgres-storage/src/migrations/scripts/1684951997326-init.ts index 0cd26784e..61e8699ea 100644 --- a/modules/module-postgres-storage/src/migrations/scripts/1684951997326-init.ts +++ b/modules/module-postgres-storage/src/migrations/scripts/1684951997326-init.ts @@ -136,6 +136,5 @@ export const down: migrations.PowerSyncMigrationFunction = async (context) => { service_context: { configuration } } = context; await using client = openMigrationDB(configuration.storage); - await dropTables(client); }; diff --git a/modules/module-postgres-storage/src/migrations/scripts/1749024804042-snapshot-progress.ts b/modules/module-postgres-storage/src/migrations/scripts/1749024804042-snapshot-progress.ts new file mode 100644 index 000000000..1fb88df97 --- /dev/null +++ b/modules/module-postgres-storage/src/migrations/scripts/1749024804042-snapshot-progress.ts @@ -0,0 +1,43 @@ +import { migrations } from '@powersync/service-core'; + +import { openMigrationDB } from '../migration-utils.js'; + +export const up: migrations.PowerSyncMigrationFunction = async (context) => { + const { + service_context: { configuration } + } = context; + await using client = openMigrationDB(configuration.storage); + + await client.transaction(async (db) => { + await db.sql` + ALTER TABLE sync_rules + ADD COLUMN snapshot_lsn TEXT + `.execute(); + await db.sql` + ALTER TABLE source_tables + ADD COLUMN snapshot_total_estimated_count INTEGER, + ADD COLUMN snapshot_replicated_count INTEGER, + ADD COLUMN snapshot_last_key BYTEA + `.execute(); + }); +}; + +export const down: migrations.PowerSyncMigrationFunction = async (context) => { + const { + service_context: { configuration } + } = context; + await using client = openMigrationDB(configuration.storage); + + await client.transaction(async (db) => { + await db.sql` + ALTER TABLE sync_rules + DROP COLUMN snapshot_lsn + `.execute(); + await db.sql` + ALTER TABLE source_tables + DROP COLUMN snapshot_total_estimated_count, + DROP COLUMN snapshot_replicated_count, + DROP COLUMN snapshot_last_key + `.execute(); + }); +}; diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index e785549e8..4fa48eaff 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -233,6 +233,13 @@ export class PostgresSyncRulesStorage replicationColumns, sourceTableRow!.snapshot_done ?? true ); + if (!sourceTable.snapshotComplete) { + sourceTable.snapshotStatus = { + totalEstimatedCount: Number(sourceTableRow!.snapshot_total_estimated_count ?? -1n), + replicatedCount: Number(sourceTableRow!.snapshot_replicated_count ?? 
0n), + lastKey: sourceTableRow!.snapshot_last_key + }; + } sourceTable.syncEvent = options.sync_rules.tableTriggersEvent(sourceTable); sourceTable.syncData = options.sync_rules.tableSyncsData(sourceTable); sourceTable.syncParameters = options.sync_rules.tableSyncsParameters(sourceTable); @@ -321,6 +328,7 @@ export class PostgresSyncRulesStorage const checkpoint_lsn = syncRules?.last_checkpoint_lsn ?? null; const batch = new PostgresBucketBatch({ + logger: options.logger ?? framework.logger, db: this.db, sync_rules: this.sync_rules.parsed(options).sync_rules, group_id: this.group_id, @@ -330,7 +338,8 @@ export class PostgresSyncRulesStorage no_checkpoint_before_lsn: syncRules?.no_checkpoint_before ?? options.zeroLSN, store_current_data: options.storeCurrentData, skip_existing_rows: options.skipExistingRows ?? false, - batch_limits: this.options.batchLimits + batch_limits: this.options.batchLimits, + markRecordUnavailable: options.markRecordUnavailable }); this.iterateListeners((cb) => cb.batchStarted?.(batch)); @@ -580,6 +589,7 @@ export class PostgresSyncRulesStorage const syncRulesRow = await this.db.sql` SELECT snapshot_done, + snapshot_lsn, last_checkpoint_lsn, state FROM @@ -587,7 +597,7 @@ export class PostgresSyncRulesStorage WHERE id = ${{ type: 'int4', value: this.group_id }} ` - .decoded(pick(models.SyncRules, ['snapshot_done', 'last_checkpoint_lsn', 'state'])) + .decoded(pick(models.SyncRules, ['snapshot_done', 'last_checkpoint_lsn', 'state', 'snapshot_lsn'])) .first(); if (syncRulesRow == null) { @@ -597,7 +607,8 @@ export class PostgresSyncRulesStorage return { snapshot_done: syncRulesRow.snapshot_done, active: syncRulesRow.state == storage.SyncRuleState.ACTIVE, - checkpoint_lsn: syncRulesRow.last_checkpoint_lsn ?? null + checkpoint_lsn: syncRulesRow.last_checkpoint_lsn ?? null, + snapshot_lsn: syncRulesRow.snapshot_lsn ?? null }; } diff --git a/modules/module-postgres-storage/src/storage/PostgresTestStorageFactoryGenerator.ts b/modules/module-postgres-storage/src/storage/PostgresTestStorageFactoryGenerator.ts index 1739beeba..1e6015cd6 100644 --- a/modules/module-postgres-storage/src/storage/PostgresTestStorageFactoryGenerator.ts +++ b/modules/module-postgres-storage/src/storage/PostgresTestStorageFactoryGenerator.ts @@ -12,50 +12,62 @@ export type PostgresTestStorageOptions = { migrationAgent?: (config: PostgresStorageConfigDecoded) => PostgresMigrationAgent; }; -export const PostgresTestStorageFactoryGenerator = (factoryOptions: PostgresTestStorageOptions) => { - return async (options?: TestStorageOptions) => { - try { - const migrationManager: PowerSyncMigrationManager = new framework.MigrationManager(); - - const BASE_CONFIG = { - type: 'postgresql' as const, - uri: factoryOptions.url, - sslmode: 'disable' as const - }; - - const TEST_CONNECTION_OPTIONS = normalizePostgresStorageConfig(BASE_CONFIG); - - await using migrationAgent = factoryOptions.migrationAgent - ? 
factoryOptions.migrationAgent(BASE_CONFIG) - : new PostgresMigrationAgent(BASE_CONFIG); - migrationManager.registerMigrationAgent(migrationAgent); - - const mockServiceContext = { configuration: { storage: BASE_CONFIG } } as unknown as ServiceContext; - - if (!options?.doNotClear) { - await migrationManager.migrate({ - direction: framework.migrations.Direction.Down, - migrationContext: { - service_context: mockServiceContext - } - }); +export const postgresTestSetup = (factoryOptions: PostgresTestStorageOptions) => { + const BASE_CONFIG = { + type: 'postgresql' as const, + uri: factoryOptions.url, + sslmode: 'disable' as const + }; + + const TEST_CONNECTION_OPTIONS = normalizePostgresStorageConfig(BASE_CONFIG); + + const migrate = async (direction: framework.migrations.Direction) => { + await using migrationManager: PowerSyncMigrationManager = new framework.MigrationManager(); + await using migrationAgent = factoryOptions.migrationAgent + ? factoryOptions.migrationAgent(BASE_CONFIG) + : new PostgresMigrationAgent(BASE_CONFIG); + migrationManager.registerMigrationAgent(migrationAgent); + + const mockServiceContext = { configuration: { storage: BASE_CONFIG } } as unknown as ServiceContext; + + await migrationManager.migrate({ + direction: framework.migrations.Direction.Down, + migrationContext: { + service_context: mockServiceContext } + }); + if (direction == framework.migrations.Direction.Up) { await migrationManager.migrate({ direction: framework.migrations.Direction.Up, migrationContext: { service_context: mockServiceContext } }); - - return new PostgresBucketStorageFactory({ - config: TEST_CONNECTION_OPTIONS, - slot_name_prefix: 'test_' - }); - } catch (ex) { - // Vitest does not display these errors nicely when using the `await using` syntx - console.error(ex, ex.cause); - throw ex; } }; + + return { + factory: async (options?: TestStorageOptions) => { + try { + if (!options?.doNotClear) { + await migrate(framework.migrations.Direction.Up); + } + + return new PostgresBucketStorageFactory({ + config: TEST_CONNECTION_OPTIONS, + slot_name_prefix: 'test_' + }); + } catch (ex) { + // Vitest does not display these errors nicely when using the `await using` syntx + console.error(ex, ex.cause); + throw ex; + } + }, + migrate + }; +}; + +export const PostgresTestStorageFactoryGenerator = (factoryOptions: PostgresTestStorageOptions) => { + return postgresTestSetup(factoryOptions).factory; }; diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index 735aa7d32..ab0c942c8 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -4,12 +4,12 @@ import { container, ErrorCode, errors, - logger, + Logger, ReplicationAssertionError, ServiceAssertionError, ServiceError } from '@powersync/lib-services-framework'; -import { InternalOpId, storage, utils } from '@powersync/service-core'; +import { BucketStorageMarkRecordUnavailable, InternalOpId, storage, utils } from '@powersync/service-core'; import * as sync_rules from '@powersync/service-sync-rules'; import * as timers from 'timers/promises'; import * as t from 'ts-codec'; @@ -22,6 +22,7 @@ import { cacheKey, encodedCacheKey, OperationBatch, RecordOperation } from './Op import { PostgresPersistedBatch } from './PostgresPersistedBatch.js'; export interface PostgresBucketBatchOptions { + logger: Logger; db: lib_postgres.DatabaseClient; 
sync_rules: sync_rules.SqlSyncRules; group_id: number; @@ -35,6 +36,8 @@ export interface PostgresBucketBatchOptions { */ skip_existing_rows: boolean; batch_limits: RequiredOperationBatchLimits; + + markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; } /** @@ -54,6 +57,8 @@ export class PostgresBucketBatch extends BaseObserver implements storage.BucketStorageBatch { + private logger: Logger; + public last_flushed_op: InternalOpId | null = null; protected db: lib_postgres.DatabaseClient; @@ -67,15 +72,18 @@ export class PostgresBucketBatch protected readonly sync_rules: sync_rules.SqlSyncRules; protected batch: OperationBatch | null; private lastWaitingLogThrottled = 0; + private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; constructor(protected options: PostgresBucketBatchOptions) { super(); + this.logger = options.logger; this.db = options.db; this.group_id = options.group_id; this.last_checkpoint_lsn = options.last_checkpoint_lsn; this.no_checkpoint_before_lsn = options.no_checkpoint_before_lsn; this.write_checkpoint_batch = []; this.sync_rules = options.sync_rules; + this.markRecordUnavailable = options.markRecordUnavailable; this.batch = null; this.persisted_op = null; if (options.keep_alive_op) { @@ -115,7 +123,7 @@ export class PostgresBucketBatch return null; } - logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); + this.logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); this.batch ??= new OperationBatch(this.options.batch_limits); this.batch.push(new RecordOperation(record)); @@ -279,14 +287,14 @@ export class PostgresBucketBatch if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) { // When re-applying transactions, don't create a new checkpoint until // we are past the last transaction. - logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`); + this.logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`); // Cannot create a checkpoint yet - return false return false; } if (lsn < this.no_checkpoint_before_lsn) { if (Date.now() - this.lastWaitingLogThrottled > 5_000) { - logger.info( + this.logger.info( `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}` ); this.lastWaitingLogThrottled = Date.now(); @@ -336,6 +344,7 @@ export class PostgresBucketBatch keepalive_op = ${{ type: 'int8', value: update.keepalive_op }}, last_fatal_error = ${{ type: 'varchar', value: update.last_fatal_error }}, snapshot_done = ${{ type: 'bool', value: update.snapshot_done }}, + snapshot_lsn = NULL, last_keepalive_ts = ${{ type: 1184, value: update.last_keepalive_ts }}, last_checkpoint = COALESCE( ${{ type: 'int8', value: update.last_checkpoint }}, @@ -374,7 +383,7 @@ export class PostgresBucketBatch if (this.persisted_op != null) { // The commit may have been skipped due to "no_checkpoint_before_lsn". 
// Apply it now if relevant - logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`); + this.logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`); return await this.commit(lsn); } @@ -382,6 +391,7 @@ export class PostgresBucketBatch UPDATE sync_rules SET snapshot_done = ${{ type: 'bool', value: true }}, + snapshot_lsn = NULL, last_checkpoint_lsn = ${{ type: 'varchar', value: lsn }}, last_fatal_error = ${{ type: 'varchar', value: null }}, last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }} @@ -402,6 +412,16 @@ export class PostgresBucketBatch return true; } + async setSnapshotLsn(lsn: string): Promise { + await this.db.sql` + UPDATE sync_rules + SET + snapshot_lsn = ${{ type: 'varchar', value: lsn }} + WHERE + id = ${{ type: 'int4', value: this.group_id }} + `.execute(); + } + async markSnapshotDone( tables: storage.SourceTable[], no_checkpoint_before_lsn: string @@ -412,7 +432,10 @@ export class PostgresBucketBatch await db.sql` UPDATE source_tables SET - snapshot_done = ${{ type: 'bool', value: true }} + snapshot_done = ${{ type: 'bool', value: true }}, + snapshot_total_estimated_count = NULL, + snapshot_replicated_count = NULL, + snapshot_last_key = NULL WHERE id IN ( SELECT @@ -451,6 +474,31 @@ export class PostgresBucketBatch }); } + async updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + const copy = table.clone(); + const snapshotStatus = { + totalEstimatedCount: progress.totalEstimatedCount ?? copy.snapshotStatus?.totalEstimatedCount ?? 0, + replicatedCount: progress.replicatedCount ?? copy.snapshotStatus?.replicatedCount ?? 0, + lastKey: progress.lastKey ?? copy.snapshotStatus?.lastKey ?? null + }; + copy.snapshotStatus = snapshotStatus; + + await this.db.sql` + UPDATE source_tables + SET + snapshot_total_estimated_count = ${{ type: 'int4', value: snapshotStatus.totalEstimatedCount }}, + snapshot_replicated_count = ${{ type: 'int4', value: snapshotStatus.replicatedCount }}, + snapshot_last_key = ${{ type: 'bytea', value: snapshotStatus.lastKey }} + WHERE + id = ${{ type: 'varchar', value: table.id }} + `.execute(); + + return copy; + } + addCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): void { this.write_checkpoint_batch.push({ ...checkpoint, @@ -660,10 +708,19 @@ export class PostgresBucketBatch existingBuckets = []; existingLookups = []; // Log to help with debugging if there was a consistency issue + if (this.options.store_current_data) { - logger.warn( - `Cannot find previous record for update on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}` - ); + if (this.markRecordUnavailable != null) { + // This will trigger a "resnapshot" of the record. + // This is not relevant if storeCurrentData is false, since we'll get the full row + // directly in the replication stream. 
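For context, this callback is supplied by the replication stream when it starts a batch. A rough sketch of that wiring, assuming the callback receives the SaveUpdate it could not resolve (the exact BucketStorageMarkRecordUnavailable signature is defined in @powersync/service-core, and the real logic lives in the replication streams):

import { SaveUpdate, storage } from '@powersync/service-core';

// Sketch: start a batch that records rows needing a re-snapshot instead of
// logging "cannot find previous record" warnings.
async function applyWithResnapshot(
  bucketStorage: storage.SyncRulesBucketStorage,
  zeroLSN: string,
  defaultSchema: string
): Promise<SaveUpdate[]> {
  const rowsToResnapshot: SaveUpdate[] = [];

  await bucketStorage.startBatch(
    {
      zeroLSN,
      defaultSchema,
      storeCurrentData: true,
      markRecordUnavailable: (record) => {
        rowsToResnapshot.push(record);
      }
    },
    async (batch) => {
      // ... apply streamed changes here (e.g. batch.save(...)); any record whose
      // previous state is missing is reported through markRecordUnavailable.
    }
  );

  return rowsToResnapshot;
}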
+ this.markRecordUnavailable(record); + } else { + // Log to help with debugging if there was a consistency issue + this.logger.warn( + `Cannot find previous record for update on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}` + ); + } } } else { existingBuckets = result.buckets; @@ -680,8 +737,8 @@ export class PostgresBucketBatch existingBuckets = []; existingLookups = []; // Log to help with debugging if there was a consistency issue - if (this.options.store_current_data) { - logger.warn( + if (this.options.store_current_data && this.markRecordUnavailable == null) { + this.logger.warn( `Cannot find previous record for delete on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}` ); } @@ -774,7 +831,7 @@ export class PostgresBucketBatch } } ); - logger.error( + this.logger.error( `Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}` ); } @@ -814,7 +871,7 @@ export class PostgresBucketBatch } } ); - logger.error( + this.logger.error( `Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${after.id}: ${error.error}` ); } diff --git a/modules/module-postgres-storage/src/types/models/SourceTable.ts b/modules/module-postgres-storage/src/types/models/SourceTable.ts index 9613fd3b3..257eac0ce 100644 --- a/modules/module-postgres-storage/src/types/models/SourceTable.ts +++ b/modules/module-postgres-storage/src/types/models/SourceTable.ts @@ -1,5 +1,5 @@ import * as t from 'ts-codec'; -import { bigint, jsonb, jsonb_raw, pgwire_number } from '../codecs.js'; +import { bigint, hexBuffer, jsonb, jsonb_raw, pgwire_number } from '../codecs.js'; export type StoredRelationId = { object_id: string | number | undefined; @@ -25,7 +25,10 @@ export const SourceTable = t.object({ schema_name: t.string, table_name: t.string, replica_id_columns: t.Null.or(jsonb(t.array(ColumnDescriptor))), - snapshot_done: t.boolean + snapshot_done: t.boolean, + snapshot_total_estimated_count: t.Null.or(bigint), + snapshot_replicated_count: t.Null.or(bigint), + snapshot_last_key: t.Null.or(hexBuffer) }); export type SourceTable = t.Encoded; diff --git a/modules/module-postgres-storage/src/types/models/SyncRules.ts b/modules/module-postgres-storage/src/types/models/SyncRules.ts index 8edc5eea4..8a6ca1a35 100644 --- a/modules/module-postgres-storage/src/types/models/SyncRules.ts +++ b/modules/module-postgres-storage/src/types/models/SyncRules.ts @@ -11,6 +11,10 @@ export const SyncRules = t.object({ * Can only be false if state == PROCESSING. */ snapshot_done: t.boolean, + /** + * May be set if snapshot_done = false, if the replication stream requires it. + */ + snapshot_lsn: t.Null.or(t.string), /** * The last consistent checkpoint. 
* diff --git a/modules/module-postgres-storage/test/src/migrations.test.ts b/modules/module-postgres-storage/test/src/migrations.test.ts index ee3acd955..1f2e12a64 100644 --- a/modules/module-postgres-storage/test/src/migrations.test.ts +++ b/modules/module-postgres-storage/test/src/migrations.test.ts @@ -1,15 +1,23 @@ -import { describe, expect, it } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; +import { Direction } from '@powersync/lib-services-framework'; import { register } from '@powersync/service-core-tests'; import { PostgresMigrationAgent } from '../../src/migrations/PostgresMigrationAgent.js'; import { env } from './env.js'; -import { POSTGRES_STORAGE_FACTORY } from './util.js'; +import { POSTGRES_STORAGE_FACTORY, POSTGRES_STORAGE_SETUP } from './util.js'; const MIGRATION_AGENT_FACTORY = () => { return new PostgresMigrationAgent({ type: 'postgresql', uri: env.PG_STORAGE_TEST_URL, sslmode: 'disable' }); }; describe('Migrations', () => { + beforeEach(async () => { + // The migration tests clear the migration store, without running the down migrations. + // This ensures all the down migrations have been run before. + const setup = POSTGRES_STORAGE_SETUP; + await setup.migrate(Direction.Down); + }); + register.registerMigrationTests(MIGRATION_AGENT_FACTORY); it('Should have tables declared', async () => { diff --git a/modules/module-postgres-storage/test/src/util.ts b/modules/module-postgres-storage/test/src/util.ts index 3f0cd4428..c9e0e2555 100644 --- a/modules/module-postgres-storage/test/src/util.ts +++ b/modules/module-postgres-storage/test/src/util.ts @@ -2,7 +2,10 @@ import path from 'path'; import { fileURLToPath } from 'url'; import { normalizePostgresStorageConfig } from '../../src//types/types.js'; import { PostgresMigrationAgent } from '../../src/migrations/PostgresMigrationAgent.js'; -import { PostgresTestStorageFactoryGenerator } from '../../src/storage/PostgresTestStorageFactoryGenerator.js'; +import { + postgresTestSetup, + PostgresTestStorageFactoryGenerator +} from '../../src/storage/PostgresTestStorageFactoryGenerator.js'; import { env } from './env.js'; const __filename = fileURLToPath(import.meta.url); @@ -28,7 +31,9 @@ class TestPostgresMigrationAgent extends PostgresMigrationAgent { } } -export const POSTGRES_STORAGE_FACTORY = PostgresTestStorageFactoryGenerator({ +export const POSTGRES_STORAGE_SETUP = postgresTestSetup({ url: env.PG_STORAGE_TEST_URL, migrationAgent: (config) => new TestPostgresMigrationAgent(config) }); + +export const POSTGRES_STORAGE_FACTORY = POSTGRES_STORAGE_SETUP.factory; diff --git a/modules/module-postgres/package.json b/modules/module-postgres/package.json index 824e8bdad..00a870c6a 100644 --- a/modules/module-postgres/package.json +++ b/modules/module-postgres/package.json @@ -45,6 +45,7 @@ "devDependencies": { "@powersync/service-core-tests": "workspace:*", "@powersync/service-module-mongodb-storage": "workspace:*", + "@powersync/lib-service-postgres": "workspace:*", "@powersync/service-module-postgres-storage": "workspace:*", "@types/semver": "^7.5.4" } diff --git a/modules/module-postgres/src/replication/SnapshotQuery.ts b/modules/module-postgres/src/replication/SnapshotQuery.ts new file mode 100644 index 000000000..438637cb6 --- /dev/null +++ b/modules/module-postgres/src/replication/SnapshotQuery.ts @@ -0,0 +1,209 @@ +import { ColumnDescriptor, SourceTable, bson } from '@powersync/service-core'; +import { PgChunk, PgConnection, PgType, StatementParam, PgTypeOid } from '@powersync/service-jpgwire'; 
+import { escapeIdentifier } from '@powersync/lib-service-postgres'; +import { SqliteValue } from '@powersync/service-sync-rules'; +import { ServiceAssertionError } from '@powersync/lib-services-framework'; + +export interface SnapshotQuery { + initialize(): Promise; + /** + * Returns an async iterable iterator that yields chunks of data. + * + * If the last chunk has 0 rows, it indicates that there are no more rows to fetch. + */ + nextChunk(): AsyncIterableIterator; +} + +export type PrimaryKeyValue = Record; + +export interface MissingRow { + table: SourceTable; + key: PrimaryKeyValue; +} + +/** + * Snapshot query using a plain SELECT * FROM table; chunked using + * DELCLARE CURSOR / FETCH. + * + * This supports all tables, but does not efficiently resume the snapshot + * if the process is restarted. + */ +export class SimpleSnapshotQuery implements SnapshotQuery { + public constructor( + private readonly connection: PgConnection, + private readonly table: SourceTable, + private readonly chunkSize: number = 10_000 + ) {} + + public async initialize(): Promise { + await this.connection.query(`DECLARE snapshot_cursor CURSOR FOR SELECT * FROM ${this.table.escapedIdentifier}`); + } + + public nextChunk(): AsyncIterableIterator { + return this.connection.stream(`FETCH ${this.chunkSize} FROM snapshot_cursor`); + } +} + +/** + * Performs a table snapshot query, chunking by ranges of primary key data. + * + * This may miss some rows if they are modified during the snapshot query. + * In that case, logical replication will pick up those rows afterwards, + * possibly resulting in an IdSnapshotQuery. + * + * Currently, this only supports a table with a single primary key column, + * of a select few types. + */ +export class ChunkedSnapshotQuery implements SnapshotQuery { + /** + * Primary key types that we support for chunked snapshots. + * + * Can expand this over time as we add more tests, + * and ensure there are no issues with type conversion. 
+ */ + static SUPPORTED_TYPES = [ + PgTypeOid.TEXT, + PgTypeOid.VARCHAR, + PgTypeOid.UUID, + PgTypeOid.INT2, + PgTypeOid.INT4, + PgTypeOid.INT8 + ]; + + static supports(table: SourceTable) { + if (table.replicaIdColumns.length != 1) { + return false; + } + const primaryKey = table.replicaIdColumns[0]; + + return primaryKey.typeId != null && ChunkedSnapshotQuery.SUPPORTED_TYPES.includes(Number(primaryKey.typeId)); + } + + private readonly key: ColumnDescriptor; + lastKey: string | bigint | null = null; + + public constructor( + private readonly connection: PgConnection, + private readonly table: SourceTable, + private readonly chunkSize: number = 10_000, + lastKeySerialized: Uint8Array | null + ) { + this.key = table.replicaIdColumns[0]; + + if (lastKeySerialized != null) { + this.lastKey = this.deserializeKey(lastKeySerialized); + } + } + + public async initialize(): Promise { + // No-op + } + + private deserializeKey(key: Uint8Array) { + const decoded = bson.deserialize(key, { useBigInt64: true }); + const keys = Object.keys(decoded); + if (keys.length != 1) { + throw new ServiceAssertionError(`Multiple keys found: ${keys.join(', ')}`); + } + if (keys[0] != this.key.name) { + throw new ServiceAssertionError(`Key name mismatch: expected ${this.key.name}, got ${keys[0]}`); + } + const value = decoded[this.key.name]; + return value; + } + + public getLastKeySerialized(): Uint8Array { + return bson.serialize({ [this.key.name]: this.lastKey }); + } + + public async *nextChunk(): AsyncIterableIterator { + let stream: AsyncIterableIterator; + const escapedKeyName = escapeIdentifier(this.key.name); + if (this.lastKey == null) { + stream = this.connection.stream( + `SELECT * FROM ${this.table.escapedIdentifier} ORDER BY ${escapedKeyName} LIMIT ${this.chunkSize}` + ); + } else { + if (this.key.typeId == null) { + throw new Error(`typeId required for primary key ${this.key.name}`); + } + let type: StatementParam['type'] = Number(this.key.typeId); + stream = this.connection.stream({ + statement: `SELECT * FROM ${this.table.escapedIdentifier} WHERE ${escapedKeyName} > $1 ORDER BY ${escapedKeyName} LIMIT ${this.chunkSize}`, + params: [{ value: this.lastKey, type }] + }); + } + let primaryKeyIndex: number = -1; + + for await (let chunk of stream) { + if (chunk.tag == 'RowDescription') { + // We get a RowDescription for each FETCH call, but they should + // all be the same. + let i = 0; + const pk = chunk.payload.findIndex((c) => c.name == this.key.name); + if (pk < 0) { + throw new Error( + `Cannot find primary key column ${this.key} in results. Keys: ${chunk.payload.map((c) => c.name).join(', ')}` + ); + } + primaryKeyIndex = pk; + } + + if (chunk.rows.length > 0) { + this.lastKey = chunk.rows[chunk.rows.length - 1][primaryKeyIndex]; + } + yield chunk; + } + } +} + +/** + * This performs a snapshot query using a list of primary keys. + * + * This is not used for general snapshots, but is used when we need to re-fetch specific rows + * during streaming replication. + */ +export class IdSnapshotQuery implements SnapshotQuery { + private didChunk = false; + + static supports(table: SourceTable) { + // We have the same requirements as ChunkedSnapshotQuery. + // This is typically only used as a fallback when ChunkedSnapshotQuery + // skipped some rows. 
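How a caller might choose between these strategies, as an illustrative sketch only (createSnapshotQuery is a hypothetical helper; the actual selection and resume handling lives in WalStream, and the classes referenced are the ones defined in this file):

import { SourceTable } from '@powersync/service-core';
import { PgConnection } from '@powersync/service-jpgwire';

// Prefer the resumable, primary-key-chunked strategy when the table qualifies;
// otherwise fall back to a plain cursor-based snapshot.
function createSnapshotQuery(
  connection: PgConnection,
  table: SourceTable,
  lastKeySerialized: Uint8Array | null,
  chunkSize = 10_000
): SnapshotQuery {
  if (ChunkedSnapshotQuery.supports(table)) {
    return new ChunkedSnapshotQuery(connection, table, chunkSize, lastKeySerialized);
  }
  return new SimpleSnapshotQuery(connection, table, chunkSize);
}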
+ return ChunkedSnapshotQuery.supports(table); + } + + public constructor( + private readonly connection: PgConnection, + private readonly table: SourceTable, + private readonly keys: PrimaryKeyValue[] + ) {} + + public async initialize(): Promise { + // No-op + } + + public async *nextChunk(): AsyncIterableIterator { + // Only produce one chunk + if (this.didChunk) { + return; + } + this.didChunk = true; + + const keyDefinition = this.table.replicaIdColumns[0]; + const ids = this.keys.map((record) => record[keyDefinition.name]); + const type = PgType.getArrayType(keyDefinition.typeId!); + if (type == null) { + throw new Error(`Cannot determine primary key array type for ${JSON.stringify(keyDefinition)}`); + } + yield* this.connection.stream({ + statement: `SELECT * FROM ${this.table.escapedIdentifier} WHERE ${escapeIdentifier(keyDefinition.name)} = ANY($1)`, + params: [ + { + type: type, + value: ids + } + ] + }); + } +} diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 4176b9eb9..998c2c200 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -4,11 +4,21 @@ import { DatabaseConnectionError, ErrorCode, errors, - logger, - ReplicationAbortedError, - ReplicationAssertionError + Logger, + logger as defaultLogger, + ReplicationAssertionError, + ReplicationAbortedError } from '@powersync/lib-services-framework'; -import { getUuidReplicaIdentityBson, MetricsEngine, SourceEntityDescriptor, storage } from '@powersync/service-core'; +import { + BucketStorageBatch, + getUuidReplicaIdentityBson, + MetricsEngine, + RelationCache, + SaveUpdate, + SourceEntityDescriptor, + SourceTable, + storage +} from '@powersync/service-core'; import * as pgwire from '@powersync/service-jpgwire'; import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern, toSyncRulesRow } from '@powersync/service-sync-rules'; import * as pg_utils from '../utils/pgwire_utils.js'; @@ -17,12 +27,30 @@ import { PgManager } from './PgManager.js'; import { getPgOutputRelation, getRelId } from './PgRelation.js'; import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js'; import { ReplicationMetric } from '@powersync/service-types'; +import { + ChunkedSnapshotQuery, + IdSnapshotQuery, + MissingRow, + PrimaryKeyValue, + SimpleSnapshotQuery, + SnapshotQuery +} from './SnapshotQuery.js'; export interface WalStreamOptions { + logger?: Logger; connections: PgManager; storage: storage.SyncRulesBucketStorage; metrics: MetricsEngine; abort_signal: AbortSignal; + + /** + * Override snapshot chunk length (number of rows), for testing. + * + * Defaults to 10_000. + * + * Note that queries are streamed, so we don't actually keep that much data in memory. 
+ */ + snapshotChunkLength?: number; } interface InitResult { @@ -73,6 +101,8 @@ export class WalStream { connection_id = 1; + private logger: Logger; + private readonly storage: storage.SyncRulesBucketStorage; private readonly metrics: MetricsEngine; private readonly slot_name: string; @@ -81,10 +111,17 @@ export class WalStream { private abort_signal: AbortSignal; - private relation_cache = new Map(); + private relationCache = new RelationCache((relation: number | SourceTable) => { + if (typeof relation == 'number') { + return relation; + } + return relation.objectId!; + }); private startedStreaming = false; + private snapshotChunkLength: number; + /** * Time of the oldest uncommitted change, according to the source db. * This is used to determine the replication lag. @@ -97,12 +134,14 @@ export class WalStream { private isStartingReplication = true; constructor(options: WalStreamOptions) { + this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.metrics = options.metrics; this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); this.group_id = options.storage.group_id; this.slot_name = options.storage.slot_name; this.connections = options.connections; + this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; this.abort_signal = options.abort_signal; this.abort_signal.addEventListener( @@ -115,7 +154,7 @@ export class WalStream { const promise = sendKeepAlive(this.connections.pool); promise.catch((e) => { // Failures here are okay - this only speeds up stopping the process. - logger.warn('Failed to ping connection', e); + this.logger.warn('Failed to ping connection', e); }); } else { // If we haven't started streaming yet, it could be due to something like @@ -194,7 +233,7 @@ export class WalStream { ] }); if (rs.rows.length == 0) { - logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); + this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); continue; } @@ -226,7 +265,7 @@ export class WalStream { const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null; if (snapshotDone) { // Snapshot is done, but we still need to check the replication slot status - logger.info(`${slotName} Initial replication already done`); + this.logger.info(`Initial replication already done`); } // Check if replication slot exists @@ -287,7 +326,7 @@ export class WalStream { // We peek a large number of changes here, to make it more likely to pick up replication slot errors. // For example, "publication does not exist" only occurs here if the peek actually includes changes related // to the slot. - logger.info(`Checking ${slotName}`); + this.logger.info(`Checking ${slotName}`); // The actual results can be quite large, so we don't actually return everything // due to memory and processing overhead that would create. @@ -304,11 +343,11 @@ export class WalStream { } // Success - logger.info(`Slot ${slotName} appears healthy`); + this.logger.info(`Slot ${slotName} appears healthy`); return { needsNewSlot: false }; } catch (e) { last_error = e; - logger.warn(`${slotName} Replication slot error`, e); + this.logger.warn(`Replication slot error`, e); if (this.stopped) { throw e; @@ -335,7 +374,7 @@ export class WalStream { // Sample: publication "powersync" does not exist // Happens when publication deleted or never created. // Slot must be re-created in this case. 
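For reference, the `reltuples` statistic read by `estimatedCountNumber` above is the planner's estimate and is `-1` for tables that have never been analyzed. A rough standalone sketch of that lookup (the `query` callback is an assumption, not the module's API):

```ts
// Rough sketch: estimate a table's row count from pg_class planner statistics.
// `query` is an assumed helper that runs a parameterized statement and returns rows.
type QueryFn = (sql: string, params: unknown[]) => Promise<unknown[][]>;

async function estimateRowCount(query: QueryFn, qualifiedTableName: string): Promise<number> {
  const rows = await query(`SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = $1::regclass`, [
    qualifiedTableName
  ]);
  const estimate = (rows[0]?.[0] as bigint | undefined) ?? -1n;
  // -1 means "never analyzed"; callers treat that as an unknown count.
  return estimate == -1n ? -1 : Number(estimate);
}
```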
- logger.info(`${slotName} is not valid anymore`); + this.logger.info(`${slotName} is not valid anymore`); return { needsNewSlot: true }; } @@ -347,7 +386,7 @@ export class WalStream { throw new ReplicationAssertionError('Unreachable'); } - async estimatedCount(db: pgwire.PgConnection, table: storage.SourceTable): Promise { + async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise { const results = await db.query({ statement: `SELECT reltuples::bigint AS estimate FROM pg_class @@ -356,9 +395,9 @@ WHERE oid = $1::regclass`, }); const row = results.rows[0]; if ((row?.[0] ?? -1n) == -1n) { - return '?'; + return -1; } else { - return `~${row[0]}`; + return Number(row[0]); } } @@ -392,7 +431,7 @@ WHERE oid = $1::regclass`, // The replication slot must be created before we start snapshotting tables. await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`); - logger.info(`Created replication slot ${slotName}`); + this.logger.info(`Created replication slot ${slotName}`); } await this.initialReplication(db); @@ -401,24 +440,37 @@ WHERE oid = $1::regclass`, async initialReplication(db: pgwire.PgConnection) { const sourceTables = this.sync_rules.getSourceTables(); await this.storage.startBatch( - { zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, storeCurrentData: true, skipExistingRows: true }, + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }, async (batch) => { + let tablesWithStatus: SourceTable[] = []; for (let tablePattern of sourceTables) { const tables = await this.getQualifiedTableNames(batch, db, tablePattern); + // Pre-get counts for (let table of tables) { if (table.snapshotComplete) { - logger.info(`${this.slot_name} Skipping ${table.qualifiedName} - snapshot already done`); + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); continue; } - await this.snapshotTable(batch, db, table); + const count = await this.estimatedCountNumber(db, table); + table = await batch.updateTableProgress(table, { totalEstimatedCount: count }); + this.relationCache.update(table); + tablesWithStatus.push(table); - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - const tableLsnNotBefore = rs.rows[0][0]; - await batch.markSnapshotDone([table], tableLsnNotBefore); - await touch(); + this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); } } + for (let table of tablesWithStatus) { + await this.snapshotTableInTx(batch, db, table); + await touch(); + } + // Always commit the initial snapshot at zero. // This makes sure we don't skip any changes applied before starting this snapshot, // in the case of snapshot retries. @@ -442,60 +494,164 @@ WHERE oid = $1::regclass`, yield toSyncRulesRow(row); } } + private async snapshotTableInTx( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ): Promise { + // Note: We use the default "Read Committed" isolation level here, not snapshot isolation. + // The data may change during the transaction, but that is compensated for in the streaming + // replication afterwards. + await db.query('BEGIN'); + try { + let tableLsnNotBefore: string; + await this.snapshotTable(batch, db, table, limited); + + // Get the current LSN. + // The data will only be consistent once incremental replication has passed that point. 
+ // We have to get this LSN _after_ we have finished the table snapshot. + // + // There are basically two relevant LSNs here: + // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side, + // but it is implicitly recorded in the replication slot. + // B: The LSN after the table snapshot is complete, which is what we get here. + // When we do the snapshot queries, the data that we get back for each chunk could match the state + // anywhere between A and B. To actually have a consistent state on our side, we need to: + // 1. Complete the snapshot. + // 2. Wait until logical replication has caught up with all the change between A and B. + // Calling `markSnapshotDone(LSN B)` covers that. + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + tableLsnNotBefore = rs.rows[0][0]; + // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. + await db.query('COMMIT'); + const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore); + this.relationCache.update(resultTable); + return resultTable; + } catch (e) { + await db.query('ROLLBACK'); + throw e; + } + } + + private async snapshotTable( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ) { + let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount; + let at = table.snapshotStatus?.replicatedCount ?? 0; + let lastCountTime = 0; + let q: SnapshotQuery; + // We do streaming on two levels: + // 1. Coarse level: DELCARE CURSOR, FETCH 10000 at a time. + // 2. Fine level: Stream chunks from each fetch call. + if (limited) { + q = new IdSnapshotQuery(db, table, limited); + } else if (ChunkedSnapshotQuery.supports(table)) { + // Single primary key - we can use the primary key for chunking + const orderByKey = table.replicaIdColumns[0]; + q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null); + if (table.snapshotStatus?.lastKey != null) { + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}` + ); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`); + } + } else { + // Fallback case - query the entire table + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`); + q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength); + at = 0; + } + await q.initialize(); - private async snapshotTable(batch: storage.BucketStorageBatch, db: pgwire.PgConnection, table: storage.SourceTable) { - logger.info(`${this.slot_name} Replicating ${table.qualifiedName}`); - const estimatedCount = await this.estimatedCount(db, table); - let at = 0; - let lastLogIndex = 0; - const cursor = db.stream({ statement: `SELECT * FROM ${table.escapedIdentifier}` }); let columns: { i: number; name: string }[] = []; - // pgwire streams rows in chunks. - // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. - - for await (let chunk of cursor) { - if (chunk.tag == 'RowDescription') { - let i = 0; - columns = chunk.payload.map((c) => { - return { i: i++, name: c.name }; + let hasRemainingData = true; + while (hasRemainingData) { + // Fetch 10k at a time. + // The balance here is between latency overhead per FETCH call, + // and not spending too much time on each FETCH call. 
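To recap the LSN bookkeeping described earlier in this hunk, here is a condensed sketch of the snapshot-in-transaction flow; `MinimalDb`, `MinimalBatch` and `runSnapshot` are simplified stand-ins for the real objects and are illustrative only:

```ts
// Condensed sketch: snapshot inside a transaction, then record LSN "B".
// The table only becomes consistent once logical replication catches up to B,
// which is what markSnapshotDone records.
interface MinimalDb {
  query(sql: string): Promise<{ rows: any[][] }>;
}
interface MinimalBatch {
  markSnapshotDone(tables: unknown[], lsnNotBefore: string): Promise<unknown[]>;
}

async function snapshotTableSketch(
  db: MinimalDb,
  batch: MinimalBatch,
  table: unknown,
  runSnapshot: () => Promise<void>
) {
  await db.query('BEGIN');
  try {
    await runSnapshot(); // chunked or simple snapshot queries
    // Read LSN "B" only after the snapshot queries completed.
    const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
    const lsnB: string = rs.rows[0][0];
    await db.query('COMMIT');
    // Not consistent before B: streaming replication must still replay A..B.
    await batch.markSnapshotDone([table], lsnB);
  } catch (e) {
    await db.query('ROLLBACK');
    throw e;
  }
}
```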
+ // We aim for a couple of seconds on each FETCH call. + const cursor = q.nextChunk(); + hasRemainingData = false; + // pgwire streams rows in chunks. + // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. + // There are typically 100-200 rows per chunk. + for await (let chunk of cursor) { + if (chunk.tag == 'RowDescription') { + // We get a RowDescription for each FETCH call, but they should + // all be the same. + let i = 0; + columns = chunk.payload.map((c) => { + return { i: i++, name: c.name }; + }); + continue; + } + + const rows = chunk.rows.map((row) => { + let q: DatabaseInputRow = {}; + for (let c of columns) { + q[c.name] = row[c.i]; + } + return q; }); - continue; - } + if (rows.length > 0) { + hasRemainingData = true; + } - const rows = chunk.rows.map((row) => { - let q: DatabaseInputRow = {}; - for (let c of columns) { - q[c.name] = row[c.i]; + for (const record of WalStream.getQueryData(rows)) { + // This auto-flushes when the batch reaches its size limit + await batch.save({ + tag: storage.SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: record, + afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) + }); } - return q; - }); - if (rows.length > 0 && at - lastLogIndex >= 5000) { - logger.info(`${this.slot_name} Replicating ${table.qualifiedName} ${at}/${estimatedCount}`); - lastLogIndex = at; - } - if (this.abort_signal.aborted) { - throw new ReplicationAbortedError(`Aborted initial replication of ${this.slot_name}`); + + at += rows.length; + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length); + + await touch(); } - for (const record of WalStream.getQueryData(rows)) { - // This auto-flushes when the batch reaches its size limit - await batch.save({ - tag: storage.SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: record, - afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) + // Important: flush before marking progress + await batch.flush(); + if (limited == null) { + let lastKey: Uint8Array | undefined; + if (q instanceof ChunkedSnapshotQuery) { + lastKey = q.getLastKeySerialized(); + } + if (lastCountTime < performance.now() - 10 * 60 * 1000) { + // Even though we're doing the snapshot inside a transaction, the transaction uses + // the default "Read Committed" isolation level. This means we can get new data + // within the transaction, so we re-estimate the count every 10 minutes when replicating + // large tables. 
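The 10-minute re-estimate mentioned above is a plain time-based throttle; a generic sketch of that pattern (names are illustrative):

```ts
// Generic time-based throttle: run an async refresh at most once per interval,
// otherwise return the previously computed value.
function throttledRefresh<T>(intervalMs: number, refresh: () => Promise<T>) {
  let lastRun = -Infinity;
  let lastValue: T | undefined;
  return async (): Promise<T | undefined> => {
    if (performance.now() - lastRun >= intervalMs) {
      lastValue = await refresh();
      lastRun = performance.now();
    }
    return lastValue;
  };
}

// Usage, mirroring the count re-estimation above (every 10 minutes):
// const refreshCount = throttledRefresh(10 * 60 * 1000, () => estimatedCountNumber(db, table));
```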
+ totalEstimatedCount = await this.estimatedCountNumber(db, table); + lastCountTime = performance.now(); + } + table = await batch.updateTableProgress(table, { + lastKey: lastKey, + replicatedCount: at, + totalEstimatedCount: totalEstimatedCount }); - } + this.relationCache.update(table); - at += rows.length; - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length); + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`); + } - await touch(); + if (this.abort_signal.aborted) { + // We only abort after flushing + throw new ReplicationAbortedError(`Initial replication interrupted`); + } } - - await batch.flush(); } async handleRelation(batch: storage.BucketStorageBatch, descriptor: SourceEntityDescriptor, snapshot: boolean) { @@ -509,7 +665,7 @@ WHERE oid = $1::regclass`, entity_descriptor: descriptor, sync_rules: this.sync_rules }); - this.relation_cache.set(descriptor.objectId, result.table); + this.relationCache.update(result.table); // Drop conflicting tables. This includes for example renamed tables. await batch.drop(result.dropTables); @@ -524,40 +680,59 @@ WHERE oid = $1::regclass`, // Truncate this table, in case a previous snapshot was interrupted. await batch.truncate([result.table]); - let lsn: string = ZERO_LSN; // Start the snapshot inside a transaction. // We use a dedicated connection for this. const db = await this.connections.snapshotConnection(); try { - await db.query('BEGIN'); - try { - await this.snapshotTable(batch, db, result.table); - - // Get the current LSN. - // The data will only be consistent once incremental replication - // has passed that point. - // We have to get this LSN _after_ we have started the snapshot query. - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - lsn = rs.rows[0][0]; - - await db.query('COMMIT'); - } catch (e) { - await db.query('ROLLBACK'); - // TODO: Wrap with custom error type - throw e; - } + const table = await this.snapshotTableInTx(batch, db, result.table); + // After the table snapshot, we wait for replication to catch up. + // To make sure there is actually something to replicate, we send a keepalive + // message. + await sendKeepAlive(db); + return table; } finally { await db.end(); } - const [table] = await batch.markSnapshotDone([result.table], lsn); - return table; } return result.table; } + /** + * Process rows that have missing TOAST values. + * + * This can happen during edge cases in the chunked intial snapshot process. + * + * We handle this similar to an inline table snapshot, but limited to the specific + * set of rows. 
+ */ + private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) { + const byTable = new Map(); + for (let row of rows) { + const relId = row.table.objectId as number; // always a number for postgres + if (!byTable.has(relId)) { + byTable.set(relId, []); + } + byTable.get(relId)!.push(row); + } + const db = await this.connections.snapshotConnection(); + try { + for (let rows of byTable.values()) { + const table = rows[0].table; + await this.snapshotTableInTx( + batch, + db, + table, + rows.map((r) => r.key) + ); + } + } finally { + await db.end(); + } + } + private getTable(relationId: number): storage.SourceTable { - const table = this.relation_cache.get(relationId); + const table = this.relationCache.get(relationId); if (table == null) { // We should always receive a replication message before the relation is used. // If we can't find it, it's a bug. @@ -576,7 +751,7 @@ WHERE oid = $1::regclass`, if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') { const table = this.getTable(getRelId(msg.relation)); if (!table.syncAny) { - logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`); + this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`); return null; } @@ -684,8 +859,39 @@ WHERE oid = $1::regclass`, // Auto-activate as soon as initial replication is done await this.storage.autoActivate(); + let resnapshot: { table: storage.SourceTable; key: PrimaryKeyValue }[] = []; + + const markRecordUnavailable = (record: SaveUpdate) => { + if (!IdSnapshotQuery.supports(record.sourceTable)) { + // If it's not supported, it's also safe to ignore + return; + } + let key: PrimaryKeyValue = {}; + for (let column of record.sourceTable.replicaIdColumns) { + const name = column.name; + const value = record.after[name]; + if (value == null) { + // We don't expect this to actually happen. + // The key should always be present in the "after" record. + return; + } + key[name] = value; + } + resnapshot.push({ + table: record.sourceTable, + key: key + }); + }; + await this.storage.startBatch( - { zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, storeCurrentData: true, skipExistingRows: false }, + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: false, + markRecordUnavailable + }, async (batch) => { // We don't handle any plain keepalive messages while we have transactions. // While we have transactions, we use that to advance the position. @@ -729,6 +935,16 @@ WHERE oid = $1::regclass`, // This effectively lets us batch multiple transactions within the same chunk // into a single flush, increasing throughput for many small transactions. skipKeepalive = false; + // flush() must be before the resnapshot check - that is + // typically what reports the resnapshot records. + await batch.flush(); + // This _must_ be checked after the flush(), and before + // commit() or ack(). We never persist the resnapshot list, + // so we have to process it before marking our progress. 
+ if (resnapshot.length > 0) { + await this.resnapshot(batch, resnapshot); + resnapshot = []; + } const didCommit = await batch.commit(msg.lsn!, { createEmptyCheckpoints, oldestUncommittedChange: this.oldestUncommittedChange @@ -741,7 +957,7 @@ WHERE oid = $1::regclass`, } } else { if (count % 100 == 0) { - logger.info(`${this.slot_name} replicating op ${count} ${msg.lsn}`); + this.logger.info(`Replicating op ${count} ${msg.lsn}`); } /** @@ -755,7 +971,14 @@ WHERE oid = $1::regclass`, } count += 1; - await this.writeChange(batch, msg); + const flushResult = await this.writeChange(batch, msg); + if (flushResult != null && resnapshot.length > 0) { + // If we have large transactions, we also need to flush the resnapshot list + // periodically. + // TODO: make sure this bit is actually triggered + await this.resnapshot(batch, resnapshot); + resnapshot = []; + } } } diff --git a/modules/module-postgres/src/replication/WalStreamReplicationJob.ts b/modules/module-postgres/src/replication/WalStreamReplicationJob.ts index 899cb6a71..97d163fb7 100644 --- a/modules/module-postgres/src/replication/WalStreamReplicationJob.ts +++ b/modules/module-postgres/src/replication/WalStreamReplicationJob.ts @@ -1,4 +1,4 @@ -import { container } from '@powersync/lib-services-framework'; +import { container, logger } from '@powersync/lib-services-framework'; import { PgManager } from './PgManager.js'; import { MissingReplicationSlotError, sendKeepAlive, WalStream } from './WalStream.js'; @@ -16,6 +16,7 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob constructor(options: WalStreamReplicationJobOptions) { super(options); + this.logger = logger.child({ prefix: `[${this.slotName}] ` }); this.connectionFactory = options.connectionFactory; this.connectionManager = this.connectionFactory.create({ // Pool connections are only used intermittently. 
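A small sketch of the ordering invariant spelled out in the comments above: the in-memory resnapshot queue must be drained after `flush()` (which is what reports the missing rows) and before the commit that advances replication progress. The simplified signatures here are illustrative, not the actual batch API:

```ts
// Sketch: drain the in-memory resnapshot queue between flush() and commit().
// If we committed first, a crash would lose the queue and the rows would stay stale.
interface MinimalBatch {
  flush(): Promise<void>;
  commit(lsn: string): Promise<boolean>;
}

async function commitWithResnapshot<K>(
  batch: MinimalBatch,
  resnapshotQueue: K[],
  resnapshot: (rows: K[]) => Promise<void>,
  lsn: string
): Promise<boolean> {
  await batch.flush(); // may enqueue rows via markRecordUnavailable
  if (resnapshotQueue.length > 0) {
    const rows = resnapshotQueue.splice(0); // take and clear the queue
    await resnapshot(rows);
  }
  return batch.commit(lsn); // only now is it safe to advance the LSN
}
```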
@@ -58,7 +59,7 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob replication_slot: this.slotName } }); - this.logger.error(`Replication failed on ${this.slotName}`, e); + this.logger.error(`Replication failed`, e); if (e instanceof MissingReplicationSlotError) { // This stops replication on this slot and restarts with a new slot @@ -94,6 +95,7 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob return; } const stream = new WalStream({ + logger: this.logger, abort_signal: this.abortController.signal, storage: this.options.storage, metrics: this.options.metrics, @@ -102,7 +104,7 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob this.lastStream = stream; await stream.replicate(); } catch (e) { - this.logger.error(`${this.slotName} Replication error`, e); + this.logger.error(`Replication error`, e); if (e.cause != null) { // Example: // PgError.conn_ended: Unable to do postgres query on ended connection diff --git a/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap b/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap index 2ba06c6fe..4133150e4 100644 --- a/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap +++ b/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap @@ -1,5 +1,5 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`schema changes - mongodb > add to publication (not in sync rules) 1`] = `0`; +exports[`schema changes > mongodb storage > add to publication (not in sync rules) 1`] = `0`; -exports[`schema changes - postgres > add to publication (not in sync rules) 1`] = `16384`; +exports[`schema changes > postgres storage > add to publication (not in sync rules) 1`] = `16384`; diff --git a/modules/module-postgres/test/src/checkpoints.test.ts b/modules/module-postgres/test/src/checkpoints.test.ts index 7ad247ea1..a0446ca1e 100644 --- a/modules/module-postgres/test/src/checkpoints.test.ts +++ b/modules/module-postgres/test/src/checkpoints.test.ts @@ -3,7 +3,6 @@ import { checkpointUserId, createWriteCheckpoint } from '@powersync/service-core import { describe, test } from 'vitest'; import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; -import { env } from './env.js'; import timers from 'node:timers/promises'; diff --git a/modules/module-postgres/test/src/chunked_snapshots.test.ts b/modules/module-postgres/test/src/chunked_snapshots.test.ts new file mode 100644 index 000000000..dffb8aa1e --- /dev/null +++ b/modules/module-postgres/test/src/chunked_snapshots.test.ts @@ -0,0 +1,156 @@ +import { reduceBucket, TestStorageFactory } from '@powersync/service-core'; +import { METRICS_HELPER } from '@powersync/service-core-tests'; +import { SqliteJsonValue } from '@powersync/service-sync-rules'; +import * as crypto from 'node:crypto'; +import * as timers from 'timers/promises'; +import { describe, expect, test } from 'vitest'; +import { describeWithStorage } from './util.js'; +import { WalStreamTestContext } from './wal_stream_utils.js'; + +describe('chunked snapshots', () => { + describeWithStorage({ timeout: 120_000 }, defineBatchTests); +}); + +function defineBatchTests(factory: TestStorageFactory) { + // We need to test every supported type, since chunking could be quite sensitive to + // how each specific type is handled. 
+ test('chunked snapshot edge case (int2)', async () => { + await testChunkedSnapshot({ + idType: 'int2', + genId: 'i', + lastId: '2000', + moveTo: '0', + moveToJs: 0 + }); + }); + + test('chunked snapshot edge case (int4)', async () => { + await testChunkedSnapshot({ + idType: 'int4', + genId: 'i', + lastId: '2000', + moveTo: '0', + moveToJs: 0 + }); + }); + + test('chunked snapshot edge case (int8)', async () => { + await testChunkedSnapshot({ + idType: 'int8', + genId: 'i', + lastId: '2000', + moveTo: '0', + moveToJs: 0 + }); + }); + + test('chunked snapshot edge case (text)', async () => { + await testChunkedSnapshot({ + idType: 'text', + genId: `to_char(i, 'fm0000')`, + lastId: `'2000'`, + moveTo: `'0000'`, + moveToJs: '0000' + }); + }); + + test('chunked snapshot edge case (varchar)', async () => { + await testChunkedSnapshot({ + idType: 'varchar', + genId: `to_char(i, 'fm0000')`, + lastId: `'2000'`, + moveTo: `'0000'`, + moveToJs: '0000' + }); + }); + + test('chunked snapshot edge case (uuid)', async () => { + await testChunkedSnapshot({ + idType: 'uuid', + // Generate a uuid by using the first part of a uuid and appending a 4-digit number. + genId: `('00000000-0000-4000-8000-00000000' || to_char(i, 'fm0000')) :: uuid`, + lastId: `'00000000-0000-4000-8000-000000002000'`, + moveTo: `'00000000-0000-4000-8000-000000000000'`, + moveToJs: '00000000-0000-4000-8000-000000000000' + }); + }); + + async function testChunkedSnapshot(options: { + idType: string; + genId: string; + lastId: string; + moveTo: string; + moveToJs: SqliteJsonValue; + }) { + // 1. Start with 2k rows, one row with id = 2000, and a large TOAST value in another column. + // 2. Replicate one batch of rows (id < 2000). + // 3. `UPDATE table SET id = 0 WHERE id = 2000` + // 4. Replicate the rest of the table. + // 5. Logical replication picks up the UPDATE above, but it is missing the TOAST column. + // 6. We end up with a row that has a missing TOAST column. + + await using context = await WalStreamTestContext.open(factory, { + // We need to use a smaller chunk size here, so that we can run a query in between chunks + walStreamOptions: { snapshotChunkLength: 100 } + }); + + await context.updateSyncRules(`bucket_definitions: + global: + data: + - SELECT * FROM test_data`); + const { pool } = context; + + await pool.query(`CREATE TABLE test_data(id ${options.idType} primary key, description text)`); + + // 1. Start with 2k rows, one row with id = 2000... + await pool.query({ + statement: `INSERT INTO test_data(id, description) SELECT ${options.genId}, 'foo' FROM generate_series(1, 2000) i` + }); + + // ...and a large TOAST value in another column. + // Toast value, must be > 8kb after compression + const largeDescription = crypto.randomBytes(20_000).toString('hex'); + await pool.query({ + statement: `UPDATE test_data SET description = $1 WHERE id = ${options.lastId} :: ${options.idType}`, + params: [{ type: 'varchar', value: largeDescription }] + }); + + // 2. Replicate one batch of rows (id < 100). + // Our "stopping point" here is not quite deterministic. + const p = context.replicateSnapshot(); + + const stopAfter = 100; + const startRowCount = (await METRICS_HELPER.getMetricValueForTests('powersync_rows_replicated_total')) ?? 0; + + while (true) { + const count = + ((await METRICS_HELPER.getMetricValueForTests('powersync_rows_replicated_total')) ?? 0) - startRowCount; + + if (count >= stopAfter) { + break; + } + await timers.setTimeout(1); + } + + // 3. 
`UPDATE table SET id = 0 WHERE id = 2000` + const rs = await pool.query( + `UPDATE test_data SET id = ${options.moveTo} WHERE id = ${options.lastId} RETURNING id` + ); + expect(rs.rows.length).toEqual(1); + + // 4. Replicate the rest of the table. + await p; + + // 5. Logical replication picks up the UPDATE above, but it is missing the TOAST column. + context.startStreaming(); + + // 6. If all went well, the "resnapshot" process would take care of this. + const data = await context.getBucketData('global[]', undefined, {}); + const reduced = reduceBucket(data); + + const movedRow = reduced.find((row) => row.object_id == String(options.moveToJs)); + expect(movedRow?.data).toEqual(JSON.stringify({ id: options.moveToJs, description: largeDescription })); + + expect(reduced.length).toEqual(2001); + } +} diff --git a/modules/module-postgres/test/src/large_batch.test.ts b/modules/module-postgres/test/src/large_batch.test.ts index 790394899..e084b65a7 100644 --- a/modules/module-postgres/test/src/large_batch.test.ts +++ b/modules/module-postgres/test/src/large_batch.test.ts @@ -1,37 +1,14 @@ import { storage } from '@powersync/service-core'; -import * as timers from 'timers/promises'; import { describe, expect, test } from 'vitest'; import { populateData } from '../../dist/utils/populate_test_data.js'; import { env } from './env.js'; -import { - INITIALIZED_MONGO_STORAGE_FACTORY, - INITIALIZED_POSTGRES_STORAGE_FACTORY, - TEST_CONNECTION_OPTIONS -} from './util.js'; +import { describeWithStorage, TEST_CONNECTION_OPTIONS } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; -import { METRICS_HELPER } from '@powersync/service-core-tests'; -import { ReplicationMetric } from '@powersync/service-types'; - -describe.skipIf(!env.TEST_MONGO_STORAGE)('batch replication tests - mongodb', { timeout: 120_000 }, function () { - // These are slow but consistent tests. - // Not run on every test run, but we do run on CI, or when manually debugging issues. - if (env.CI || env.SLOW_TESTS) { - defineBatchTests(INITIALIZED_MONGO_STORAGE_FACTORY); - } else { - // Need something in this file. - test('no-op', () => {}); - } -}); -describe.skipIf(!env.TEST_POSTGRES_STORAGE)('batch replication tests - postgres', { timeout: 240_000 }, function () { - // These are slow but consistent tests. - // Not run on every test run, but we do run on CI, or when manually debugging issues. - if (env.CI || env.SLOW_TESTS) { - defineBatchTests(INITIALIZED_POSTGRES_STORAGE_FACTORY); - } else { - // Need something in this file. - test('no-op', () => {}); - } +describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { + describeWithStorage({ timeout: 240_000 }, function (factory) { + defineBatchTests(factory); + }); }); const BASIC_SYNC_RULES = `bucket_definitions: @@ -257,132 +234,6 @@ function defineBatchTests(factory: storage.TestStorageFactory) { expect(checksum.get('global[]')!.count).toEqual((numDocs + 2) * 4); }); - test('resuming initial replication (1)', async () => { - // Stop early - likely to not include deleted row in first replication attempt. - await testResumingReplication(2000); - }); - test('resuming initial replication (2)', async () => { - // Stop late - likely to include deleted row in first replication attempt. - await testResumingReplication(8000); - }); - - async function testResumingReplication(stopAfter: number) { - // This tests interrupting and then resuming initial replication. 
- // We interrupt replication after test_data1 has fully replicated, and - // test_data2 has partially replicated. - // This test relies on interval behavior that is not 100% deterministic: - // 1. We attempt to abort initial replication once a certain number of - // rows have been replicated, but this is not exact. Our only requirement - // is that we have not fully replicated test_data2 yet. - // 2. Order of replication is not deterministic, so which specific rows - // have been / have not been replicated at that point is not deterministic. - // We do allow for some variation in the test results to account for this. - - await using context = await WalStreamTestContext.open(factory); - - await context.updateSyncRules(`bucket_definitions: - global: - data: - - SELECT * FROM test_data1 - - SELECT * FROM test_data2`); - const { pool } = context; - - await pool.query(`CREATE TABLE test_data1(id serial primary key, description text)`); - await pool.query(`CREATE TABLE test_data2(id serial primary key, description text)`); - - await pool.query( - { - statement: `INSERT INTO test_data1(description) SELECT 'foo' FROM generate_series(1, 1000) i` - }, - { - statement: `INSERT INTO test_data2( description) SELECT 'foo' FROM generate_series(1, 10000) i` - } - ); - - const p = context.replicateSnapshot(); - - let done = false; - - const startRowCount = (await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0; - try { - (async () => { - while (!done) { - const count = - ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; - - if (count >= stopAfter) { - break; - } - await timers.setTimeout(1); - } - // This interrupts initial replication - await context.dispose(); - })(); - // This confirms that initial replication was interrupted - await expect(p).rejects.toThrowError(); - done = true; - } finally { - done = true; - } - - // Bypass the usual "clear db on factory open" step. - await using context2 = await WalStreamTestContext.open(factory, { doNotClear: true }); - - // This delete should be using one of the ids already replicated - const { - rows: [[id1]] - } = await context2.pool.query(`DELETE FROM test_data2 WHERE id = (SELECT id FROM test_data2 LIMIT 1) RETURNING id`); - // This update should also be using one of the ids already replicated - const { - rows: [[id2]] - } = await context2.pool.query( - `UPDATE test_data2 SET description = 'update1' WHERE id = (SELECT id FROM test_data2 LIMIT 1) RETURNING id` - ); - const { - rows: [[id3]] - } = await context2.pool.query(`INSERT INTO test_data2(description) SELECT 'insert1' RETURNING id`); - - await context2.loadNextSyncRules(); - await context2.replicateSnapshot(); - - context2.startStreaming(); - const data = await context2.getBucketData('global[]', undefined, {}); - - const deletedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id1)); - const updatedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id2)); - const insertedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id3)); - - if (deletedRowOps.length != 0) { - // The deleted row was part of the first replication batch, - // so it is removed by streaming replication. - expect(deletedRowOps.length).toEqual(2); - expect(deletedRowOps[1].op).toEqual('REMOVE'); - } else { - // The deleted row was not part of the first replication batch, - // so it's not in the resulting ops at all. 
- } - - expect(updatedRowOps.length).toEqual(2); - // description for the first op could be 'foo' or 'update1'. - // We only test the final version. - expect(JSON.parse(updatedRowOps[1].data as string).description).toEqual('update1'); - - expect(insertedRowOps.length).toEqual(2); - expect(JSON.parse(insertedRowOps[0].data as string).description).toEqual('insert1'); - expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); - - // 1000 of test_data1 during first replication attempt. - // N >= 1000 of test_data2 during first replication attempt. - // 10000 - N - 1 + 1 of test_data2 during second replication attempt. - // An additional update during streaming replication (2x total for this row). - // An additional insert during streaming replication (2x total for this row). - // If the deleted row was part of the first replication batch, it's removed by streaming replication. - // This adds 2 ops. - // We expect this to be 11002 for stopAfter: 2000, and 11004 for stopAfter: 8000. - // However, this is not deterministic. - expect(data.length).toEqual(11002 + deletedRowOps.length); - } - function printMemoryUsage() { const memoryUsage = process.memoryUsage(); diff --git a/modules/module-postgres/test/src/resuming_snapshots.test.ts b/modules/module-postgres/test/src/resuming_snapshots.test.ts new file mode 100644 index 000000000..01da33739 --- /dev/null +++ b/modules/module-postgres/test/src/resuming_snapshots.test.ts @@ -0,0 +1,150 @@ +import { describe, expect, test } from 'vitest'; +import { env } from './env.js'; +import { describeWithStorage } from './util.js'; +import { WalStreamTestContext } from './wal_stream_utils.js'; +import { TestStorageFactory } from '@powersync/service-core'; +import { METRICS_HELPER } from '@powersync/service-core-tests'; +import { ReplicationMetric } from '@powersync/service-types'; +import * as timers from 'node:timers/promises'; +import { ReplicationAbortedError } from '@powersync/lib-services-framework'; + +describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { + describeWithStorage({ timeout: 240_000 }, function (factory) { + test('resuming initial replication (1)', async () => { + // Stop early - likely to not include deleted row in first replication attempt. + await testResumingReplication(factory, 2000); + }); + test('resuming initial replication (2)', async () => { + // Stop late - likely to include deleted row in first replication attempt. + await testResumingReplication(factory, 8000); + }); + }); +}); + +async function testResumingReplication(factory: TestStorageFactory, stopAfter: number) { + // This tests interrupting and then resuming initial replication. + // We interrupt replication after test_data1 has fully replicated, and + // test_data2 has partially replicated. + // This test relies on interval behavior that is not 100% deterministic: + // 1. We attempt to abort initial replication once a certain number of + // rows have been replicated, but this is not exact. Our only requirement + // is that we have not fully replicated test_data2 yet. + // 2. Order of replication is not deterministic, so which specific rows + // have been / have not been replicated at that point is not deterministic. + // We do allow for some variation in the test results to account for this. 
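The "not exact" stop condition described above boils down to polling a replicated-rows counter and tearing replication down once it passes a threshold; a distilled sketch of the loop the test below uses (the `readCount` callback is an assumption standing in for the metrics helper):

```ts
// Distilled sketch: interrupt initial replication once roughly `stopAfter` rows
// have been replicated. The exact stopping point is intentionally not exact.
import { setTimeout as delay } from 'node:timers/promises';

async function interruptAfterApproximately(
  readCount: () => Promise<number>, // e.g. reads the ROWS_REPLICATED metric
  stopAfter: number,
  interrupt: () => Promise<void> // e.g. dispose the test context
): Promise<void> {
  const start = await readCount();
  while ((await readCount()) - start < stopAfter) {
    await delay(1);
  }
  await interrupt();
}
```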
+ + await using context = await WalStreamTestContext.open(factory, { walStreamOptions: { snapshotChunkLength: 1000 } }); + + await context.updateSyncRules(`bucket_definitions: + global: + data: + - SELECT * FROM test_data1 + - SELECT * FROM test_data2`); + const { pool } = context; + + await pool.query(`CREATE TABLE test_data1(id serial primary key, description text)`); + await pool.query(`CREATE TABLE test_data2(id serial primary key, description text)`); + + await pool.query( + { + statement: `INSERT INTO test_data1(description) SELECT 'foo' FROM generate_series(1, 1000) i` + }, + { + statement: `INSERT INTO test_data2( description) SELECT 'foo' FROM generate_series(1, 10000) i` + } + ); + + const p = context.replicateSnapshot(); + + let done = false; + + const startRowCount = (await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0; + try { + (async () => { + while (!done) { + const count = + ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; + + if (count >= stopAfter) { + break; + } + await timers.setTimeout(1); + } + // This interrupts initial replication + await context.dispose(); + })(); + // This confirms that initial replication was interrupted + const error = await p.catch((e) => e); + expect(error).toBeInstanceOf(ReplicationAbortedError); + done = true; + } finally { + done = true; + } + + // Bypass the usual "clear db on factory open" step. + await using context2 = await WalStreamTestContext.open(factory, { + doNotClear: true, + walStreamOptions: { snapshotChunkLength: 1000 } + }); + + // This delete should be using one of the ids already replicated + const { + rows: [[id1]] + } = await context2.pool.query(`DELETE FROM test_data2 WHERE id = (SELECT id FROM test_data2 LIMIT 1) RETURNING id`); + // This update should also be using one of the ids already replicated + const { + rows: [[id2]] + } = await context2.pool.query( + `UPDATE test_data2 SET description = 'update1' WHERE id = (SELECT id FROM test_data2 LIMIT 1) RETURNING id` + ); + const { + rows: [[id3]] + } = await context2.pool.query(`INSERT INTO test_data2(description) SELECT 'insert1' RETURNING id`); + + await context2.loadNextSyncRules(); + await context2.replicateSnapshot(); + + context2.startStreaming(); + const data = await context2.getBucketData('global[]', undefined, {}); + + const deletedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id1)); + const updatedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id2)); + const insertedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id3)); + + if (deletedRowOps.length != 0) { + // The deleted row was part of the first replication batch, + // so it is removed by streaming replication. + expect(deletedRowOps.length).toEqual(2); + expect(deletedRowOps[1].op).toEqual('REMOVE'); + } else { + // The deleted row was not part of the first replication batch, + // so it's not in the resulting ops at all. + } + + expect(updatedRowOps.length).toEqual(2); + // description for the first op could be 'foo' or 'update1'. + // We only test the final version. 
+ expect(JSON.parse(updatedRowOps[1].data as string).description).toEqual('update1'); + + expect(insertedRowOps.length).toEqual(2); + expect(JSON.parse(insertedRowOps[0].data as string).description).toEqual('insert1'); + expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); + + // 1000 of test_data1 during first replication attempt. + // N >= 1000 of test_data2 during first replication attempt. + // 10000 - N - 1 + 1 of test_data2 during second replication attempt. + // An additional update during streaming replication (2x total for this row). + // An additional insert during streaming replication (2x total for this row). + // If the deleted row was part of the first replication batch, it's removed by streaming replication. + // This adds 2 ops. + // We expect this to be 11002 for stopAfter: 2000, and 11004 for stopAfter: 8000. + // However, this is not deterministic. + const expectedCount = 11002 + deletedRowOps.length; + expect(data.length).toEqual(expectedCount); + + const replicatedCount = + ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; + + // With resumable replication, there should be no need to re-replicate anything. + expect(replicatedCount).toEqual(expectedCount); +} diff --git a/modules/module-postgres/test/src/schema_changes.test.ts b/modules/module-postgres/test/src/schema_changes.test.ts index 3fffb3f6d..26a4fa3c8 100644 --- a/modules/module-postgres/test/src/schema_changes.test.ts +++ b/modules/module-postgres/test/src/schema_changes.test.ts @@ -3,16 +3,11 @@ import * as timers from 'timers/promises'; import { describe, expect, test } from 'vitest'; import { storage } from '@powersync/service-core'; -import { env } from './env.js'; -import { INITIALIZED_MONGO_STORAGE_FACTORY, INITIALIZED_POSTGRES_STORAGE_FACTORY } from './util.js'; +import { describeWithStorage } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; -describe.skipIf(!env.TEST_MONGO_STORAGE)('schema changes - mongodb', { timeout: 20_000 }, function () { - defineTests(INITIALIZED_MONGO_STORAGE_FACTORY); -}); - -describe.skipIf(!env.TEST_POSTGRES_STORAGE)('schema changes - postgres', { timeout: 20_000 }, function () { - defineTests(INITIALIZED_POSTGRES_STORAGE_FACTORY); +describe('schema changes', { timeout: 20_000 }, function () { + describeWithStorage({}, defineTests); }); const BASIC_SYNC_RULES = ` @@ -459,7 +454,7 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t2', 'test2')`); // Just as an FYI - cannot update or delete here - expect(pool.query(`UPDATE test_data SET description = 'test2b' WHERE id = 't2'`)).rejects.toThrow( + await expect(pool.query(`UPDATE test_data SET description = 'test2b' WHERE id = 't2'`)).rejects.toThrow( 'does not have a replica identity and publishes updates' ); @@ -497,7 +492,7 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t2', 'test2')`); // Just as an FYI - cannot update or delete here - expect(pool.query(`UPDATE test_data SET description = 'test2b' WHERE id = 't2'`)).rejects.toThrow( + await expect(pool.query(`UPDATE test_data SET description = 'test2b' WHERE id = 't2'`)).rejects.toThrow( 'does not have a replica identity and publishes updates' ); diff --git a/modules/module-postgres/test/src/slow_tests.test.ts b/modules/module-postgres/test/src/slow_tests.test.ts index b79009c8d..551eea9fe 100644 --- 
a/modules/module-postgres/test/src/slow_tests.test.ts +++ b/modules/module-postgres/test/src/slow_tests.test.ts @@ -5,9 +5,8 @@ import { env } from './env.js'; import { clearTestDb, connectPgPool, + describeWithStorage, getClientCheckpoint, - INITIALIZED_MONGO_STORAGE_FACTORY, - INITIALIZED_POSTGRES_STORAGE_FACTORY, TEST_CONNECTION_OPTIONS } from './util.js'; @@ -21,26 +20,10 @@ import * as mongo_storage from '@powersync/service-module-mongodb-storage'; import * as postgres_storage from '@powersync/service-module-postgres-storage'; import * as timers from 'node:timers/promises'; -describe.skipIf(!env.TEST_MONGO_STORAGE)('slow tests - mongodb', function () { - // These are slow, inconsistent tests. - // Not run on every test run, but we do run on CI, or when manually debugging issues. - if (env.CI || env.SLOW_TESTS) { - defineSlowTests(INITIALIZED_MONGO_STORAGE_FACTORY); - } else { - // Need something in this file. - test('no-op', () => {}); - } -}); - -describe.skipIf(!env.TEST_POSTGRES_STORAGE)('slow tests - postgres', function () { - // These are slow, inconsistent tests. - // Not run on every test run, but we do run on CI, or when manually debugging issues. - if (env.CI || env.SLOW_TESTS) { - defineSlowTests(INITIALIZED_POSTGRES_STORAGE_FACTORY); - } else { - // Need something in this file. - test('no-op', () => {}); - } +describe.skipIf(!(env.CI || env.SLOW_TESTS))('slow tests', function () { + describeWithStorage({ timeout: 120_000 }, function (factory) { + defineSlowTests(factory); + }); }); function defineSlowTests(factory: storage.TestStorageFactory) { @@ -350,14 +333,14 @@ bucket_definitions: const connections = new PgManager(TEST_CONNECTION_OPTIONS, {}); const replicationConnection = await connections.replicationConnection(); - abortController = new AbortController(); - const options: WalStreamOptions = { - abort_signal: abortController.signal, - connections, - storage: storage, - metrics: METRICS_HELPER.metricsEngine - }; - walStream = new WalStream(options); + abortController = new AbortController(); + const options: WalStreamOptions = { + abort_signal: abortController.signal, + connections, + storage: storage, + metrics: METRICS_HELPER.metricsEngine + }; + walStream = new WalStream(options); await storage.clear(); diff --git a/modules/module-postgres/test/src/util.ts b/modules/module-postgres/test/src/util.ts index 4135f4b78..30d106858 100644 --- a/modules/module-postgres/test/src/util.ts +++ b/modules/module-postgres/test/src/util.ts @@ -2,11 +2,12 @@ import { PostgresRouteAPIAdapter } from '@module/api/PostgresRouteAPIAdapter.js' import * as types from '@module/types/types.js'; import * as lib_postgres from '@powersync/lib-service-postgres'; import { logger } from '@powersync/lib-services-framework'; -import { BucketStorageFactory, InternalOpId, TestStorageOptions } from '@powersync/service-core'; +import { BucketStorageFactory, InternalOpId, TestStorageFactory, TestStorageOptions } from '@powersync/service-core'; import * as pgwire from '@powersync/service-jpgwire'; import * as mongo_storage from '@powersync/service-module-mongodb-storage'; import * as postgres_storage from '@powersync/service-module-postgres-storage'; import { env } from './env.js'; +import { describe, TestOptions } from 'vitest'; export const TEST_URI = env.PG_TEST_URL; @@ -19,6 +20,16 @@ export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.PostgresTes url: env.PG_STORAGE_TEST_URL }); +export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageFactory) => void) { + 
describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, options, function () { + fn(INITIALIZED_MONGO_STORAGE_FACTORY); + }); + + describe.skipIf(!env.TEST_POSTGRES_STORAGE)(`postgres storage`, options, function () { + fn(INITIALIZED_POSTGRES_STORAGE_FACTORY); + }); +} + export const TEST_CONNECTION_OPTIONS = types.normalizeConnectionConfig({ type: 'postgresql', uri: TEST_URI, diff --git a/modules/module-postgres/test/src/validation.test.ts b/modules/module-postgres/test/src/validation.test.ts index c97b3c95a..135b0fab7 100644 --- a/modules/module-postgres/test/src/validation.test.ts +++ b/modules/module-postgres/test/src/validation.test.ts @@ -1,7 +1,6 @@ import { getDebugTablesInfo } from '@module/replication/replication-utils.js'; import { expect, test } from 'vitest'; -// Not quite a walStreamTest, but it helps to manage the connection import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; diff --git a/modules/module-postgres/test/src/wal_stream.test.ts b/modules/module-postgres/test/src/wal_stream.test.ts index 109460acd..2293c54c3 100644 --- a/modules/module-postgres/test/src/wal_stream.test.ts +++ b/modules/module-postgres/test/src/wal_stream.test.ts @@ -2,12 +2,11 @@ import { MissingReplicationSlotError } from '@module/replication/WalStream.js'; import { storage } from '@powersync/service-core'; import { METRICS_HELPER, putOp, removeOp } from '@powersync/service-core-tests'; import { pgwireRows } from '@powersync/service-jpgwire'; +import { ReplicationMetric } from '@powersync/service-types'; import * as crypto from 'crypto'; import { describe, expect, test } from 'vitest'; -import { env } from './env.js'; -import { INITIALIZED_MONGO_STORAGE_FACTORY, INITIALIZED_POSTGRES_STORAGE_FACTORY } from './util.js'; +import { describeWithStorage } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; -import { ReplicationMetric } from '@powersync/service-types'; const BASIC_SYNC_RULES = ` bucket_definitions: @@ -16,12 +15,8 @@ bucket_definitions: - SELECT id, description FROM "test_data" `; -describe.skipIf(!env.TEST_MONGO_STORAGE)('wal stream - mongodb', { timeout: 20_000 }, function () { - defineWalStreamTests(INITIALIZED_MONGO_STORAGE_FACTORY); -}); - -describe.skipIf(!env.TEST_POSTGRES_STORAGE)('wal stream - postgres', { timeout: 20_000 }, function () { - defineWalStreamTests(INITIALIZED_POSTGRES_STORAGE_FACTORY); +describe('wal stream', () => { + describeWithStorage({ timeout: 20_000 }, defineWalStreamTests); }); function defineWalStreamTests(factory: storage.TestStorageFactory) { diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts index 992a3765f..aa1170279 100644 --- a/modules/module-postgres/test/src/wal_stream_utils.ts +++ b/modules/module-postgres/test/src/wal_stream_utils.ts @@ -19,6 +19,7 @@ export class WalStreamTestContext implements AsyncDisposable { private streamPromise?: Promise; public storage?: SyncRulesBucketStorage; private replicationConnection?: pgwire.PgConnection; + private snapshotPromise?: Promise; /** * Tests operating on the wal stream need to configure the stream and manage asynchronous @@ -28,7 +29,7 @@ export class WalStreamTestContext implements AsyncDisposable { */ static async open( factory: (options: storage.TestStorageOptions) => Promise, - options?: { doNotClear?: boolean } + options?: { doNotClear?: boolean; walStreamOptions?: Partial } ) { const f = await factory({ doNotClear: 
options?.doNotClear }); const connectionManager = new PgManager(TEST_CONNECTION_OPTIONS, {}); @@ -37,12 +38,13 @@ export class WalStreamTestContext implements AsyncDisposable { await clearTestDb(connectionManager.pool); } - return new WalStreamTestContext(f, connectionManager); + return new WalStreamTestContext(f, connectionManager, options?.walStreamOptions); } constructor( public factory: BucketStorageFactory, - public connectionManager: PgManager + public connectionManager: PgManager, + private walStreamOptions?: Partial ) { createCoreReplicationMetrics(METRICS_HELPER.metricsEngine); initializeCoreReplicationMetrics(METRICS_HELPER.metricsEngine); @@ -54,6 +56,7 @@ export class WalStreamTestContext implements AsyncDisposable { async dispose() { this.abortController.abort(); + await this.snapshotPromise; await this.streamPromise; await this.connectionManager.destroy(); await this.factory?.[Symbol.asyncDispose](); @@ -108,16 +111,21 @@ export class WalStreamTestContext implements AsyncDisposable { storage: this.storage, metrics: METRICS_HELPER.metricsEngine, connections: this.connectionManager, - abort_signal: this.abortController.signal + abort_signal: this.abortController.signal, + ...this.walStreamOptions }; this._walStream = new WalStream(options); return this._walStream!; } async replicateSnapshot() { - this.replicationConnection = await this.connectionManager.replicationConnection(); - await this.walStream.initReplication(this.replicationConnection); - await this.storage!.autoActivate(); + const promise = (async () => { + this.replicationConnection = await this.connectionManager.replicationConnection(); + await this.walStream.initReplication(this.replicationConnection); + await this.storage!.autoActivate(); + })(); + this.snapshotPromise = promise.catch((e) => e); + await promise; } startStreaming() { diff --git a/packages/jpgwire/src/pgwire_types.ts b/packages/jpgwire/src/pgwire_types.ts index 65870f99f..dbb924595 100644 --- a/packages/jpgwire/src/pgwire_types.ts +++ b/packages/jpgwire/src/pgwire_types.ts @@ -3,187 +3,152 @@ import { JsonContainer } from '@powersync/service-jsonbig'; import { dateToSqlite, lsnMakeComparable, timestampToSqlite, timestamptzToSqlite } from './util.js'; +export enum PgTypeOid { + TEXT = 25, + UUID = 2950, + VARCHAR = 1043, + BOOL = 16, + BYTEA = 17, + INT2 = 21, + INT4 = 23, + OID = 26, + INT8 = 20, + FLOAT4 = 700, + FLOAT8 = 701, + DATE = 1082, + TIMESTAMP = 1114, + TIMESTAMPTZ = 1184, + JSON = 114, + JSONB = 3802, + PG_LSN = 3220 +} + +// Generate using: +// select '[' || typarray || ', ' || oid || '], // ' || typname from pg_catalog.pg_type WHERE typarray != 0; +const ARRAY_TO_ELEM_OID = new Map([ + [1000, 16], // bool + [1001, 17], // bytea + [1002, 18], // char + [1003, 19], // name + [1016, 20], // int8 + [1005, 21], // int2 + [1006, 22], // int2vector + [1007, 23], // int4 + [1008, 24], // regproc + [1009, 25], // text + [1028, 26], // oid + [1010, 27], // tid + [1011, 28], // xid + [1012, 29], // cid + [1013, 30], // oidvector + [210, 71], // pg_type + [270, 75], // pg_attribute + [272, 81], // pg_proc + [273, 83], // pg_class + [199, 114], // json + [143, 142], // xml + [271, 5069], // xid8 + [1017, 600], // point + [1018, 601], // lseg + [1019, 602], // path + [1020, 603], // box + [1027, 604], // polygon + [629, 628], // line + [1021, 700], // float4 + [1022, 701], // float8 + [0, 705], // unknown + [719, 718], // circle + [791, 790], // money + [1040, 829], // macaddr + [1041, 869], // inet + [651, 650], // cidr + [775, 774], // macaddr8 + 
[1034, 1033], // aclitem + [1014, 1042], // bpchar + [1015, 1043], // varchar + [1182, 1082], // date + [1183, 1083], // time + [1115, 1114], // timestamp + [1185, 1184], // timestamptz + [1187, 1186], // interval + [1270, 1266], // timetz + [1561, 1560], // bit + [1563, 1562], // varbit + [1231, 1700], // numeric + [2201, 1790], // refcursor + [2207, 2202], // regprocedure + [2208, 2203], // regoper + [2209, 2204], // regoperator + [2210, 2205], // regclass + [4192, 4191], // regcollation + [2211, 2206], // regtype + [4097, 4096], // regrole + [4090, 4089], // regnamespace + [2951, 2950], // uuid + [3221, 3220], // pg_lsn + [3643, 3614], // tsvector + [3644, 3642], // gtsvector + [3645, 3615], // tsquery + [3735, 3734], // regconfig + [3770, 3769], // regdictionary + [3807, 3802], // jsonb + [4073, 4072] // jsonpath +]); + +const ELEM_OID_TO_ARRAY = new Map(); +ARRAY_TO_ELEM_OID.forEach((value, key) => { + ELEM_OID_TO_ARRAY.set(value, key); +}); + export class PgType { + static getArrayType(typeOid: number): number | undefined { + return ELEM_OID_TO_ARRAY.get(typeOid); + } + static decode(text: string, typeOid: number) { - switch ( - typeOid // add line here when register new type - ) { - case 25 /* text */: - case 2950 /* uuid */: - case 1043 /* varchar */: + switch (typeOid) { + // add line here when register new type + case PgTypeOid.TEXT: + case PgTypeOid.UUID: + case PgTypeOid.VARCHAR: return text; - case 16 /* bool */: + case PgTypeOid.BOOL: return text == 't'; - case 17 /* bytea */: + case PgTypeOid.BYTEA: return this._decodeBytea(text); - case 21 /* int2 */: - case 23 /* int4 */: - case 26 /* oid */: - case 20 /* int8 */: + case PgTypeOid.INT2: + case PgTypeOid.INT4: + case PgTypeOid.OID: + case PgTypeOid.INT8: return BigInt(text); - case 700 /* float4 */: - case 701 /* float8 */: + case PgTypeOid.FLOAT4: + case PgTypeOid.FLOAT8: return Number(text); - case 1082 /* date */: + case PgTypeOid.DATE: return dateToSqlite(text); - case 1114 /* timestamp */: + case PgTypeOid.TIMESTAMP: return timestampToSqlite(text); - case 1184 /* timestamptz */: + case PgTypeOid.TIMESTAMPTZ: return timestamptzToSqlite(text); - case 114 /* json */: - case 3802 /* jsonb */: + case PgTypeOid.JSON: + case PgTypeOid.JSONB: // Don't parse the contents return new JsonContainer(text); - case 3220 /* pg_lsn */: + case PgTypeOid.PG_LSN: return lsnMakeComparable(text); } const elemTypeid = this._elemTypeOid(typeOid); - if (elemTypeid) { + if (elemTypeid != null) { return this._decodeArray(text, elemTypeid); } return text; // unknown type } - static _elemTypeOid(arrayTypeOid: number) { - // select 'case ' || typarray || ': return ' || oid || '; // ' || typname from pg_catalog.pg_type WHERE typearray != 0; - switch ( - arrayTypeOid // add line here when register new type - ) { - case 1000: - return 16; // bool - case 1001: - return 17; // bytea - case 1002: - return 18; // char - case 1003: - return 19; // name - case 1016: - return 20; // int8 - case 1005: - return 21; // int2 - case 1006: - return 22; // int2vector - case 1007: - return 23; // int4 - case 1008: - return 24; // regproc - case 1009: - return 25; // text - case 1028: - return 26; // oid - case 1010: - return 27; // tid - case 1011: - return 28; // xid - case 1012: - return 29; // cid - case 1013: - return 30; // oidvector - case 210: - return 71; // pg_type - case 270: - return 75; // pg_attribute - case 272: - return 81; // pg_proc - case 273: - return 83; // pg_class - case 199: - return 114; // json - case 143: - return 142; // xml - case 271: - 
return 5069; // xid8 - case 1017: - return 600; // point - case 1018: - return 601; // lseg - case 1019: - return 602; // path - case 1020: - return 603; // box - case 1027: - return 604; // polygon - case 629: - return 628; // line - case 1021: - return 700; // float4 - case 1022: - return 701; // float8 - case 0: - return 705; // unknown - case 719: - return 718; // circle - case 791: - return 790; // money - case 1040: - return 829; // macaddr - case 1041: - return 869; // inet - case 651: - return 650; // cidr - case 775: - return 774; // macaddr8 - case 1034: - return 1033; // aclitem - case 1014: - return 1042; // bpchar - case 1015: - return 1043; // varchar - case 1182: - return 1082; // date - case 1183: - return 1083; // time - case 1115: - return 1114; // timestamp - case 1185: - return 1184; // timestamptz - case 1187: - return 1186; // interval - case 1270: - return 1266; // timetz - case 1561: - return 1560; // bit - case 1563: - return 1562; // varbit - case 1231: - return 1700; // numeric - case 2201: - return 1790; // refcursor - case 2207: - return 2202; // regprocedure - case 2208: - return 2203; // regoper - case 2209: - return 2204; // regoperator - case 2210: - return 2205; // regclass - case 4192: - return 4191; // regcollation - case 2211: - return 2206; // regtype - case 4097: - return 4096; // regrole - case 4090: - return 4089; // regnamespace - case 2951: - return 2950; // uuid - case 3221: - return 3220; // pg_lsn - case 3643: - return 3614; // tsvector - case 3644: - return 3642; // gtsvector - case 3645: - return 3615; // tsquery - case 3735: - return 3734; // regconfig - case 3770: - return 3769; // regdictionary - case 3807: - return 3802; // jsonb - case 4073: - return 4072; // jsonpath - } + + static _elemTypeOid(arrayTypeOid: number): number | undefined { + // select 'case ' || typarray || ': return ' || oid || '; // ' || typname from pg_catalog.pg_type WHERE typarray != 0; + return ARRAY_TO_ELEM_OID.get(arrayTypeOid); } + static _decodeArray(text: string, elemTypeOid: number): any { text = text.replace(/^\[.+=/, ''); // skip dimensions let result: any; diff --git a/packages/service-core/src/index.ts b/packages/service-core/src/index.ts index 09e3e9d79..3f50062e5 100644 --- a/packages/service-core/src/index.ts +++ b/packages/service-core/src/index.ts @@ -41,3 +41,5 @@ export * as utils from './util/util-index.js'; export * from './streams/streams-index.js'; export * as streams from './streams/streams-index.js'; + +export * as bson from 'bson'; diff --git a/packages/service-core/src/replication/RelationCache.ts b/packages/service-core/src/replication/RelationCache.ts new file mode 100644 index 000000000..5411b584f --- /dev/null +++ b/packages/service-core/src/replication/RelationCache.ts @@ -0,0 +1,25 @@ +import { SourceTable } from '../storage/SourceTable.js'; + +export class RelationCache { + private cache = new Map(); + private idFunction: (item: T | SourceTable) => string | number; + + constructor(idFunction: (item: T | SourceTable) => string | number) { + this.idFunction = idFunction; + } + + update(table: SourceTable) { + const id = this.idFunction(table); + this.cache.set(id, table); + } + + get(source: T | SourceTable): SourceTable | undefined { + const id = this.idFunction(source); + return this.cache.get(id); + } + + delete(source: T | SourceTable): boolean { + const id = this.idFunction(source); + return this.cache.delete(id); + } +} diff --git a/packages/service-core/src/replication/replication-index.ts 
b/packages/service-core/src/replication/replication-index.ts index eb1f4705b..6c39483dc 100644 --- a/packages/service-core/src/replication/replication-index.ts +++ b/packages/service-core/src/replication/replication-index.ts @@ -4,3 +4,4 @@ export * from './ErrorRateLimiter.js'; export * from './ReplicationEngine.js'; export * from './ReplicationModule.js'; export * from './replication-metrics.js'; +export * from './RelationCache.js'; diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index da256bf07..c9efefeac 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -2,7 +2,7 @@ import { ObserverClient } from '@powersync/lib-services-framework'; import { EvaluatedParameters, EvaluatedRow, SqliteRow, ToastableSqliteRow } from '@powersync/service-sync-rules'; import { BSON } from 'bson'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; -import { SourceTable } from './SourceTable.js'; +import { SourceTable, TableSnapshotStatus } from './SourceTable.js'; import { BatchedCustomWriteCheckpointOptions } from './storage-index.js'; import { InternalOpId } from '../util/utils.js'; @@ -59,6 +59,14 @@ export interface BucketStorageBatch extends ObserverClient; + /** + * Set the LSN for a snapshot, before starting replication. + * + * Not required if the source database keeps track of this, for example with + * PostgreSQL logical replication slots. + */ + setSnapshotLsn(lsn: string): Promise; + /** * Get the last checkpoint LSN, from either commit or keepalive. */ @@ -66,6 +74,8 @@ export interface BucketStorageBatch extends ObserverClient; + updateTableProgress(table: SourceTable, progress: Partial): Promise; + /** * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. */ diff --git a/packages/service-core/src/storage/SourceTable.ts b/packages/service-core/src/storage/SourceTable.ts index ab415c913..0279afe33 100644 --- a/packages/service-core/src/storage/SourceTable.ts +++ b/packages/service-core/src/storage/SourceTable.ts @@ -2,6 +2,12 @@ import { DEFAULT_TAG } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; import { ColumnDescriptor } from './SourceEntity.js'; +export interface TableSnapshotStatus { + totalEstimatedCount: number; + replicatedCount: number; + lastKey: Uint8Array | null; +} + export class SourceTable { static readonly DEFAULT_TAG = DEFAULT_TAG; @@ -32,6 +38,13 @@ export class SourceTable { */ public syncEvent = true; + /** + * Always undefined if snapshotComplete = true. + * + * May be set if snapshotComplete = false. + */ + public snapshotStatus: TableSnapshotStatus | undefined = undefined; + constructor( public readonly id: any, public readonly connectionTag: string, @@ -40,7 +53,7 @@ export class SourceTable { public readonly table: string, public readonly replicaIdColumns: ColumnDescriptor[], - public readonly snapshotComplete: boolean + public snapshotComplete: boolean ) {} get hasReplicaIdentity() { @@ -68,4 +81,34 @@ export class SourceTable { get syncAny() { return this.syncData || this.syncParameters || this.syncEvent; } + + /** + * In-memory clone of the table status. 
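+ *
+ * A minimal usage sketch (illustrative only; `table` and `logger` stand for a
+ * source table and logger already in scope): progress can be recorded on a
+ * copy without mutating the cached instance, e.g.
+ *
+ *   const copy = table.clone();
+ *   copy.snapshotStatus = { totalEstimatedCount: 1000, replicatedCount: 250, lastKey: null };
+ *   logger.info(`Table snapshot progress: ${copy.formatSnapshotProgress()}`); // "250/~1000"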
+ */ + clone() { + const copy = new SourceTable( + this.id, + this.connectionTag, + this.objectId, + this.schema, + this.table, + this.replicaIdColumns, + this.snapshotComplete + ); + copy.syncData = this.syncData; + copy.syncParameters = this.syncParameters; + copy.snapshotStatus = this.snapshotStatus; + return copy; + } + + formatSnapshotProgress() { + if (this.snapshotComplete || this.snapshotStatus == null) { + // Should not happen + return '-'; + } else if (this.snapshotStatus.totalEstimatedCount < 0) { + return `${this.snapshotStatus.replicatedCount}/?`; + } else { + return `${this.snapshotStatus.replicatedCount}/~${this.snapshotStatus.totalEstimatedCount}`; + } + } } diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 1d85a3a00..c86d71063 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -1,7 +1,7 @@ -import { ObserverClient } from '@powersync/lib-services-framework'; +import { Logger, ObserverClient } from '@powersync/lib-services-framework'; import { ParameterLookup, SqlSyncRules, SqliteJsonRow } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; -import { BucketStorageBatch, FlushedResult } from './BucketStorageBatch.js'; +import { BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; import { BucketStorageFactory } from './BucketStorageFactory.js'; import { ParseSyncRulesOptions } from './PersistedSyncRulesContent.js'; import { SourceEntityDescriptor } from './SourceEntity.js'; @@ -125,6 +125,7 @@ export interface SyncRuleStatus { checkpoint_lsn: string | null; active: boolean; snapshot_done: boolean; + snapshot_lsn: string | null; } export interface ResolveTableOptions { group_id: number; @@ -159,6 +160,15 @@ export interface StartBatchOptions extends ParseSyncRulesOptions { * This will avoid creating new operations for rows previously replicated. */ skipExistingRows?: boolean; + + /** + * Callback called if we streamed an update to a record that we don't have yet. + * + * This is expected to happen in some initial replication edge cases, only if storeCurrentData = true. 
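+ *
+ * A minimal sketch of a handler (illustrative only; `queueTableForResnapshot` is an
+ * assumed helper, and the SaveUpdate fields used here are assumptions rather than
+ * part of this patch):
+ *
+ *   markRecordUnavailable: (record) => {
+ *     // Re-snapshot the affected table so the missing row is replicated later.
+ *     queueTableForResnapshot(record.sourceTable);
+ *   }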
+ */ + markRecordUnavailable?: BucketStorageMarkRecordUnavailable; + + logger?: Logger; } export interface CompactOptions { @@ -274,3 +284,5 @@ export const CHECKPOINT_INVALIDATE_ALL: CheckpointChanges = { updatedParameterLookups: new Set(), invalidateParameterBuckets: true }; + +export type BucketStorageMarkRecordUnavailable = (record: SaveUpdate) => void; diff --git a/packages/service-core/src/storage/bson.ts b/packages/service-core/src/storage/bson.ts index d205c7ac4..db4b732be 100644 --- a/packages/service-core/src/storage/bson.ts +++ b/packages/service-core/src/storage/bson.ts @@ -10,7 +10,10 @@ type NodeBuffer = Buffer; */ export const BSON_DESERIALIZE_INTERNAL_OPTIONS: bson.DeserializeOptions = { // use bigint instead of Long - useBigInt64: true + useBigInt64: true, + // We cannot use promoteBuffers: true, since that also converts UUID values to Buffer + // Instead, we need to handle bson.Binary when reading data + promoteBuffers: false }; /** diff --git a/packages/service-errors/src/codes.ts b/packages/service-errors/src/codes.ts index 43bdeb63d..7a90422f1 100644 --- a/packages/service-errors/src/codes.ts +++ b/packages/service-errors/src/codes.ts @@ -269,6 +269,13 @@ export enum ErrorCode { */ PSYNC_S1346 = 'PSYNC_S1346', + /** + * Timeout while getting a resume token for an initial snapshot. + * + * This may happen if there is very high load on the source database. + */ + PSYNC_S1347 = 'PSYNC_S1347', + // ## PSYNC_S14xx: MongoDB storage replication issues /**