From 609adf4d03cb2be1bffc861f5c3053ae0525b8f5 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 3 Sep 2025 16:51:29 +0200 Subject: [PATCH 1/4] Add failing tests. --- .../test/src/storage.test.ts | 48 ++++++++++++++++ .../tests/register-data-storage-data-tests.ts | 55 +++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/modules/module-mongodb-storage/test/src/storage.test.ts b/modules/module-mongodb-storage/test/src/storage.test.ts index 858c51b8..b238487c 100644 --- a/modules/module-mongodb-storage/test/src/storage.test.ts +++ b/modules/module-mongodb-storage/test/src/storage.test.ts @@ -3,6 +3,7 @@ import { describe } from 'vitest'; import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; import { env } from './env.js'; import { MongoTestStorageFactoryGenerator } from '@module/storage/implementation/MongoTestStorageFactoryGenerator.js'; +import { MongoChecksumOptions } from '@module/storage/implementation/MongoChecksums.js'; describe('Mongo Sync Bucket Storage - Parameters', () => register.registerDataStorageParameterTests(INITIALIZED_MONGO_STORAGE_FACTORY)); @@ -42,3 +43,50 @@ describe('Mongo Sync Bucket Storage - split buckets', () => } }) )); + +describe('Mongo Sync Bucket Storage - checksum calculations', () => { + // This test covers 4 buckets with 4 operations in each. + // We specifically use operationBatchLimit values that do not have factors in common with 4, + // as well as some that do. 
+ const params: MongoChecksumOptions[] = [ + { + bucketBatchLimit: 100, + operationBatchLimit: 3 + }, + + { + bucketBatchLimit: 10, + operationBatchLimit: 7 + }, + + { + bucketBatchLimit: 3, + operationBatchLimit: 1 + }, + { + bucketBatchLimit: 1, + operationBatchLimit: 3 + }, + { + bucketBatchLimit: 2, + operationBatchLimit: 4 + }, + { + bucketBatchLimit: 4, + operationBatchLimit: 12 + } + ]; + for (let options of params) { + describe(`${options.bucketBatchLimit}|${options.operationBatchLimit}`, () => { + register.testChecksumBatching( + MongoTestStorageFactoryGenerator({ + url: env.MONGO_TEST_URL, + isCI: env.CI, + internalOptions: { + checksumOptions: options + } + }) + ); + }); + } +}); diff --git a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index 1aeeb36d..21b95709 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -1235,4 +1235,59 @@ bucket_definitions: const checksums2 = [...(await bucketStorage.getChecksums(checkpoint + 1n, ['global[]'])).values()]; expect(checksums2).toEqual([{ bucket: 'global[]', checksum: 1917136889, count: 1 }]); }); + + testChecksumBatching(generateStorageFactory); +} + +/** + * This specifically tests an issue we ran into with MongoDB storage. + * + * Exposed as a separate test so we can test with more storage parameters. 
+ */ +export function testChecksumBatching(generateStorageFactory: storage.TestStorageFactory) { + test('checksums for multiple buckets', async () => { + await using factory = await generateStorageFactory(); + const syncRules = await factory.updateSyncRules({ + content: ` +bucket_definitions: + user: + parameters: select request.user_id() as user_id + data: + - select id, description from test where user_id = bucket.user_id +` + }); + const bucketStorage = factory.getInstance(syncRules); + + const sourceTable = TEST_TABLE; + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + for (let u of ['u1', 'u2', 'u3', 'u4']) { + for (let t of ['t1', 't2', 't3', 't4']) { + const id = `${t}_${u}`; + await batch.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id, + description: `${t} description`, + user_id: u + }, + afterReplicaId: test_utils.rid(id) + }); + } + } + await batch.commit('1/1'); + }); + const { checkpoint } = await bucketStorage.getCheckpoint(); + + bucketStorage.clearChecksumCache(); + const buckets = ['user["u1"]', 'user["u2"]', 'user["u3"]', 'user["u4"]']; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, buckets)).values()]; + checksums.sort((a, b) => a.bucket.localeCompare(b.bucket)); + expect(checksums).toEqual([ + { bucket: 'user["u1"]', count: 4, checksum: 346204588 }, + { bucket: 'user["u2"]', count: 4, checksum: 5261081 }, + { bucket: 'user["u3"]', count: 4, checksum: 134760718 }, + { bucket: 'user["u4"]', count: 4, checksum: -302639724 } + ]); + }); } From 3c3588c7478a997b8f3dfe4ac68be9494c815f97 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 3 Sep 2025 16:52:24 +0200 Subject: [PATCH 2/4] Remove wrong count increment. 
--- .../src/storage/implementation/MongoChecksums.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts index 6930b881..4f34a382 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts @@ -284,7 +284,6 @@ export class MongoChecksums { // All done for this bucket requests.delete(bucket); } - batchCount++; } if (!limitReached) { break; From 265a27e901bddd9fd3a269bd697a408a6142258e Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 3 Sep 2025 16:52:42 +0200 Subject: [PATCH 3/4] Sort group results. --- .../src/storage/implementation/MongoChecksums.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts index 4f34a382..9238a602 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts @@ -247,7 +247,10 @@ export class MongoChecksums { }, last_op: { $max: '$_id.o' } } - } + }, + // Sort the aggregated results (100 max, so should be fast). + // This is important to identify which buckets we have partial data for. + { $sort: { _id: 1 } } ], { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS } ) From 52168c26c978741be1134dd43cf8fd74d852d41d Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 3 Sep 2025 17:01:13 +0200 Subject: [PATCH 4/4] Changeset. 
--- .changeset/violet-badgers-collect.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .changeset/violet-badgers-collect.md diff --git a/.changeset/violet-badgers-collect.md b/.changeset/violet-badgers-collect.md new file mode 100644 index 00000000..b9fdb4f8 --- /dev/null +++ b/.changeset/violet-badgers-collect.md @@ -0,0 +1,8 @@ +--- +'@powersync/service-module-mongodb-storage': patch +'@powersync/service-core-tests': patch +'@powersync/service-core': patch +'@powersync/service-image': patch +--- + +Fix checksum calculation issues with large buckets.