From 02c3b8254e0ec23aa7867d87fcc68004b8533f10 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 17 Jul 2025 13:39:57 +0200 Subject: [PATCH 1/4] Add diagnostic message if no commit is performed on the active replication stream. --- packages/service-core/src/api/diagnostics.ts | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/packages/service-core/src/api/diagnostics.ts b/packages/service-core/src/api/diagnostics.ts index a4c6af3c0..36e2be824 100644 --- a/packages/service-core/src/api/diagnostics.ts +++ b/packages/service-core/src/api/diagnostics.ts @@ -134,6 +134,37 @@ export async function getSyncRulesStatus( }) ); + if (live_status && status?.active) { + // Check replication lag for active sync rules. + if (sync_rules.last_checkpoint_ts == null && sync_rules.last_keepalive_ts == null) { + errors.push({ + level: 'warning', + message: 'No checkpoint found, cannot calculate replication lag' + }); + } else { + const lastTime = Math.max( + sync_rules.last_checkpoint_ts?.getTime() ?? 0, + sync_rules.last_keepalive_ts?.getTime() ?? 0 + ); + const lagSeconds = Math.round((Date.now() - lastTime) / 1000); + // On idle instances, keepalive messages are only persisted every 60 seconds. + // So we use 2 minutes as a threshold for warnings, and 15 minutes for critical. + // The replication lag metric should give a more granular value, but that is not available directly + // in the API containers used for diagnostics, and this should give a good enough indication. + if (lagSeconds > 15 * 60) { + errors.push({ + level: 'fatal', + message: `No replicated commit in more than ${lagSeconds}s` + }); + } else if (lagSeconds > 120) { + errors.push({ + level: 'warning', + message: `No replicated commit in more than ${lagSeconds}s` + }); + } + } + } + return { content: include_content ? 
sync_rules.sync_rules_content : undefined, connections: [ From e570317c83f0b4fa148b9918ca81704580aaac43 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 17 Jul 2025 13:40:14 +0200 Subject: [PATCH 2/4] For postgres, ping every minute. --- packages/service-core/src/replication/AbstractReplicator.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/service-core/src/replication/AbstractReplicator.ts b/packages/service-core/src/replication/AbstractReplicator.ts index b102350d9..e76debe73 100644 --- a/packages/service-core/src/replication/AbstractReplicator.ts +++ b/packages/service-core/src/replication/AbstractReplicator.ts @@ -10,8 +10,8 @@ import { AbstractReplicationJob } from './AbstractReplicationJob.js'; import { ErrorRateLimiter } from './ErrorRateLimiter.js'; import { ConnectionTestResult } from './ReplicationModule.js'; -// 5 minutes -const PING_INTERVAL = 1_000_000_000n * 300n; +// 1 minute +const PING_INTERVAL = 1_000_000_000n * 60n; export interface CreateJobOptions { lock: storage.ReplicationLock; From 0c224d0839ca4158ba7cffa50362f46e0469f5ce Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 17 Jul 2025 13:41:15 +0200 Subject: [PATCH 3/4] Add changeset. --- .changeset/ninety-points-provide.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/ninety-points-provide.md diff --git a/.changeset/ninety-points-provide.md b/.changeset/ninety-points-provide.md new file mode 100644 index 000000000..cce194576 --- /dev/null +++ b/.changeset/ninety-points-provide.md @@ -0,0 +1,6 @@ +--- +'@powersync/service-core': minor +'@powersync/service-image': minor +--- + +Report lack of commits or keepalives as issues in the diagnostics api. From 81f679ca2a6fce75261a13935a57dd0f4cc7e8bc Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 21 Jul 2025 17:37:02 +0200 Subject: [PATCH 4/4] Disable check for mysql for now. 
--- packages/service-core/src/api/diagnostics.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/service-core/src/api/diagnostics.ts b/packages/service-core/src/api/diagnostics.ts index 36e2be824..6a1eaab40 100644 --- a/packages/service-core/src/api/diagnostics.ts +++ b/packages/service-core/src/api/diagnostics.ts @@ -134,8 +134,9 @@ export async function getSyncRulesStatus( }) ); - if (live_status && status?.active) { + if (live_status && status?.active && sourceConfig.type != 'mysql') { // Check replication lag for active sync rules. + // Right now we exclude mysql, since we don't have consistent keepalives for it. if (sync_rules.last_checkpoint_ts == null && sync_rules.last_keepalive_ts == null) { errors.push({ level: 'warning',