File tree Expand file tree Collapse file tree 3 files changed +40
-2
lines changed
packages/service-core/src Expand file tree Collapse file tree 3 files changed +40
-2
lines changed Original file line number Diff line number Diff line change 1+ ---
2+ ' @powersync/service-core ' : minor
3+ ' @powersync/service-image ' : minor
4+ ---
5+
6+ Report a lack of commits or keepalives as an issue in the diagnostics API.
Original file line number Diff line number Diff line change @@ -134,6 +134,38 @@ export async function getSyncRulesStatus(
134134 } )
135135 ) ;
136136
137+ if ( live_status && status ?. active && sourceConfig . type != 'mysql' ) {
138+ // Check replication lag for active sync rules.
139+ // Right now we exclude mysql, since we don't have consistent keepalives for it.
140+ if ( sync_rules . last_checkpoint_ts == null && sync_rules . last_keepalive_ts == null ) {
141+ errors . push ( {
142+ level : 'warning' ,
143+ message : 'No checkpoint found, cannot calculate replication lag'
144+ } ) ;
145+ } else {
146+ const lastTime = Math . max (
147+ sync_rules . last_checkpoint_ts ?. getTime ( ) ?? 0 ,
148+ sync_rules . last_keepalive_ts ?. getTime ( ) ?? 0
149+ ) ;
150+ const lagSeconds = Math . round ( ( Date . now ( ) - lastTime ) / 1000 ) ;
151+ // On idle instances, keepalive messages are only persisted every 60 seconds.
152+ // So we use 2 minutes as the threshold for warnings, and 15 minutes for fatal errors.
153+ // The replication lag metric should give a more granular value, but that is not available directly
154+ // in the API containers used for diagnostics, and this should give a good enough indication.
155+ if ( lagSeconds > 15 * 60 ) {
156+ errors . push ( {
157+ level : 'fatal' ,
158+ message : `No replicated commit in more than ${ lagSeconds } s`
159+ } ) ;
160+ } else if ( lagSeconds > 120 ) {
161+ errors . push ( {
162+ level : 'warning' ,
163+ message : `No replicated commit in more than ${ lagSeconds } s`
164+ } ) ;
165+ }
166+ }
167+ }
168+
137169 return {
138170 content : include_content ? sync_rules . sync_rules_content : undefined ,
139171 connections : [
Original file line number Diff line number Diff line change @@ -10,8 +10,8 @@ import { AbstractReplicationJob } from './AbstractReplicationJob.js';
1010import { ErrorRateLimiter } from './ErrorRateLimiter.js' ;
1111import { ConnectionTestResult } from './ReplicationModule.js' ;
1212
13- // 5 minutes
14- const PING_INTERVAL = 1_000_000_000n * 300n ;
13+ // 1 minute
14+ const PING_INTERVAL = 1_000_000_000n * 60n ;
1515
1616export interface CreateJobOptions {
1717 lock : storage . ReplicationLock ;
You can’t perform that action at this time.
0 commit comments