-import { mongo } from '@powersync/lib-service-mongodb';
+import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersync/lib-service-mongodb';
 import {
   container,
   DatabaseConnectionError,
@@ -10,19 +10,26 @@ import {
 } from '@powersync/lib-services-framework';
 import { MetricsEngine, SaveOperationTag, SourceEntityDescriptor, SourceTable, storage } from '@powersync/service-core';
 import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules';
+import { ReplicationMetric } from '@powersync/service-types';
 import { MongoLSN } from '../common/MongoLSN.js';
 import { PostImagesOption } from '../types/types.js';
 import { escapeRegExp } from '../utils.js';
 import { MongoManager } from './MongoManager.js';
 import { constructAfterRecord, createCheckpoint, getCacheIdentifier, getMongoRelation } from './MongoRelation.js';
 import { CHECKPOINTS_COLLECTION } from './replication-utils.js';
-import { ReplicationMetric } from '@powersync/service-types';

 export interface ChangeStreamOptions {
   connections: MongoManager;
   storage: storage.SyncRulesBucketStorage;
   metrics: MetricsEngine;
   abort_signal: AbortSignal;
+  /**
+   * Override maxAwaitTimeMS for testing.
+   *
+   * In most cases, the default of 10_000 is fine. However, for MongoDB 6.0, this can cause a delay
+   * in closing the stream. To cover that case, reduce the timeout for tests.
+   */
+  maxAwaitTimeMS?: number;
 }

 interface InitResult {
@@ -56,6 +63,8 @@ export class ChangeStream {
   private readonly defaultDb: mongo.Db;
   private readonly metrics: MetricsEngine;

+  private readonly maxAwaitTimeMS: number;
+
   private abort_signal: AbortSignal;

   private relation_cache = new Map<string | number, storage.SourceTable>();
@@ -65,6 +74,7 @@ export class ChangeStream {
     this.metrics = options.metrics;
     this.group_id = options.storage.group_id;
     this.connections = options.connections;
+    this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
     this.client = this.connections.client;
     this.defaultDb = this.connections.db;
     this.sync_rules = options.storage.getParsedSyncRules({
@@ -557,7 +567,7 @@ export class ChangeStream {

     const streamOptions: mongo.ChangeStreamOptions = {
       showExpandedEvents: true,
-      maxAwaitTimeMS: 200,
+      maxAwaitTimeMS: this.maxAwaitTimeMS,
       fullDocument: fullDocument
     };

@@ -597,20 +607,45 @@

     let flexDbNameWorkaroundLogged = false;

+    let lastEmptyResume = performance.now();
+
     while (true) {
       if (this.abort_signal.aborted) {
         break;
       }

-      const originalChangeDocument = await stream.tryNext();
+      const originalChangeDocument = await stream.tryNext().catch((e) => {
+        throw mapChangeStreamError(e);
+      });
       // The stream was closed, we will only ever receive `null` from it
       if (!originalChangeDocument && stream.closed) {
         break;
       }

-      if (originalChangeDocument == null || this.abort_signal.aborted) {
+      if (this.abort_signal.aborted) {
+        break;
+      }
+
+      if (originalChangeDocument == null) {
+        // We get a new null document after `maxAwaitTimeMS` if there were no other events.
+        // In this case, stream.resumeToken is the resume token associated with the last response.
+        // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next()
+        // does update it.
+        // From observed behavior, the actual resumeToken changes around once every 10 seconds.
+        // If we don't update it on empty events, we do keep consistency, but resuming the stream
+        // with old tokens may cause connection timeouts.
+        // We throttle this further by only persisting a keepalive once a minute.
+        // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
+        // doing a keepalive in the middle of a transaction.
+        if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
+          const { comparable: lsn } = MongoLSN.fromResumeToken(stream.resumeToken);
+          await batch.keepalive(lsn);
+          await touch();
+          lastEmptyResume = performance.now();
+        }
         continue;
       }
+
       await touch();

       if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
@@ -762,3 +797,21 @@ async function touch() {
   // or reduce PING_INTERVAL here.
   return container.probes.touch();
 }
+
+function mapChangeStreamError(e: any) {
+  if (isMongoNetworkTimeoutError(e)) {
+    // This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out".
+    // We wrap the error to make it more useful.
+    throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e);
+  } else if (
+    isMongoServerError(e) &&
+    e.codeName == 'NoMatchingDocument' &&
+    e.errmsg?.includes('post-image was not found')
+  ) {
+    throw new ChangeStreamInvalidatedError(e.errmsg, e);
+  } else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) {
+    throw new ChangeStreamInvalidatedError(e.message, e);
+  } else {
+    throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e);
+  }
+}
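
The empty-batch handling above carries the main reasoning in this change: the stream's resume token keeps advancing even when no documents arrive, and persisting it as a throttled keepalive keeps the stored LSN resumable without flooding storage. A minimal standalone sketch of just that throttle, with `persistKeepalive` as an assumed stand-in for the `MongoLSN.fromResumeToken(stream.resumeToken)` + `batch.keepalive(lsn)` pair in the diff (the helper and its name are illustrative, not part of this change; the 60-second threshold and transaction guard mirror the loop above):

```ts
// Sketch only: throttled keepalive on empty change stream batches.
// persistKeepalive() stands in for reading stream.resumeToken and calling batch.keepalive(lsn).
let lastEmptyResume = performance.now();

async function onEmptyBatch(
  inTransaction: boolean, // mirrors the waitForCheckpointLsn != null check in the diff
  persistKeepalive: () => Promise<void>
): Promise<void> {
  if (inTransaction) {
    // Never persist a keepalive in the middle of a transaction.
    return;
  }
  if (performance.now() - lastEmptyResume <= 60_000) {
    // The resume token only changes roughly every 10 seconds, so one keepalive
    // per minute is enough to keep resuming the stream cheap.
    return;
  }
  await persistKeepalive();
  lastEmptyResume = performance.now();
}
```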
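The `maxAwaitTimeMS` override matters because `stream.tryNext()` can block for up to that long when no events arrive, which in turn bounds how quickly the loop observes `abort_signal.aborted`. A runnable sketch of that timing relationship with the change stream replaced by a stub (the stub, its delay, and the surrounding harness are assumptions for illustration; only the option itself comes from this diff):

```ts
// Sketch only: a stubbed tryNext() that resolves with null after maxAwaitTimeMS,
// the way an idle change stream batch does. Aborting lets the loop exit within
// roughly one maxAwaitTimeMS window.
const maxAwaitTimeMS = 200; // test-style override; the production default above is 10_000

function stubTryNext(): Promise<null> {
  return new Promise((resolve) => setTimeout(() => resolve(null), maxAwaitTimeMS));
}

async function poll(signal: AbortSignal): Promise<void> {
  while (!signal.aborted) {
    const doc = await stubTryNext();
    if (doc == null) {
      continue; // empty batch: loop around and re-check the abort signal
    }
    // ... a real loop would process the change event here ...
  }
}

async function main() {
  const controller = new AbortController();
  const done = poll(controller.signal);
  setTimeout(() => controller.abort(), 1000);
  await done; // resolves within ~maxAwaitTimeMS of the abort
}

void main();
```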