@@ -4,6 +4,7 @@ use std::sync::Arc;
4
4
use tokio:: sync:: Semaphore ;
5
5
use tokio:: task:: JoinHandle ;
6
6
use tokio_postgres:: types:: PgLsn ;
7
+ use tracing:: warn;
7
8
use tracing:: { Instrument , debug, error, info} ;
8
9
9
10
use crate :: concurrency:: shutdown:: ShutdownRx ;
@@ -14,7 +15,7 @@ use crate::error::{ErrorKind, EtlError, EtlResult};
14
15
use crate :: etl_error;
15
16
use crate :: replication:: apply:: { ApplyLoopAction , ApplyLoopHook , start_apply_loop} ;
16
17
use crate :: replication:: client:: PgReplicationClient ;
17
- use crate :: replication:: common:: get_table_replication_states ;
18
+ use crate :: replication:: common:: get_active_table_replication_states ;
18
19
use crate :: state:: table:: {
19
20
TableReplicationError , TableReplicationPhase , TableReplicationPhaseType ,
20
21
} ;
@@ -387,30 +388,34 @@ where
387
388
async fn before_loop ( & self , _start_lsn : PgLsn ) -> EtlResult < ApplyLoopAction > {
388
389
info ! ( "starting table sync workers before the main apply loop" ) ;
389
390
390
- let active_table_replication_states =
391
- get_table_replication_states ( & self . store , false ) . await ?;
392
-
393
- for ( table_id, table_replication_phase) in active_table_replication_states {
394
- // A table in `SyncDone` doesn't need to have its worker started, since the main apply
395
- // worker will move it into `Ready` state automatically once the condition is met.
396
- if let TableReplicationPhaseType :: SyncDone = table_replication_phase. as_type ( ) {
397
- continue ;
398
- }
391
+ for ( table_id, table_replication_phase) in self . store . get_table_replication_states ( ) . await ?
392
+ {
393
+ if !table_replication_phase. as_type ( ) . is_done ( ) {
394
+ // A table in `SyncDone` doesn't need to have its worker started, since the main apply
395
+ // worker will move it into `Ready` state automatically once the condition is met.
396
+ if let TableReplicationPhaseType :: SyncDone = table_replication_phase. as_type ( ) {
397
+ continue ;
398
+ }
399
399
400
- // If there is already an active worker for this table in the pool, we can avoid starting
401
- // it.
402
- let mut pool = self . pool . lock ( ) . await ;
403
- if pool. get_active_worker_state ( table_id) . is_some ( ) {
404
- continue ;
405
- }
400
+ // If there is already an active worker for this table in the pool, we can avoid starting
401
+ // it.
402
+ let mut pool = self . pool . lock ( ) . await ;
403
+ if pool. get_active_worker_state ( table_id) . is_some ( ) {
404
+ continue ;
405
+ }
406
406
407
- // If we fail, we just show an error, and hopefully we will succeed when starting it
408
- // during syncing tables.
409
- let table_sync_worker = self . build_table_sync_worker ( table_id) . await ;
410
- if let Err ( err) = pool. start_worker ( table_sync_worker) . await {
411
- error ! (
412
- "error starting table sync worker for table {} during initialization: {}" ,
413
- table_id, err
407
+ // If we fail, we just show an error, and hopefully we will succeed when starting it
408
+ // during syncing tables.
409
+ let table_sync_worker = self . build_table_sync_worker ( table_id) . await ;
410
+ if let Err ( err) = pool. start_worker ( table_sync_worker) . await {
411
+ error ! (
412
+ "error starting table sync worker for table {} during initialization: {}" ,
413
+ table_id, err
414
+ ) ;
415
+ }
416
+ } else if table_replication_phase. as_type ( ) . is_errored ( ) {
417
+ warn ! (
418
+ "table sync worker for table {table_id} won't run because it is in an errored state."
414
419
) ;
415
420
}
416
421
}
@@ -429,7 +434,7 @@ where
429
434
update_state : bool ,
430
435
) -> EtlResult < ApplyLoopAction > {
431
436
let active_table_replication_states =
432
- get_table_replication_states ( & self . store , false ) . await ?;
437
+ get_active_table_replication_states ( & self . store ) . await ?;
433
438
debug ! (
434
439
"processing syncing tables for apply worker with lsn {}" ,
435
440
current_lsn
0 commit comments