3
3
//! Contains the main [`Pipeline`] struct that coordinates Postgres logical replication
4
4
//! with destination systems. Manages worker lifecycles, shutdown coordination, and error handling.
5
5
6
- use etl_config:: shared:: PipelineConfig ;
7
- use std:: sync:: Arc ;
8
- use tokio:: sync:: Semaphore ;
9
- use tracing:: { error, info} ;
10
-
11
6
use crate :: bail;
12
7
use crate :: concurrency:: shutdown:: { ShutdownTx , create_shutdown_channel} ;
13
8
use crate :: destination:: Destination ;
14
9
use crate :: error:: { ErrorKind , EtlError , EtlResult } ;
15
10
use crate :: metrics:: register_metrics;
16
11
use crate :: replication:: client:: PgReplicationClient ;
17
12
use crate :: state:: table:: TableReplicationPhase ;
13
+ use crate :: store:: cleanup:: CleanupStore ;
18
14
use crate :: store:: schema:: SchemaStore ;
19
15
use crate :: store:: state:: StateStore ;
20
16
use crate :: types:: PipelineId ;
21
17
use crate :: workers:: apply:: { ApplyWorker , ApplyWorkerHandle } ;
22
18
use crate :: workers:: base:: { Worker , WorkerHandle } ;
23
19
use crate :: workers:: pool:: TableSyncWorkerPool ;
20
+ use etl_config:: shared:: PipelineConfig ;
21
+ use etl_postgres:: types:: TableId ;
22
+ use std:: collections:: HashSet ;
23
+ use std:: sync:: Arc ;
24
+ use tokio:: sync:: Semaphore ;
25
+ use tracing:: { error, info} ;
24
26
25
27
/// Internal state tracking for pipeline lifecycle.
26
28
///
@@ -62,7 +64,7 @@ pub struct Pipeline<S, D> {
62
64
63
65
impl < S , D > Pipeline < S , D >
64
66
where
65
- S : StateStore + SchemaStore + Clone + Send + Sync + ' static ,
67
+ S : StateStore + SchemaStore + CleanupStore + Clone + Send + Sync + ' static ,
66
68
D : Destination + Clone + Send + Sync + ' static ,
67
69
{
68
70
/// Creates a new pipeline with the given configuration.
@@ -252,11 +254,17 @@ where
252
254
self . wait ( ) . await
253
255
}
254
256
255
- /// Initializes table replication states for all tables in the publication.
257
+ /// Initializes table replication states for tables in the publication and
258
+ /// purges state for tables removed from it.
259
+ ///
260
+ /// Ensures each table currently in the Postgres publication has a
261
+ /// corresponding replication state; tables without existing states are
262
+ /// initialized to [`TableReplicationPhase::Init`].
256
263
///
257
- /// This private method ensures that each table in the Postgres publication has
258
- /// a corresponding replication state record. Tables without existing states are
259
- /// initialized to the [`TableReplicationPhase::Init`] phase.
264
+ /// Also detects tables that still have stored state but are no longer
265
+ /// part of the publication, and deletes their stored state (replication
266
+ /// state, table mappings, and table schemas) without touching the actual
267
+ /// destination tables.
260
268
async fn initialize_table_states (
261
269
& self ,
262
270
replication_client : & PgReplicationClient ,
@@ -281,26 +289,43 @@ where
281
289
) ;
282
290
}
283
291
284
- let table_ids = replication_client
292
+ let publication_table_ids = replication_client
285
293
. get_publication_table_ids ( & self . config . publication_name )
286
294
. await ?;
287
295
288
296
info ! (
289
297
"the publication '{}' contains {} tables" ,
290
298
self . config. publication_name,
291
- table_ids . len( )
299
+ publication_table_ids . len( )
292
300
) ;
293
301
294
302
self . store . load_table_replication_states ( ) . await ?;
295
- let states = self . store . get_table_replication_states ( ) . await ?;
296
- for table_id in table_ids {
297
- if !states. contains_key ( & table_id) {
303
+ let table_replication_states = self . store . get_table_replication_states ( ) . await ?;
304
+
305
+ // Initialize states for newly added tables in the publication
306
+ for table_id in & publication_table_ids {
307
+ if !table_replication_states. contains_key ( table_id) {
298
308
self . store
299
- . update_table_replication_state ( table_id, TableReplicationPhase :: Init )
309
+ . update_table_replication_state ( * table_id, TableReplicationPhase :: Init )
300
310
. await ?;
301
311
}
302
312
}
303
313
314
+ // Detect and purge tables that have been removed from the publication.
315
+ //
316
+ // We must not delete the destination table, only the internal state.
317
+ let publication_set: HashSet < TableId > = publication_table_ids. iter ( ) . copied ( ) . collect ( ) ;
318
+ for ( table_id, _) in table_replication_states {
319
+ if !publication_set. contains ( & table_id) {
320
+ info ! (
321
+ "table {} removed from publication, purging stored state" ,
322
+ table_id
323
+ ) ;
324
+
325
+ self . store . cleanup_table_state ( table_id) . await ?;
326
+ }
327
+ }
328
+
304
329
Ok ( ( ) )
305
330
}
306
331
}
0 commit comments