@@ -9,7 +9,7 @@ use etl_config::{
9
9
Environment ,
10
10
shared:: { ReplicatorConfig , SupabaseConfig , TlsConfig } ,
11
11
} ;
12
- use etl_postgres:: replication:: { TableLookupError , get_table_name_from_oid, health, state} ;
12
+ use etl_postgres:: replication:: { TableLookupError , get_table_name_from_oid, health, lag , state} ;
13
13
use etl_postgres:: types:: TableId ;
14
14
use secrecy:: ExposeSecret ;
15
15
use serde:: { Deserialize , Serialize } ;
@@ -271,9 +271,7 @@ pub enum SimpleTableReplicationState {
271
271
Queued ,
272
272
CopyingTable ,
273
273
CopiedTable ,
274
- FollowingWal {
275
- lag : u64 ,
276
- } ,
274
+ FollowingWal ,
277
275
Error {
278
276
reason : String ,
279
277
#[ serde( skip_serializing_if = "Option::is_none" ) ]
@@ -301,13 +299,10 @@ impl From<state::TableReplicationState> for SimpleTableReplicationState {
301
299
state:: TableReplicationState :: Init => SimpleTableReplicationState :: Queued ,
302
300
state:: TableReplicationState :: DataSync => SimpleTableReplicationState :: CopyingTable ,
303
301
state:: TableReplicationState :: FinishedCopy => SimpleTableReplicationState :: CopiedTable ,
304
- // TODO: add lag metric when available.
305
302
state:: TableReplicationState :: SyncDone { .. } => {
306
- SimpleTableReplicationState :: FollowingWal { lag : 0 }
307
- }
308
- state:: TableReplicationState :: Ready => {
309
- SimpleTableReplicationState :: FollowingWal { lag : 0 }
303
+ SimpleTableReplicationState :: FollowingWal
310
304
}
305
+ state:: TableReplicationState :: Ready => SimpleTableReplicationState :: FollowingWal ,
311
306
state:: TableReplicationState :: Errored {
312
307
reason,
313
308
solution,
@@ -340,12 +335,52 @@ pub struct TableReplicationStatus {
340
335
#[ schema( example = "public.users" ) ]
341
336
pub table_name : String ,
342
337
pub state : SimpleTableReplicationState ,
338
+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
339
+ #[ schema( nullable = true ) ]
340
+ pub table_sync_lag : Option < SlotLagMetricsResponse > ,
341
+ }
342
+
343
+ /// Lag metrics reported for replication slots.
344
+ #[ derive( Debug , Serialize , Deserialize , ToSchema ) ]
345
+ pub struct SlotLagMetricsResponse {
346
+ /// Bytes between the current WAL location and the slot restart LSN.
347
+ #[ schema( example = 1024 ) ]
348
+ pub restart_lsn_bytes : i64 ,
349
+ /// Bytes between the current WAL location and the confirmed flush LSN.
350
+ #[ schema( example = 2048 ) ]
351
+ pub confirmed_flush_lsn_bytes : i64 ,
352
+ /// How many bytes of WAL are still safe to build up before the limit of the slot is reached.
353
+ #[ schema( example = 8192 ) ]
354
+ pub safe_wal_size_bytes : i64 ,
355
+ /// Write lag expressed in milliseconds.
356
+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
357
+ #[ schema( example = 1500 , nullable = true ) ]
358
+ pub write_lag : Option < i64 > ,
359
+ /// Flush lag expressed in milliseconds.
360
+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
361
+ #[ schema( example = 1200 , nullable = true ) ]
362
+ pub flush_lag : Option < i64 > ,
363
+ }
364
+
365
+ impl From < lag:: SlotLagMetrics > for SlotLagMetricsResponse {
366
+ fn from ( metrics : lag:: SlotLagMetrics ) -> Self {
367
+ Self {
368
+ restart_lsn_bytes : metrics. restart_lsn_bytes ,
369
+ confirmed_flush_lsn_bytes : metrics. confirmed_flush_lsn_bytes ,
370
+ safe_wal_size_bytes : metrics. safe_wal_size_bytes ,
371
+ write_lag : metrics. write_lag_ms ,
372
+ flush_lag : metrics. flush_lag_ms ,
373
+ }
374
+ }
343
375
}
344
376
345
377
#[ derive( Debug , Serialize , Deserialize , ToSchema ) ]
346
378
pub struct GetPipelineReplicationStatusResponse {
347
379
#[ schema( example = 1 ) ]
348
380
pub pipeline_id : i64 ,
381
+ #[ serde( skip_serializing_if = "Option::is_none" ) ]
382
+ #[ schema( nullable = true ) ]
383
+ pub apply_lag : Option < SlotLagMetricsResponse > ,
349
384
pub table_statuses : Vec < TableReplicationStatus > ,
350
385
}
351
386
@@ -924,13 +959,14 @@ pub async fn get_pipeline_replication_status(
924
959
925
960
// Fetch replication state for all tables in this pipeline
926
961
let state_rows = state:: get_table_replication_state_rows ( & source_pool, pipeline_id) . await ?;
962
+ let mut lag_metrics = lag:: get_pipeline_lag_metrics ( & source_pool, pipeline_id as u64 ) . await ?;
963
+ let apply_lag = lag_metrics. apply . map ( Into :: into) ;
927
964
928
965
// Convert database states to UI-friendly format and fetch table names
929
966
let mut tables: Vec < TableReplicationStatus > = Vec :: new ( ) ;
930
967
for row in state_rows {
931
- let table_id = row. table_id . 0 ;
932
- let table_name =
933
- get_table_name_from_oid ( & source_pool, TableId :: new ( row. table_id . 0 ) ) . await ?;
968
+ let table_id = TableId :: new ( row. table_id . 0 ) ;
969
+ let table_name = get_table_name_from_oid ( & source_pool, table_id) . await ?;
934
970
935
971
// Extract the metadata row from the database
936
972
let table_replication_state = row
@@ -939,14 +975,16 @@ pub async fn get_pipeline_replication_status(
939
975
. ok_or ( PipelineError :: MissingTableReplicationState ) ?;
940
976
941
977
tables. push ( TableReplicationStatus {
942
- table_id,
978
+ table_id : table_id . into_inner ( ) ,
943
979
table_name : table_name. to_string ( ) ,
944
980
state : table_replication_state. into ( ) ,
981
+ table_sync_lag : lag_metrics. table_sync . remove ( & table_id) . map ( Into :: into) ,
945
982
} ) ;
946
983
}
947
984
948
985
let response = GetPipelineReplicationStatusResponse {
949
986
pipeline_id,
987
+ apply_lag,
950
988
table_statuses : tables,
951
989
} ;
952
990
0 commit comments