@@ -39,6 +39,7 @@ defmodule Sequin.Runtime.SlotProcessorServer do
3939 alias Sequin.Runtime.PostgresRelationHashCache
4040 alias Sequin.Runtime.SlotMessageStore
4141 alias Sequin.Runtime.SlotProcessor.Message
42+ alias Sequin.Time
4243 alias Sequin.Workers.CreateReplicationSlotWorker
4344
4445 require Logger
@@ -456,7 +457,10 @@ defmodule Sequin.Runtime.SlotProcessorServer do
456457 # Int64 - Server's system clock (microseconds since 2000-01-01 midnight)
457458 # Byte1 - 1 if reply requested immediately to avoid timeout, 0 otherwise
458459 # The server is not asking for a reply
459- def handle_data ( << ?k , wal_end :: 64 , _clock :: 64 , 0 >> , % State { } = state ) do
460+ def handle_data ( << ?k , wal_end :: 64 , clock :: 64 , 0 >> , % State { } = state ) do
461+ diff_ms = Time . microseconds_since_2000_to_ms_since_now ( clock )
462+ Logger . info ( "Received keepalive message for slot" , clock: clock , wal_end: wal_end , diff_ms: diff_ms )
463+
460464 execute_timed ( :handle_data_keepalive , fn ->
461465 # Because these are <14 Postgres databases, they will not receive heartbeat messages
462466 # temporarily mark them as healthy if we receive a keepalive message
@@ -469,9 +473,11 @@ defmodule Sequin.Runtime.SlotProcessorServer do
469473
470474 # Check if we should send an ack even though not requested
471475 if should_send_ack? ( state ) do
476+ Logger . info ( "Sending ack" )
472477 commit_lsn = get_commit_lsn ( state , wal_end )
473478 reply = ack_message ( commit_lsn )
474479 state = % { state | last_lsn_acked_at: Sequin . utc_now ( ) }
480+ log_keepalive_ack ( commit_lsn , clock )
475481 { :keep_state_and_ack , reply , state }
476482 else
477483 { :keep_state , state }
@@ -480,11 +486,20 @@ defmodule Sequin.Runtime.SlotProcessorServer do
480486 end
481487
482488 # The server is asking for a reply
483- def handle_data ( << ?k , wal_end :: 64 , _clock :: 64 , 1 >> , % State { } = state ) do
def handle_data(<<?k, wal_end::64, clock::64, 1>>, %State{} = state) do
  # `clock` is the server's system clock taken from the keepalive frame;
  # the helper presumably converts it to "milliseconds ago" — confirm in Sequin.Time.
  since_sent_ms = Time.microseconds_since_2000_to_ms_since_now(clock)

  Logger.info("Received keepalive message for slot (expecting reply)",
    clock: clock,
    wal_end: wal_end,
    diff_ms: since_sent_ms
  )

  # The trailing byte is 1 in this clause, so an ack is always produced.
  execute_timed(:handle_data_keepalive, fn ->
    lsn_to_ack = state |> get_commit_lsn(wal_end)
    ack = ack_message(lsn_to_ack)
    acked_state = %{state | last_lsn_acked_at: Sequin.utc_now()}

    # Record how far behind the server clock we are at the moment of replying.
    log_keepalive_ack(lsn_to_ack, clock)

    {:keep_state_and_ack, ack, acked_state}
  end)
end
@@ -1783,4 +1798,16 @@ defmodule Sequin.Runtime.SlotProcessorServer do
17831798 latency_us = DateTime . diff ( Sequin . utc_now ( ) , ts , :microsecond )
17841799 Prometheus . observe_ingestion_latency ( state . replication_slot . id , state . replication_slot . slot_name , latency_us )
17851800 end
1801+
# Logs the delay between the keepalive's server clock and our ack, choosing
# the log level from the delay: under 100 -> info, under 1000 -> warning,
# anything else -> error. The delay comes from
# Time.microseconds_since_2000_to_ms_since_now/1 (presumably milliseconds — confirm).
defp log_keepalive_ack(commit_lsn, clock) do
  elapsed_ms = Time.microseconds_since_2000_to_ms_since_now(clock)
  text = "Responded to keepalive ack in #{elapsed_ms} ms"
  metadata = [commit_lsn: commit_lsn, diff_ms: elapsed_ms]

  cond do
    elapsed_ms < 100 -> Logger.info(text, metadata)
    elapsed_ms < 1000 -> Logger.warning(text, metadata)
    true -> Logger.error(text, metadata)
  end
end
17861813end
0 commit comments