@@ -25,7 +25,8 @@ import (
2525 "fmt"
2626 "math/rand"
2727 "net/url"
28- "os"
28+ os "os"
29+ "path/filepath"
2930 "reflect"
3031 "runtime"
3132 "slices"
@@ -10587,3 +10588,117 @@ func TestJetStreamConsumerNoDeleteAfterConcurrentShutdownAndLeaderChange(t *test
1058710588 o = mset .lookupConsumer ("CONSUMER" )
1058810589 require_NotNil (t , o )
1058910590}
10591+
10592+ func TestJetStreamConsumerReconcileConsumerAfterStreamDataLoss (t * testing.T ) {
10593+ test := func (t * testing.T , totalMsgs int ) {
10594+ storeDir := t .TempDir ()
10595+ conf := createConfFile (t , []byte (fmt .Sprintf (`
10596+ listen: 127.0.0.1:-1
10597+ jetstream: {store_dir: %q}
10598+ ` , storeDir )))
10599+
10600+ s , _ := RunServerWithConfig (conf )
10601+ defer s .Shutdown ()
10602+
10603+ nc , js := jsClientConnect (t , s )
10604+ defer nc .Close ()
10605+
10606+ _ , err := js .AddStream (& nats.StreamConfig {
10607+ Name : "TEST" ,
10608+ Subjects : []string {"foo" },
10609+ })
10610+ require_NoError (t , err )
10611+
10612+ // Publish a few messages.
10613+ for range totalMsgs {
10614+ _ , err = js .Publish ("foo" , nil )
10615+ require_NoError (t , err )
10616+ }
10617+
10618+ _ , err = js .AddConsumer ("TEST" , & nats.ConsumerConfig {
10619+ Durable : "DURABLE" ,
10620+ AckPolicy : nats .AckExplicitPolicy ,
10621+ })
10622+ require_NoError (t , err )
10623+
10624+ sub , err := js .PullSubscribe (_EMPTY_ , "CONSUMER" , nats .BindStream ("TEST" ))
10625+ require_NoError (t , err )
10626+ defer sub .Drain ()
10627+
10628+ // Consume all available messages.
10629+ msgs , err := sub .Fetch (totalMsgs , nats .MaxWait (200 * time .Millisecond ))
10630+ require_NoError (t , err )
10631+ require_Len (t , len (msgs ), totalMsgs )
10632+ for _ , msg := range msgs {
10633+ require_NoError (t , msg .AckSync ())
10634+ }
10635+
10636+ // Confirm the consumer info reports all messages as delivered and acked.
10637+ lseq := uint64 (totalMsgs )
10638+ ci , err := js .ConsumerInfo ("TEST" , "CONSUMER" )
10639+ require_NoError (t , err )
10640+ require_Equal (t , ci .NumPending , 0 )
10641+ require_Equal (t , ci .NumAckPending , 0 )
10642+ require_Equal (t , ci .Delivered .Stream , lseq )
10643+ require_Equal (t , ci .AckFloor .Stream , lseq )
10644+ require_Equal (t , ci .Delivered .Consumer , lseq )
10645+
10646+ // Shut down the server and manually remove or truncate the message blocks, simulating data loss.
10647+ mset , err := s .globalAccount ().lookupStream ("TEST" )
10648+ require_NoError (t , err )
10649+ fs := mset .store .(* fileStore )
10650+ blk := filepath .Join (fs .fcfg .StoreDir , msgDir , "1.blk" )
10651+ index := filepath .Join (fs .fcfg .StoreDir , msgDir , streamStreamStateFile )
10652+ nc .Close ()
10653+ s .Shutdown ()
10654+ if totalMsgs > 1 {
10655+ stat , err := os .Stat (blk )
10656+ require_NoError (t , err )
10657+ require_NoError (t , os .Truncate (blk , stat .Size ()/ 2 + 1 ))
10658+ } else {
10659+ require_NoError (t , os .Remove (blk ))
10660+ }
10661+ require_NoError (t , os .Remove (index ))
10662+
10663+ // Restart the server and reconnect.
10664+ s , _ = RunServerWithConfig (conf )
10665+ defer s .Shutdown ()
10666+ nc , js = jsClientConnect (t , s )
10667+ defer nc .Close ()
10668+
10669+ // Publish another message. Due to the simulated data loss, the stream sequence should continue
10670+ // counting after truncating the corrupted data.
10671+ pubAck , err := js .Publish ("foo" , nil )
10672+ require_NoError (t , err )
10673+ require_Equal (t , pubAck .Sequence , lseq )
10674+
10675+ sub , err = js .PullSubscribe (_EMPTY_ , "CONSUMER" , nats .BindStream ("TEST" ))
10676+ require_NoError (t , err )
10677+ defer sub .Drain ()
10678+
10679+ // The consumer should be able to consume above message.
10680+ // Previously the consumer state would not be reconciled and would not be able to consume the message.
10681+ msgs , err = sub .Fetch (1 , nats .MaxWait (200 * time .Millisecond ))
10682+ require_NoError (t , err )
10683+ require_Len (t , len (msgs ), 1 )
10684+ msg := msgs [0 ]
10685+ meta , err := msg .Metadata ()
10686+ require_NoError (t , err )
10687+ require_Equal (t , meta .Sequence .Stream , lseq )
10688+ require_NoError (t , msg .AckSync ())
10689+
10690+ // Confirm the consumer info reports all messages as delivered and acked.
10691+ // But the delivered sequence shouldn't be reset and still move monotonically.
10692+ ci , err = js .ConsumerInfo ("TEST" , "CONSUMER" )
10693+ require_NoError (t , err )
10694+ require_Equal (t , ci .NumPending , 0 )
10695+ require_Equal (t , ci .NumAckPending , 0 )
10696+ require_Equal (t , ci .Delivered .Stream , lseq )
10697+ require_Equal (t , ci .AckFloor .Stream , lseq )
10698+ require_Equal (t , ci .Delivered .Consumer , lseq + 1 )
10699+ }
10700+
10701+ for _ , totalMsgs := range []int {1 , 2 } {
10702+ t .Run (fmt .Sprint (totalMsgs ), func (t * testing.T ) { test (t , totalMsgs ) })
10703+ }
10704+ }
0 commit comments