Skip to content

Commit 03a2737

Browse files
authored
Keep readonly state when stopping lifecycler (#6642)
* Keep READONLY when stopping lifecycler Signed-off-by: Daniel Deluiggi <[email protected]> * changelog Signed-off-by: Daniel Deluiggi <[email protected]> * fix test Signed-off-by: Daniel Deluiggi <[email protected]> * typo Signed-off-by: Daniel Deluiggi <[email protected]> --------- Signed-off-by: Daniel Deluiggi <[email protected]>
1 parent f0ad7f5 commit 03a2737

File tree

3 files changed

+96
-4
lines changed

3 files changed

+96
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* [BUGFIX] Query Frontend: Fix panic caused by nil pointer dereference. #6609
2626
* [BUGFIX] Ingester: Add check to avoid query 5xx when closing tsdb. #6616
2727
* [BUGFIX] Querier: Fix panic when marshaling QueryResultRequest. #6601
28+
* [BUGFIX] Ingester: Avoid resharding for query when restart readonly ingesters. #6642
2829

2930
## 1.19.0 2025-02-27
3031

pkg/ring/lifecycler.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -655,10 +655,15 @@ func (i *Lifecycler) stopping(runningError error) error {
655655
i.setPreviousState(currentState)
656656
}
657657

658-
// Mark ourselved as Leaving so no more samples are send to us.
659-
err := i.changeState(context.Background(), LEAVING)
660-
if err != nil {
661-
level.Error(i.logger).Log("msg", "failed to set state to LEAVING", "ring", i.RingName, "err", err)
658+
// We dont need to mark us as leaving if READONLY. There is not request sent to us.
659+
// Also important to avoid this change so we dont have resharding(for querier) happen when READONLY restart as we extended shard on READONLY but not on LEAVING
660+
// Query also keeps calling pods on LEAVING or JOINING not causing any difference if left on READONLY
661+
if i.GetState() != READONLY {
662+
// Mark ourselved as Leaving so no more samples are send to us.
663+
err := i.changeState(context.Background(), LEAVING)
664+
if err != nil {
665+
level.Error(i.logger).Log("msg", "failed to set state to LEAVING", "ring", i.RingName, "err", err)
666+
}
662667
}
663668

664669
// Do the transferring / flushing on a background goroutine so we can continue

pkg/ring/lifecycler_test.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,92 @@ func TestRestartIngester_DisabledHeartbeat_unregister_on_shutdown_false(t *testi
743743
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2))
744744
}
745745

746+
func TestRestartIngester_READONLY(t *testing.T) {
747+
ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil)
748+
t.Cleanup(func() { assert.NoError(t, closer.Close()) })
749+
750+
var ringConfig Config
751+
flagext.DefaultValues(&ringConfig)
752+
ringConfig.KVStore.Mock = ringStore
753+
754+
r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil)
755+
require.NoError(t, err)
756+
require.NoError(t, services.StartAndAwaitRunning(context.Background(), r))
757+
758+
// poll function waits for a condition and returning actual state of the ingesters after the condition succeed.
759+
poll := func(condition func(*Desc) bool) map[string]InstanceDesc {
760+
var ingesters map[string]InstanceDesc
761+
test.Poll(t, 5*time.Second, true, func() interface{} {
762+
d, err := r.KVClient.Get(context.Background(), ringKey)
763+
require.NoError(t, err)
764+
765+
desc, ok := d.(*Desc)
766+
767+
if ok {
768+
ingesters = desc.Ingesters
769+
}
770+
return ok && condition(desc)
771+
})
772+
773+
return ingesters
774+
}
775+
776+
// Starts Ingester and wait it to became active
777+
startIngesterAndWaitState := func(ingId string, addr string, expectedState InstanceState) *Lifecycler {
778+
lifecyclerConfig := testLifecyclerConfigWithAddr(ringConfig, ingId, addr)
779+
lifecyclerConfig.UnregisterOnShutdown = false
780+
lifecycler, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, "lifecycler", ringKey, true, true, log.NewNopLogger(), nil)
781+
require.NoError(t, err)
782+
require.NoError(t, services.StartAndAwaitRunning(context.Background(), lifecycler))
783+
poll(func(desc *Desc) bool {
784+
return desc.Ingesters[ingId].State == expectedState
785+
})
786+
return lifecycler
787+
}
788+
789+
l1 := startIngesterAndWaitState("ing1", "0.0.0.0", ACTIVE)
790+
l2 := startIngesterAndWaitState("ing2", "0.0.0.0", ACTIVE)
791+
792+
err = l2.ChangeState(context.Background(), READONLY)
793+
require.NoError(t, err)
794+
poll(func(desc *Desc) bool {
795+
return desc.Ingesters["ing2"].State == READONLY
796+
})
797+
798+
ingesters := poll(func(desc *Desc) bool {
799+
return len(desc.Ingesters) == 2 && desc.Ingesters["ing1"].State == ACTIVE && desc.Ingesters["ing2"].State == READONLY
800+
})
801+
802+
// Both Ingester should be active and running
803+
assert.Equal(t, ACTIVE, ingesters["ing1"].State)
804+
assert.Equal(t, READONLY, ingesters["ing2"].State)
805+
806+
// Stop ingester 1 gracefully should leave it on LEAVING STATE on the ring
807+
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l1))
808+
// Stop ingester 2 gracefully should keep on READONLY
809+
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2))
810+
811+
ingesters = poll(func(desc *Desc) bool {
812+
return len(desc.Ingesters) == 2 && desc.Ingesters["ing1"].State == LEAVING
813+
})
814+
815+
assert.Equal(t, LEAVING, ingesters["ing1"].State)
816+
assert.Equal(t, READONLY, ingesters["ing2"].State)
817+
818+
// Start Ingester1 again - Should flip back to ACTIVE in the ring
819+
defer services.StopAndAwaitTerminated(context.Background(), startIngesterAndWaitState("ing1", "0.0.0.0", ACTIVE)) //nolint:errcheck
820+
821+
// Start Ingester2 again - Should keep on READONLY
822+
defer services.StopAndAwaitTerminated(context.Background(), startIngesterAndWaitState("ing2", "0.0.0.0", READONLY)) //nolint:errcheck
823+
824+
ingesters = poll(func(desc *Desc) bool {
825+
return len(desc.Ingesters) == 2 && desc.Ingesters["ing1"].State == ACTIVE
826+
})
827+
828+
assert.Equal(t, ACTIVE, ingesters["ing1"].State)
829+
assert.Equal(t, READONLY, ingesters["ing2"].State)
830+
}
831+
746832
func TestTokenFileOnDisk(t *testing.T) {
747833
ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil)
748834
t.Cleanup(func() { assert.NoError(t, closer.Close()) })

0 commit comments

Comments
 (0)