Skip to content

Commit 8767dfa

Browse files
authored
Merge pull request #20051 from ahrtr/20250529_v2store_panic_3.6
[release-3.6] Add protection on `PromoteMember` and `UpdateRaftAttributes`
2 parents 9f19153 + 091f6c8 commit 8767dfa

File tree

3 files changed

+220
-9
lines changed

3 files changed

+220
-9
lines changed

server/etcdserver/api/membership/cluster.go

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -522,9 +522,17 @@ func (c *RaftCluster) PromoteMember(id types.ID, shouldApplyV3 ShouldApplyV3) {
522522
defer c.Unlock()
523523

524524
if c.v2store != nil {
525-
m := *(c.members[id])
526-
m.RaftAttributes.IsLearner = false
527-
mustUpdateMemberInStore(c.lg, c.v2store, &m)
525+
if _, ok := c.members[id]; ok {
526+
m := *(c.members[id])
527+
m.RaftAttributes.IsLearner = false
528+
mustUpdateMemberInStore(c.lg, c.v2store, &m)
529+
} else {
530+
c.lg.Info("Skipped promoting non-existent member in v2store",
531+
zap.String("cluster-id", c.cid.String()),
532+
zap.String("local-member-id", c.localID.String()),
533+
zap.String("promoted-member-id", id.String()),
534+
)
535+
}
528536
}
529537

530538
if id == c.localID {
@@ -550,15 +558,23 @@ func (c *RaftCluster) PromoteMember(id types.ID, shouldApplyV3 ShouldApplyV3) {
550558
// promotion request had been applied to v3store, but not saved
551559
// to v2snapshot yet when in 3.5. Once upgrading to 3.6, the
552560
// patch here ensure the issue can be automatically fixed.
553-
if m.IsLearner {
561+
if m == nil {
562+
c.lg.Info(
563+
"Skipped forcibly promoting non-existent member in v3store",
564+
zap.String("cluster-id", c.cid.String()),
565+
zap.String("local-member-id", c.localID.String()),
566+
zap.String("promoted-member-id", id.String()),
567+
)
568+
} else if m.IsLearner {
554569
m.RaftAttributes.IsLearner = false
555-
c.lg.Info("Forcibly apply member promotion request", zap.String("member", fmt.Sprintf("%+v", *m)))
570+
c.lg.Info("Forcibly apply member promotion request in v3store", zap.String("member", fmt.Sprintf("%+v", *m)))
556571
c.be.MustHackySaveMemberToBackend(m)
557572
} else {
558573
c.lg.Info(
559-
"ignore already promoted member",
574+
"ignore already promoted member in v3store",
560575
zap.String("cluster-id", c.cid.String()),
561576
zap.String("local-member-id", c.localID.String()),
577+
zap.String("promoted-member-id", id.String()),
562578
)
563579
}
564580
}
@@ -567,6 +583,7 @@ func (c *RaftCluster) PromoteMember(id types.ID, shouldApplyV3 ShouldApplyV3) {
567583
"ignore already promoted member due to backend being nil",
568584
zap.String("cluster-id", c.cid.String()),
569585
zap.String("local-member-id", c.localID.String()),
586+
zap.String("promoted-member-id", id.String()),
570587
)
571588
}
572589
}
@@ -602,9 +619,19 @@ func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes,
602619
defer c.Unlock()
603620

604621
if c.v2store != nil {
605-
m := *(c.members[id])
606-
m.RaftAttributes = raftAttr
607-
mustUpdateMemberInStore(c.lg, c.v2store, &m)
622+
if _, ok := c.members[id]; ok {
623+
m := *(c.members[id])
624+
m.RaftAttributes = raftAttr
625+
mustUpdateMemberInStore(c.lg, c.v2store, &m)
626+
} else {
627+
c.lg.Info("Skipped updating non-existent member in v2store",
628+
zap.String("cluster-id", c.cid.String()),
629+
zap.String("local-member-id", c.localID.String()),
630+
zap.String("updated-remote-peer-id", id.String()),
631+
zap.Strings("updated-remote-peer-urls", raftAttr.PeerURLs),
632+
zap.Bool("updated-remote-peer-is-learner", raftAttr.IsLearner),
633+
)
634+
}
608635
}
609636
if c.be != nil && shouldApplyV3 {
610637
c.members[id].RaftAttributes = raftAttr

server/etcdserver/api/membership/cluster_test.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,118 @@ func TestIsReadyToPromoteMember(t *testing.T) {
992992
}
993993
}
994994

995+
func TestPromoteMember(t *testing.T) {
996+
clientURLs := []string{"http://127.0.0.1:2379"}
997+
testCases := []struct {
998+
name string
999+
members []*Member
1000+
promoteID types.ID
1001+
wantMembers map[types.ID]*Member
1002+
}{
1003+
{
1004+
name: "promote a voting member",
1005+
members: []*Member{
1006+
newTestMember(1, nil, "1", clientURLs),
1007+
newTestMemberAsLearner(2, nil, "2", clientURLs),
1008+
},
1009+
promoteID: 1,
1010+
wantMembers: map[types.ID]*Member{
1011+
1: newTestMember(1, nil, "1", clientURLs),
1012+
2: newTestMemberAsLearner(2, nil, "2", clientURLs),
1013+
},
1014+
},
1015+
{
1016+
name: "promote a learner",
1017+
members: []*Member{
1018+
newTestMember(1, nil, "1", clientURLs),
1019+
newTestMemberAsLearner(2, nil, "2", clientURLs),
1020+
},
1021+
promoteID: 2,
1022+
wantMembers: map[types.ID]*Member{
1023+
1: newTestMember(1, nil, "1", clientURLs),
1024+
2: newTestMember(2, nil, "2", clientURLs),
1025+
},
1026+
},
1027+
{
1028+
name: "promote a non-exist member",
1029+
members: []*Member{
1030+
newTestMember(1, nil, "1", clientURLs),
1031+
newTestMemberAsLearner(2, nil, "2", clientURLs),
1032+
},
1033+
promoteID: 3,
1034+
wantMembers: map[types.ID]*Member{
1035+
1: newTestMember(1, nil, "1", clientURLs),
1036+
2: newTestMemberAsLearner(2, nil, "2", clientURLs),
1037+
},
1038+
},
1039+
}
1040+
1041+
for _, tc := range testCases {
1042+
t.Run(tc.name, func(t *testing.T) {
1043+
c := newTestCluster(t, tc.members)
1044+
st := v2store.New("/0", "/1")
1045+
c.Store(st)
1046+
c.SetStore(st)
1047+
1048+
c.PromoteMember(tc.promoteID, false)
1049+
1050+
mst, _ := membersFromStore(c.lg, st)
1051+
require.Equal(t, tc.wantMembers, mst)
1052+
})
1053+
}
1054+
}
1055+
1056+
func TestUpdateRaftAttributes(t *testing.T) {
1057+
clientURLs := []string{"http://127.0.0.1:2379"}
1058+
oldPeerURLs := []string{"http://127.0.0.1:2380"}
1059+
newPeerURLs := []string{"http://127.0.0.1:2382"}
1060+
testCases := []struct {
1061+
name string
1062+
members []*Member
1063+
updateMemberID types.ID
1064+
wantMembers map[types.ID]*Member
1065+
}{
1066+
{
1067+
name: "update an existing member",
1068+
members: []*Member{
1069+
newTestMember(1, oldPeerURLs, "1", clientURLs),
1070+
newTestMember(2, oldPeerURLs, "2", clientURLs),
1071+
},
1072+
updateMemberID: 2,
1073+
wantMembers: map[types.ID]*Member{
1074+
1: newTestMember(1, oldPeerURLs, "1", clientURLs),
1075+
2: newTestMember(2, newPeerURLs, "2", clientURLs),
1076+
},
1077+
},
1078+
{
1079+
name: "update a non-exist member",
1080+
members: []*Member{
1081+
newTestMember(1, oldPeerURLs, "1", clientURLs),
1082+
newTestMember(2, oldPeerURLs, "2", clientURLs),
1083+
},
1084+
updateMemberID: 3,
1085+
wantMembers: map[types.ID]*Member{
1086+
1: newTestMember(1, oldPeerURLs, "1", clientURLs),
1087+
2: newTestMember(2, oldPeerURLs, "2", clientURLs),
1088+
},
1089+
},
1090+
}
1091+
1092+
for _, tc := range testCases {
1093+
t.Run(tc.name, func(t *testing.T) {
1094+
c := newTestCluster(t, tc.members)
1095+
st := v2store.New("/0", "/1")
1096+
c.Store(st)
1097+
c.SetStore(st)
1098+
1099+
c.UpdateRaftAttributes(tc.updateMemberID, RaftAttributes{PeerURLs: newPeerURLs}, false)
1100+
1101+
mst, _ := membersFromStore(c.lg, st)
1102+
require.Equal(t, tc.wantMembers, mst)
1103+
})
1104+
}
1105+
}
1106+
9951107
func TestClusterStore(t *testing.T) {
9961108
name := "etcd"
9971109
clientURLs := []string{"http://127.0.0.1:4001"}

tests/e2e/force_new_cluster_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Copyright 2025 The etcd Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package e2e
16+
17+
import (
18+
"context"
19+
"testing"
20+
21+
"github.com/stretchr/testify/require"
22+
23+
"go.etcd.io/etcd/tests/v3/framework/config"
24+
"go.etcd.io/etcd/tests/v3/framework/e2e"
25+
)
26+
27+
// TestForceNewCluster verified that etcd works as expected when --force-new-cluster.
28+
// Refer to discussion in https://github.com/etcd-io/etcd/issues/20009.
29+
func TestForceNewCluster(t *testing.T) {
30+
e2e.BeforeTest(t)
31+
32+
testCases := []struct {
33+
name string
34+
snapcount int
35+
}{
36+
{
37+
name: "create a snapshot after promotion",
38+
snapcount: 10,
39+
},
40+
{
41+
name: "no snapshot after promotion",
42+
snapcount: 0,
43+
},
44+
}
45+
46+
for _, tc := range testCases {
47+
t.Run(tc.name, func(t *testing.T) {
48+
ctx := context.Background()
49+
epc, promotedMembers := mustCreateNewClusterByPromotingMembers(t, e2e.CurrentVersion, 5,
50+
e2e.WithKeepDataDir(true), e2e.WithSnapshotCount(uint64(tc.snapcount)))
51+
require.Len(t, promotedMembers, 4)
52+
53+
for i := 0; i < tc.snapcount; i++ {
54+
err := epc.Etcdctl().Put(ctx, "foo", "bar", config.PutOptions{})
55+
require.NoError(t, err)
56+
}
57+
58+
require.NoError(t, epc.Close())
59+
60+
m := epc.Procs[0]
61+
t.Logf("Forcibly create a one-member cluster with member: %s", m.Config().Name)
62+
m.Config().Args = append(m.Config().Args, "--force-new-cluster")
63+
require.NoError(t, m.Start(ctx))
64+
65+
t.Log("Restarting the member")
66+
require.NoError(t, m.Restart(ctx))
67+
68+
t.Log("Closing the member")
69+
require.NoError(t, m.Close())
70+
})
71+
}
72+
}

0 commit comments

Comments
 (0)