Skip to content

Commit b3ffb7b

Browse files
committed
Fix Learner promotion not being persisted into v3store may be propagated across multiple upgrades
Signed-off-by: Benjamin Wang <benjamin.ahrtr@gmail.com>
1 parent 4d14240 commit b3ffb7b

File tree

3 files changed

+211
-11
lines changed

3 files changed

+211
-11
lines changed

server/etcdserver/api/membership/cluster.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,28 @@ func membersFromBackend(lg *zap.Logger, be backend.Backend) (map[types.ID]*Membe
744744
return mustReadMembersFromBackend(lg, be)
745745
}
746746

747+
func SyncLearnerPromotionIfNeeded(lg *zap.Logger, be backend.Backend, st v2store.Store) error {
748+
v2Members, _ := membersFromStore(lg, st)
749+
v3Members, _ := membersFromBackend(lg, be)
750+
751+
for id, v3Member := range v3Members {
752+
v2Member, ok := v2Members[id]
753+
if !ok {
754+
return fmt.Errorf("detected member only in v3store but missing in v2store, member: %+v", *v3Member)
755+
}
756+
757+
if !v2Member.IsLearner && v3Member.IsLearner {
758+
syncedV3Member := v3Member.Clone()
759+
syncedV3Member.IsLearner = false
760+
lg.Warn("Syncing member in v3store", zap.String("member", fmt.Sprintf("%+v", *syncedV3Member)))
761+
if err := unsafeSaveMemberToBackend(lg, be, syncedV3Member); err != nil {
762+
return fmt.Errorf("failed to save synced member to backend, member: %+v, error:%w", *syncedV3Member, err)
763+
}
764+
}
765+
}
766+
return nil
767+
}
768+
747769
func clusterVersionFromStore(lg *zap.Logger, st v2store.Store) *semver.Version {
748770
e, err := st.Get(path.Join(storePrefix, "version"), false, false)
749771
if err != nil {

server/etcdserver/api/membership/cluster_test.go

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/coreos/go-semver/semver"
2626
"github.com/stretchr/testify/assert"
27+
"github.com/stretchr/testify/require"
2728
betesting "go.etcd.io/etcd/server/v3/mvcc/backend/testing"
2829
"go.uber.org/zap"
2930
"go.uber.org/zap/zaptest"
@@ -280,6 +281,179 @@ func TestClusterValidateAndAssignIDs(t *testing.T) {
280281
}
281282
}
282283

284+
// TestSyncLearnerPromotionIfNeeded verified that etcd can automatically
285+
// correct member's IsLearner attribute in v3store based on v2store when
286+
// applying a snapshot.
287+
// See https://github.com/etcd-io/etcd/issues/20793
288+
func TestSyncLearnerPromotionIfNeeded(t *testing.T) {
289+
members := []*Member{
290+
{
291+
ID: types.ID(1),
292+
RaftAttributes: RaftAttributes{PeerURLs: []string{"http://10.200.6.180:2380"}},
293+
Attributes: Attributes{Name: "foo", ClientURLs: []string{"http://10.200.6.180:2379"}},
294+
},
295+
{
296+
ID: types.ID(2),
297+
RaftAttributes: RaftAttributes{PeerURLs: []string{"http://10.200.6.181:2380"}},
298+
Attributes: Attributes{Name: "foo", ClientURLs: []string{"http://10.200.6.181:2379"}},
299+
},
300+
{
301+
ID: types.ID(3),
302+
RaftAttributes: RaftAttributes{PeerURLs: []string{"http://10.200.6.182:2380"}},
303+
Attributes: Attributes{Name: "foo", ClientURLs: []string{"http://10.200.6.182:2379"}},
304+
},
305+
}
306+
307+
genMember := func(m *Member, isLearner bool, attr *Attributes) *Member {
308+
newMember := m.Clone()
309+
newMember.IsLearner = isLearner
310+
if attr != nil {
311+
newMember.Attributes = *attr
312+
}
313+
return newMember
314+
}
315+
316+
testCases := []struct {
317+
name string
318+
v2Members []*Member
319+
v3Members []*Member
320+
expectedSyncedMembers map[types.ID]*Member
321+
}{
322+
{
323+
name: "no learners in both v2store and v3store",
324+
v2Members: []*Member{
325+
genMember(members[0], false, nil),
326+
genMember(members[1], false, nil),
327+
genMember(members[2], false, nil),
328+
},
329+
v3Members: []*Member{
330+
genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.190:2380"}}),
331+
genMember(members[1], false, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.191:2380"}}),
332+
genMember(members[2], false, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.192:2380"}}),
333+
},
334+
expectedSyncedMembers: map[types.ID]*Member{
335+
members[0].ID: genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.190:2380"}}),
336+
members[1].ID: genMember(members[1], false, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.191:2380"}}),
337+
members[2].ID: genMember(members[2], false, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.192:2380"}}),
338+
},
339+
},
340+
{
341+
name: "all learners in both v2store and v3store",
342+
v2Members: []*Member{
343+
genMember(members[0], true, nil),
344+
genMember(members[1], true, nil),
345+
genMember(members[2], true, nil),
346+
},
347+
v3Members: []*Member{
348+
genMember(members[0], true, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.190:2380"}}),
349+
genMember(members[1], true, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.191:2380"}}),
350+
genMember(members[2], true, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.192:2380"}}),
351+
},
352+
expectedSyncedMembers: map[types.ID]*Member{
353+
members[0].ID: genMember(members[0], true, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.190:2380"}}),
354+
members[1].ID: genMember(members[1], true, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.191:2380"}}),
355+
members[2].ID: genMember(members[2], true, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.192:2380"}}),
356+
},
357+
},
358+
{
359+
name: "no learners in v2store, all learners in v3store",
360+
v2Members: []*Member{
361+
genMember(members[0], false, nil),
362+
genMember(members[1], false, nil),
363+
genMember(members[2], false, nil),
364+
},
365+
v3Members: []*Member{
366+
genMember(members[0], true, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.170:2380"}}),
367+
genMember(members[1], true, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.171:2380"}}),
368+
genMember(members[2], true, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.172:2380"}}),
369+
},
370+
expectedSyncedMembers: map[types.ID]*Member{
371+
members[0].ID: genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.170:2380"}}),
372+
members[1].ID: genMember(members[1], false, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.171:2380"}}),
373+
members[2].ID: genMember(members[2], false, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.172:2380"}}),
374+
},
375+
},
376+
{
377+
name: "all learners in v2store, no learners in v3store",
378+
v2Members: []*Member{
379+
genMember(members[0], true, nil),
380+
genMember(members[1], true, nil),
381+
genMember(members[2], true, nil),
382+
},
383+
v3Members: []*Member{
384+
genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.190:2380"}}),
385+
genMember(members[1], false, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.191:2380"}}),
386+
genMember(members[2], false, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.192:2380"}}),
387+
},
388+
expectedSyncedMembers: map[types.ID]*Member{
389+
members[0].ID: genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.190:2380"}}),
390+
members[1].ID: genMember(members[1], false, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.191:2380"}}),
391+
members[2].ID: genMember(members[2], false, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.192:2380"}}),
392+
},
393+
},
394+
{
395+
name: "same learners in v2store and v3store",
396+
v2Members: []*Member{
397+
genMember(members[0], false, nil),
398+
genMember(members[1], true, nil),
399+
genMember(members[2], true, nil),
400+
},
401+
v3Members: []*Member{
402+
genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.160:2380"}}),
403+
genMember(members[1], true, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.161:2380"}}),
404+
genMember(members[2], true, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.162:2380"}}),
405+
},
406+
expectedSyncedMembers: map[types.ID]*Member{
407+
members[0].ID: genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.160:2380"}}),
408+
members[1].ID: genMember(members[1], true, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.161:2380"}}),
409+
members[2].ID: genMember(members[2], true, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.162:2380"}}),
410+
},
411+
},
412+
{
413+
name: "different learners in v2store and v3store",
414+
v2Members: []*Member{
415+
genMember(members[0], true, nil),
416+
genMember(members[1], true, nil),
417+
genMember(members[2], false, nil),
418+
},
419+
v3Members: []*Member{
420+
genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.160:2380"}}),
421+
genMember(members[1], true, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.161:2380"}}),
422+
genMember(members[2], true, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.162:2380"}}),
423+
},
424+
expectedSyncedMembers: map[types.ID]*Member{
425+
members[0].ID: genMember(members[0], false, &Attributes{Name: "foo1", ClientURLs: []string{"http://10.200.6.160:2380"}}),
426+
members[1].ID: genMember(members[1], true, &Attributes{Name: "foo2", ClientURLs: []string{"http://10.200.6.161:2380"}}),
427+
members[2].ID: genMember(members[2], false, &Attributes{Name: "foo3", ClientURLs: []string{"http://10.200.6.162:2380"}}),
428+
},
429+
},
430+
}
431+
432+
for _, tc := range testCases {
433+
t.Run(tc.name, func(t *testing.T) {
434+
lg := zaptest.NewLogger(t)
435+
436+
st := v2store.New()
437+
for _, m := range tc.v2Members {
438+
mustSaveMemberToStore(lg, st, m)
439+
}
440+
441+
be, _ := betesting.NewDefaultTmpBackend(t)
442+
mustCreateBackendBuckets(be)
443+
defer be.Close()
444+
445+
for _, m := range tc.v3Members {
446+
require.NoError(t, unsafeSaveMemberToBackend(lg, be, m))
447+
}
448+
449+
require.NoError(t, SyncLearnerPromotionIfNeeded(lg, be, st))
450+
451+
syncedMembers, _ := membersFromBackend(lg, be)
452+
require.Equal(t, tc.expectedSyncedMembers, syncedMembers)
453+
})
454+
}
455+
}
456+
283457
func TestClusterValidateConfigurationChange(t *testing.T) {
284458
cl := NewCluster(zaptest.NewLogger(t))
285459
cl.SetStore(v2store.New())

server/etcdserver/server.go

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,21 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
13501350
lg.Info("applySnapshot: opened snapshot backend")
13511351
// gofail: var applyAfterOpenSnapshot struct{}
13521352

1353+
lg.Info("restoring v2 store")
1354+
if err := s.v2store.Recovery(apply.snapshot.Data); err != nil {
1355+
lg.Panic("failed to restore v2 store", zap.Error(err))
1356+
}
1357+
1358+
if err := assertNoV2StoreContent(lg, s.v2store, s.Cfg.V2Deprecation); err != nil {
1359+
lg.Panic("illegal v2store content", zap.Error(err))
1360+
}
1361+
1362+
lg.Info("restored v2 store")
1363+
1364+
if err = membership.SyncLearnerPromotionIfNeeded(lg, newbe, s.v2store); err != nil {
1365+
lg.Error("Failed to sync learner promotion for v3store", zap.Error(err))
1366+
}
1367+
13531368
// We need to set the backend to consistIndex before recovering the lessor,
13541369
// because lessor.Recover will commit the boltDB transaction, accordingly it
13551370
// will get the old consistent_index persisted into the db in OnPreCommitUnsafe.
@@ -1410,17 +1425,6 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
14101425
lg.Info("restored auth store")
14111426
}
14121427

1413-
lg.Info("restoring v2 store")
1414-
if err := s.v2store.Recovery(apply.snapshot.Data); err != nil {
1415-
lg.Panic("failed to restore v2 store", zap.Error(err))
1416-
}
1417-
1418-
if err := assertNoV2StoreContent(lg, s.v2store, s.Cfg.V2Deprecation); err != nil {
1419-
lg.Panic("illegal v2store content", zap.Error(err))
1420-
}
1421-
1422-
lg.Info("restored v2 store")
1423-
14241428
s.cluster.SetBackend(newbe)
14251429

14261430
lg.Info("restoring cluster configuration")

0 commit comments

Comments
 (0)