Skip to content

Commit 45fdcb4

Browse files
committed
resetNodes should respect the deadNodeReclaimTime
When deadNodeReclaimTime is 0, dead nodes should not be reclaimed per the definition. To avoid breaking changes, set the default deadNodeReclaimTime of LAN/WAN configurations to 30 seconds.
1 parent 3f82dc1 commit 45fdcb4

File tree

4 files changed

+102
-52
lines changed

4 files changed

+102
-52
lines changed

config.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,8 @@ type Config struct {
241241
UDPBufferSize int
242242

243243
// DeadNodeReclaimTime controls the time before a dead node's name can be
244-
// reclaimed by one with a different address or port. By default, this is 0,
245-
// meaning nodes cannot be reclaimed this way.
244+
// reclaimed by one with a different address or port. Setting DeadNodeReclaimTime
245+
// to 0 means that dead nodes will never be reclaimed.
246246
DeadNodeReclaimTime time.Duration
247247

248248
// RequireNodeNames controls if the name of a node is required when sending
@@ -319,6 +319,8 @@ func DefaultLANConfig() *Config {
319319
DisableTcpPings: false, // TCP pings are safe, even with mixed versions
320320
AwarenessMaxMultiplier: 8, // Probe interval backs off to 8 seconds
321321

322+
DeadNodeReclaimTime: 30 * time.Second, // Reclaim dead nodes after 30 seconds
323+
322324
GossipNodes: 3, // Gossip to 3 nodes
323325
GossipInterval: 200 * time.Millisecond, // Gossip more rapidly
324326
GossipToTheDeadTime: 30 * time.Second, // Same as push/pull

state.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@ func (m *Memberlist) resetNodes() {
570570
defer m.nodeLock.Unlock()
571571

572572
// Move dead nodes, but respect gossip to the dead interval
573-
deadIdx := moveDeadNodes(m.nodes, m.config.GossipToTheDeadTime)
573+
deadIdx := moveDeadNodes(m.nodes, m.config.DeadNodeReclaimTime, m.config.GossipToTheDeadTime)
574574

575575
// Deregister the dead nodes
576576
for i := deadIdx; i < len(m.nodes); i++ {

util.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,16 +105,26 @@ func pushPullScale(interval time.Duration, n int) time.Duration {
105105

106106
// moveDeadNodes moves dead and left nodes that that have not changed during the gossipToTheDeadTime interval
107107
// to the end of the slice and returns the index of the first moved node.
108-
func moveDeadNodes(nodes []*nodeState, gossipToTheDeadTime time.Duration) int {
108+
func moveDeadNodes(nodes []*nodeState, deadNodeReclaimTime, gossipToTheDeadTime time.Duration) int {
109109
numDead := 0
110110
n := len(nodes)
111111
for i := 0; i < n-numDead; i++ {
112112
if !nodes[i].DeadOrLeft() {
113113
continue
114114
}
115115

116-
// Respect the gossip to the dead interval
117-
if time.Since(nodes[i].StateChange) <= gossipToTheDeadTime {
116+
if nodes[i].State == StateDead && deadNodeReclaimTime == 0 {
117+
// If deadNodeReclaimTime is 0, we don't reclaim dead nodes
118+
continue
119+
}
120+
121+
reclaimTime := deadNodeReclaimTime
122+
if gossipToTheDeadTime > reclaimTime {
123+
reclaimTime = gossipToTheDeadTime
124+
}
125+
126+
// Respect the gossip to the dead interval and the dead node reclaim time
127+
if time.Since(nodes[i].StateChange) <= reclaimTime {
118128
continue
119129
}
120130

util_test.go

Lines changed: 84 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"testing"
1010
"time"
1111

12+
"github.com/stretchr/testify/assert"
1213
"github.com/stretchr/testify/require"
1314
)
1415

@@ -166,76 +167,113 @@ func TestPushPullScale(t *testing.T) {
166167
}
167168

168169
func TestMoveDeadNodes(t *testing.T) {
169-
nodes := []*nodeState{
170-
&nodeState{
170+
now := time.Now()
171+
nodeStates := []*nodeState{
172+
{
171173
State: StateDead,
172-
StateChange: time.Now().Add(-20 * time.Second),
174+
StateChange: now.Add(-20 * time.Second),
173175
},
174-
&nodeState{
176+
{
175177
State: StateAlive,
176-
StateChange: time.Now().Add(-20 * time.Second),
178+
StateChange: now.Add(-20 * time.Second),
177179
},
178-
// This dead node should not be moved, as its state changed
179-
// less than the specified GossipToTheDead time ago
180-
&nodeState{
180+
{
181181
State: StateDead,
182-
StateChange: time.Now().Add(-10 * time.Second),
182+
StateChange: now.Add(-10 * time.Second),
183183
},
184-
// This left node should not be moved, as its state changed
185-
// less than the specified GossipToTheDead time ago
186-
&nodeState{
184+
{
187185
State: StateLeft,
188-
StateChange: time.Now().Add(-10 * time.Second),
186+
StateChange: now.Add(-10 * time.Second),
189187
},
190-
&nodeState{
188+
{
191189
State: StateLeft,
192-
StateChange: time.Now().Add(-20 * time.Second),
190+
StateChange: now.Add(-20 * time.Second),
193191
},
194-
&nodeState{
192+
{
195193
State: StateAlive,
196-
StateChange: time.Now().Add(-20 * time.Second),
194+
StateChange: now.Add(-20 * time.Second),
197195
},
198-
&nodeState{
196+
{
199197
State: StateDead,
200-
StateChange: time.Now().Add(-20 * time.Second),
198+
StateChange: now.Add(-20 * time.Second),
201199
},
202-
&nodeState{
200+
{
203201
State: StateAlive,
204-
StateChange: time.Now().Add(-20 * time.Second),
202+
StateChange: now.Add(-20 * time.Second),
205203
},
206-
&nodeState{
204+
{
205+
State: StateLeft,
206+
StateChange: now.Add(-20 * time.Second),
207+
},
208+
{
207209
State: StateLeft,
208-
StateChange: time.Now().Add(-20 * time.Second),
210+
StateChange: now.Add(-30 * time.Second),
211+
},
212+
{
213+
State: StateDead,
214+
StateChange: now.Add(-30 * time.Second),
209215
},
210216
}
211217

212-
idx := moveDeadNodes(nodes, (15 * time.Second))
213-
if idx != 5 {
214-
t.Fatalf("bad index")
218+
tests := []struct {
219+
name string
220+
deadNodeReclaimTime time.Duration
221+
gossipToTheDeadTime time.Duration
222+
expectedIndex int
223+
}{
224+
{
225+
name: "Do not reclaim dead nodes when deadNodeReclaimTime is 0",
226+
deadNodeReclaimTime: 0,
227+
gossipToTheDeadTime: 15 * time.Second,
228+
expectedIndex: 8,
229+
},
230+
{
231+
name: "Reclaim dead nodes when deadNodeReclaimTime is greater than 0",
232+
deadNodeReclaimTime: 10 * time.Second,
233+
gossipToTheDeadTime: 15 * time.Second,
234+
expectedIndex: 5,
235+
},
236+
{
237+
name: "deadNodeReclaimTime is greater than gossipToTheDeadTime",
238+
deadNodeReclaimTime: 25 * time.Second,
239+
gossipToTheDeadTime: 15 * time.Second,
240+
expectedIndex: 9,
241+
},
215242
}
216-
for i := 0; i < idx; i++ {
217-
switch i {
218-
case 2:
219-
// Recently dead node remains at index 2,
220-
// since nodes are swapped out to move to end.
221-
if nodes[i].State != StateDead {
222-
t.Fatalf("Bad state %d", i)
243+
for _, tt := range tests {
244+
t.Run(tt.name, func(t *testing.T) {
245+
states := make([]*nodeState, len(nodeStates))
246+
copy(states, nodeStates)
247+
index := moveDeadNodes(states, tt.deadNodeReclaimTime, tt.gossipToTheDeadTime)
248+
assert.Equal(t, tt.expectedIndex, index)
249+
for i := 0; i < len(states); i++ {
250+
t.Logf("Node %d: %s, %v", i, states[i].State.metricsString(), states[i].StateChange)
223251
}
224-
case 3:
225-
//Recently left node should remain at 3
226-
if nodes[i].State != StateLeft {
227-
t.Fatalf("Bad State %d", i)
252+
reclaimTime := now.Add(tt.deadNodeReclaimTime)
253+
if tt.gossipToTheDeadTime > tt.deadNodeReclaimTime {
254+
reclaimTime = now.Add(tt.gossipToTheDeadTime)
228255
}
229-
default:
230-
if nodes[i].State != StateAlive {
231-
t.Fatalf("Bad state %d", i)
256+
257+
if tt.deadNodeReclaimTime == 0 {
258+
for i := 0; i < index; i++ {
259+
if states[i].State == StateLeft {
260+
assert.True(t, states[i].StateChange.Before(reclaimTime), "node %d should have been moved", i)
261+
}
262+
}
263+
for i := index; i < len(states); i++ {
264+
assert.Equal(t, StateLeft, states[i].State)
265+
}
266+
} else {
267+
for i := 0; i < index; i++ {
268+
if states[i].DeadOrLeft() {
269+
assert.True(t, states[i].StateChange.Before(reclaimTime), "node %d should have been moved", i)
270+
}
271+
}
272+
for i := index; i < len(states); i++ {
273+
assert.True(t, states[i].DeadOrLeft())
274+
}
232275
}
233-
}
234-
}
235-
for i := idx; i < len(nodes); i++ {
236-
if !nodes[i].DeadOrLeft() {
237-
t.Fatalf("Bad state %d", i)
238-
}
276+
})
239277
}
240278
}
241279

0 commit comments

Comments
 (0)