Skip to content

Commit 49528d4

Browse files
committed
working draft
1 parent d6c0213 commit 49528d4

16 files changed

+1249
-5099
lines changed

bootstrap.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,7 @@ func (rn *RawNode) Bootstrap(peers []Peer) error {
7676
for _, peer := range peers {
7777
rn.raft.applyConfChange(pb.ConfChange{NodeID: peer.ID, Type: pb.ConfChangeAddNode}.AsV2())
7878
}
79+
80+
traceBootstrap(rn.raft)
7981
return nil
8082
}

raft.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1963,6 +1963,8 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
19631963
panic(err)
19641964
}
19651965

1966+
traceConfChangeEvent(cfg, r)
1967+
19661968
return r.switchToConfig(cfg, trk)
19671969
}
19681970

@@ -1973,8 +1975,6 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
19731975
//
19741976
// The inputs usually result from restoring a ConfState or applying a ConfChange.
19751977
func (r *raft) switchToConfig(cfg tracker.Config, trk tracker.ProgressMap) pb.ConfState {
1976-
traceConfChangeEvent(cfg, r)
1977-
19781978
r.trk.Config = cfg
19791979
r.trk.Progress = trk
19801980

rawnode.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,8 @@ func (rn *RawNode) Advance(_ Ready) {
491491
rn.stepsOnAdvance[i] = pb.Message{}
492492
}
493493
rn.stepsOnAdvance = rn.stepsOnAdvance[:0]
494+
495+
traceAdvance(rn.raft)
494496
}
495497

496498
// Status returns the current status of the given group. This allocates, see

state_trace.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type stateMachineEventType int
3030

3131
const (
3232
rsmInitState stateMachineEventType = iota
33+
rsmBootstrap
3334
rsmBecomeCandidate
3435
rsmBecomeFollower
3536
rsmBecomeLeader
@@ -38,6 +39,7 @@ const (
3839
rsmChangeConf
3940
rsmApplyConfChange
4041
rsmReady
42+
rsmAdvance
4143
rsmSendAppendEntriesRequest
4244
rsmReceiveAppendEntriesRequest
4345
rsmSendAppendEntriesResponse
@@ -53,6 +55,7 @@ const (
5355
func (e stateMachineEventType) String() string {
5456
return []string{
5557
"InitState",
58+
"Bootstrap",
5659
"BecomeCandidate",
5760
"BecomeFollower",
5861
"BecomeLeader",
@@ -61,6 +64,7 @@ func (e stateMachineEventType) String() string {
6164
"ChangeConf",
6265
"ApplyConfChange",
6366
"Ready",
67+
"Advance",
6468
"SendAppendEntriesRequest",
6569
"ReceiveAppendEntriesRequest",
6670
"SendAppendEntriesResponse",
@@ -86,6 +90,7 @@ type TracingEvent struct {
8690
State TracingState `json:"state"`
8791
Role string `json:"role"`
8892
LogSize uint64 `json:"log"`
93+
Applied uint64 `json:"applied"`
8994
Conf [2][]string `json:"conf"`
9095
Message *TracingMessage `json:"msg,omitempty"`
9196
ConfChange *TracingConfChange `json:"cc,omitempty"`
@@ -173,6 +178,7 @@ func traceEvent(evt stateMachineEventType, r *raft, m *raftpb.Message, prop map[
173178
NodeID: strconv.FormatUint(r.id, 10),
174179
State: makeTracingState(r),
175180
LogSize: r.raftLog.lastIndex(),
181+
Applied: r.raftLog.applied,
176182
Conf: [2][]string{formatConf(r.trk.Voters[0].Slice()), formatConf(r.trk.Voters[1].Slice())},
177183
Role: r.state.String(),
178184
Message: makeTracingMessage(m),
@@ -206,10 +212,18 @@ func traceInitState(r *raft) {
206212
traceNodeEvent(rsmInitState, r)
207213
}
208214

215+
// traceBootstrap records an rsmBootstrap node event for r by delegating to
// traceNodeEvent.
func traceBootstrap(r *raft) {
216+
traceNodeEvent(rsmBootstrap, r)
217+
}
218+
209219
func traceReady(r *raft) {
210220
traceNodeEvent(rsmReady, r)
211221
}
212222

223+
// traceAdvance records an rsmAdvance node event for r by delegating to
// traceNodeEvent.
func traceAdvance(r *raft) {
224+
traceNodeEvent(rsmAdvance, r)
225+
}
226+
213227
func traceCommit(r *raft) {
214228
traceNodeEvent(rsmCommit, r)
215229
}

state_trace_nop.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,12 @@ type TracingEvent struct{}
2929

3030
func traceInitState(*raft) {}
3131

32+
// traceBootstrap is the no-op variant: it accepts a *raft and does nothing.
func traceBootstrap(*raft) {}
33+
3234
func traceReady(*raft) {}
3335

36+
// traceAdvance is the no-op variant: it accepts a *raft and does nothing.
func traceAdvance(*raft) {}
37+
3438
func traceCommit(*raft) {}
3539

3640
func traceReplicate(*raft, ...raftpb.Entry) {}

tla/MCetcdraft.cfg

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ CONSTANTS
3434
AddNewServer <- MCAddNewServer
3535
DeleteServer <- MCDeleteServer
3636
AddLearner <- MCAddLearner
37+
Ready <- MCReady
3738

39+
3840
InitServerVars <- etcdInitServerVars
3941
InitLogVars <- etcdInitLogVars
4042
InitConfigVars <- etcdInitConfigVars
@@ -65,5 +67,4 @@ INVARIANTS
6567
LogMatchingInv
6668
QuorumLogInv
6769
MoreUpToDateCorrectInv
68-
LeaderCompletenessInv
69-
CommittedIsDurableInv
70+
LeaderCompletenessInv

tla/MCetcdraft.tla

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,30 @@ etcdInitServerVars == /\ currentTerm = [i \in Server |-> IF i \in InitServer TH
4242
/\ votedFor = [i \in Server |-> Nil]
4343
etcdInitLogVars == /\ log = [i \in Server |-> IF i \in InitServer THEN BootstrapLog ELSE <<>>]
4444
/\ commitIndex = [i \in Server |-> IF i \in InitServer THEN Cardinality(InitServer) ELSE 0]
45+
/\ applied = [i \in Server |-> 0]
4546
etcdInitConfigVars == /\ config = [i \in Server |-> [ jointConfig |-> IF i \in InitServer THEN <<InitServer, {}>> ELSE <<{}, {}>>, learners |-> {}]]
46-
/\ reconfigCount = 0 \* the bootstrap configuraitons are not counted
47+
/\ appliedConfChange = [i \in Server |-> Len(BootstrapLog)]
4748

4849
\* This file controls the constants as seen below.
4950
\* In addition to basic settings of how many nodes are to be model checked,
5051
\* the model allows to place additional limitations on the state space of the program.
5152

53+
\* Check the # of reconfigurations and return true if it is lower than ReconfigurationLimit
54+
\* TRUE iff some leader with a longest log (among all current leaders) has fewer
\* than ReconfigurationLimit ConfigEntry entries beyond the bootstrap ones.
\* The fold starts at -Cardinality(InitServer) and must thread the accumulator y
\* through every step; x is the current log entry. Returning a bare 1/0 would
\* discard the running count and the base value.
UnderReconfigLimit ==
55+
LET leaders == {i \in Server : state[i] = Leader}
56+
IN
57+
\E i \in leaders :
58+
/\ \A j \in leaders \ {i} : Len(log[i]) >= Len(log[j])
59+
/\ ReconfigurationLimit > FoldSeq(LAMBDA x, y: IF x.type = ConfigEntry THEN y + 1 ELSE y, -Cardinality(InitServer), log[i])
5260
\* Limit the # of reconfigurations to ReconfigurationLimit
5361
MCAddNewServer(i, j) ==
54-
/\ reconfigCount < ReconfigurationLimit
62+
/\ UnderReconfigLimit
5563
/\ etcd!AddNewServer(i, j)
5664
MCDeleteServer(i, j) ==
57-
/\ reconfigCount < ReconfigurationLimit
65+
/\ UnderReconfigLimit
5866
/\ etcd!DeleteServer(i, j)
5967
MCAddLearner(i, j) ==
60-
/\ reconfigCount < ReconfigurationLimit
68+
/\ UnderReconfigLimit
6169
/\ etcd!AddLearner(i, j)
6270

6371
\* Limit the terms that can be reached. Needs to be set to at least 3 to
@@ -104,6 +112,15 @@ MCSend(msg) ==
104112
/\ msg.mtype = AppendEntriesRequest
105113
/\ etcd!Send(msg)
106114

115+
\* If there is no new message and no state change, we can skip Advance to reduce the state space
116+
\* to be explored
117+
\* Model-checking override for Ready: only take the step for server i when its
\* ready data carries messages or a durable state that differs from
\* durableState[i]. The ready data is computed for the operator's own parameter
\* i, and the step delegates to etcd!Ready like the other MC* overrides delegate
\* to etcd!<Action>, so that the cfg substitution Ready <- MCReady does not
\* refer back to this operator.
MCReady(i) ==
118+
LET rd == ReadyData(i)
119+
IN
120+
/\ \/ rd.msgs /= EmptyBag
121+
\/ DurableStateFromReady(rd) /= durableState[i]
122+
/\ etcd!Ready(i)
123+
107124
mc_etcdSpec ==
108125
/\ Init
109126
/\ [][NextDynamic]_vars

tla/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ The TLA+ spec defines the desired behaviors of the model. To validate the correc
7272
./validate-model.sh -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
7373
```
7474

75+
You can also add the `-m` option to run the model checker in simulation mode, which randomly walks the state machine and is helpful for finding issues quickly.
76+
77+
```console
78+
./validate-model.sh -m -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
79+
```
7580

7681
## Validate Collected Traces
7782
With the above example trace logger, validate.sh can be used to validate traces in parallel.

tla/Traceetcdraft.cfg

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,5 +71,4 @@ INVARIANTS
7171
LogMatchingInv
7272
QuorumLogInv
7373
MoreUpToDateCorrectInv
74-
LeaderCompletenessInv
75-
CommittedIsDurableInv
74+
LeaderCompletenessInv

tla/Traceetcdraft.tla

Lines changed: 69 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -54,40 +54,35 @@ TraceServer == TLCEval(FoldSeq(
5454
THEN {x.event.nid, x.event.prop.cc.changes[1].nid}
5555
ELSE {x.event.nid},
5656
{}, TraceLog))
57-
58-
BootstrapLogIndicesForServer(i) ==
59-
LET
60-
FirstBootstrapLogIndex == SelectInSeq(TraceLog, LAMBDA x: x.event.nid = i /\ x.event.name \in {"InitState", "BecomeFollower", "ApplyConfChange"})
61-
FirstNonBootstrapLogIndex == SelectInSeq(TraceLog, LAMBDA x: x.event.nid = i /\ x.event.name \notin {"InitState", "BecomeFollower", "ApplyConfChange"})
62-
LastBootstrapLogIndexUpperBound == IF FirstNonBootstrapLogIndex = 0 THEN Len(TraceLog) ELSE FirstNonBootstrapLogIndex-1
63-
IN
64-
{ k \in FirstBootstrapLogIndex..LastBootstrapLogIndexUpperBound: TraceLog[k].event.nid = i }
65-
66-
BootstrapLogIndices == UNION { BootstrapLogIndicesForServer(i): i \in Server }
67-
68-
LastBootstrapLog == [ i \in Server |-> TraceLog[Max(BootstrapLogIndicesForServer(i))] ]
69-
70-
BootstrappedConfig(i) ==
71-
IF LastBootstrapLog[i].event.name = "ApplyConfChange" THEN
72-
ToSet(LastBootstrapLog[i].event.prop.cc.newconf)
73-
ELSE
74-
ToSet(LastBootstrapLog[i].event.conf[1])
75-
76-
TraceInitServer == BootstrappedConfig(TraceLog[1].event.nid)
57+
58+
BootstrapIndex == [ i \in Server |-> SelectInSeq(TraceLog, LAMBDA x: x.event.nid = i /\ x.event.name = "Bootstrap") ]
59+
60+
TraceInitServer == { i \in Server : BootstrapIndex[i] > 0 }
7761
ASSUME TraceInitServer \subseteq TraceServer
7862

79-
TraceInitServerVars == /\ currentTerm = [i \in Server |-> LastBootstrapLog[i].event.state.term]
80-
/\ state = [i \in Server |-> LastBootstrapLog[i].event.role]
81-
/\ votedFor = [i \in Server |-> LastBootstrapLog[i].event.state.vote]
82-
TraceInitLogVars == /\ log = [i \in Server |-> [j \in 1..LastBootstrapLog[i].event.log |-> [ term |-> 1, type |-> "ConfigEntry", value |-> [newconf |-> BootstrappedConfig(i), learners |-> {}]]]]
83-
/\ commitIndex = [i \in Server |-> LastBootstrapLog[i].event.state.commit]
63+
BootstrapEntries(i) == FoldSeq(
64+
LAMBDA x, y: Append(y, [ term |-> 1,
65+
type |-> "ConfigEntry",
66+
value |-> [ newconf |-> ToSet(TraceLog[x].event.prop.cc.newconf), learners |-> {}] ]),
67+
<<>>,
68+
SetToSortSeq({j \in DOMAIN TraceLog : /\ j < BootstrapIndex[i]
69+
/\ TraceLog[j].event.nid = i
70+
/\ TraceLog[j].event.name = "ApplyConfChange"}, <) )
71+
72+
TraceInitServerVars == /\ currentTerm = [i \in Server |-> IF i \in InitServer THEN 1 ELSE 0]
73+
/\ state = [i \in Server |-> Follower]
74+
/\ votedFor = [i \in Server |-> Nil]
75+
TraceInitLogVars == /\ log = [i \in Server |-> BootstrapEntries(i)]
76+
/\ commitIndex = [i \in Server |-> Len(log[i])]
77+
/\ applied = [i \in Server |-> 0]
8478
TraceInitConfigVars ==
85-
/\ config = [i \in Server |-> [ jointConfig |-> <<BootstrappedConfig(i), {}>>, learners |-> {}] ]
86-
/\ reconfigCount = 0
79+
/\ config = [i \in Server |-> [ jointConfig |-> <<IF i \in InitServer THEN ToSet(TraceLog[BootstrapIndex[i]].event.conf[1]) ELSE {}, {}>>, learners |-> {}] ]
80+
/\ appliedConfChange = [i \in Server |-> Len(log[i])]
8781

8882

8983
-------------------------------------------------------------------------------------
90-
ConfFromLog(l) == << ToSet(l.event.conf[1]), ToSet(l.event.conf[2]) >>
84+
ConfFromTrace(l) == << ToSet(l.event.conf[1]), ToSet(l.event.conf[2]) >>
85+
NewConfFromTrace(l) == << ToSet(l.event.prop.cc.newconf), {} >>
9186

9287
OneMoreMessage(msg) ==
9388
\/ msg \notin DOMAIN pendingMessages /\ msg \in DOMAIN pendingMessages' /\ pendingMessages'[msg] = 1
@@ -99,29 +94,44 @@ OneLessMessage(msg) ==
9994

10095
-------------------------------------------------------------------------------------
10196

97+
\* In some states, we restrict state exploration to follow only the actions of a
98+
\* constrained behavior. This reduces the state space to be explored.
99+
VARIABLE behaviorProgress
100+
101+
\* The fixed sequence of action names performed for a logged "Ready" event.
\* It is a plain (parameterless) sequence because every use in this module
\* applies Len(...) or indexing to it directly. The last element must be
\* "SendMessages" to match the name tested in ReadyBehaviorIfLogged; otherwise
\* the third step of the behavior can never be enabled.
Ready_PersistState_SendMessages_Behavior ==
102+
<< "Ready", "PersistState", "SendMessages" >>
103+
104+
StepToNextBehaviorAction(i) ==
105+
IF behaviorProgress[i] = Len(Ready_PersistState_SendMessages_Behavior) THEN
106+
behaviorProgress' = [behaviorProgress EXCEPT ![i] = 1]
107+
ELSE
108+
behaviorProgress' = [behaviorProgress EXCEPT ![i] = @ + 1]
109+
110+
IsAllowedBehaviorAction(i, name) ==
111+
Ready_PersistState_SendMessages_Behavior[behaviorProgress[i]] = name
112+
102113
VARIABLE l
103114
logline == TraceLog[l]
104-
VARIABLE pl
105-
106115

107116
TraceInit ==
108117
/\ l = 1
109-
/\ pl = 0
110-
/\ logline = TraceLog[l]
118+
/\ behaviorProgress = [i \in Server |-> 1]
111119
/\ Init
112120

113121
StepToNextTrace ==
114-
/\ l' = l+1
115-
/\ pl' = l
116-
/\ l % (Len(TraceLog) \div 100) = 0 => PrintT(<< "Progress %:", (l * 100) \div Len(TraceLog)>>)
117-
/\ l' > Len(TraceLog) => PrintT(<< "Progress %:", 100>>)
122+
IF behaviorProgress[logline.event.nid] > 1 THEN
123+
\* Trace shall stay in current position until all actions
124+
\* in the behavior are performed
125+
UNCHANGED l
126+
ELSE
127+
/\ l' = l+1
128+
/\ l % (Len(TraceLog) \div 100) = 0 => PrintT(<< "Progress %:", (l * 100) \div Len(TraceLog) >>)
129+
/\ l' > Len(TraceLog) => PrintT(<< "Progress %:", 100>>)
118130

119131
StepToNextTraceIfMessageIsProcessed(msg) ==
120132
IF OneLessMessage(msg)
121133
THEN StepToNextTrace
122-
ELSE
123-
/\ pl' = l
124-
/\ UNCHANGED <<l>>
134+
ELSE UNCHANGED <<l>>
125135

126136
-------------------------------------------------------------------------------------
127137

@@ -354,9 +364,19 @@ ApplySimpleConfChangeIfLogged(i) ==
354364
/\ LoglineIsNodeEvent("ApplyConfChange", i)
355365
/\ ApplySimpleConfChange(i)
356366

357-
ReadyIfLogged(i) ==
367+
\* For a logged "Ready" event of server i, perform whichever of Ready,
\* PersistState or SendMessages is currently allowed by behaviorProgress[i],
\* then advance that server's progress. StepToNextBehaviorAction takes the
\* server as its argument, so it must be applied to i here.
ReadyBehaviorIfLogged(i) ==
358368
/\ LoglineIsNodeEvent("Ready", i)
359-
/\ Ready(i)
369+
/\ \/ /\ IsAllowedBehaviorAction(i, "Ready")
370+
/\ Ready(i)
371+
\/ /\ IsAllowedBehaviorAction(i, "PersistState")
372+
/\ PersistState(i)
373+
\/ /\ IsAllowedBehaviorAction(i, "SendMessages")
374+
/\ SendMessages(i)
375+
/\ StepToNextBehaviorAction(i)
376+
377+
AdvanceIfLogged(i) ==
378+
/\ LoglineIsNodeEvent("Advance", i)
379+
/\ Advance(i)
360380

361381
RestartIfLogged(i) ==
362382
/\ LoglineIsNodeEvent("InitState", i)
@@ -385,7 +405,9 @@ StepDownToFollowerIfLogged(i) ==
385405

386406
\* skip unused logs
387407
SkipUnusedLogline ==
388-
/\ \/ /\ LoglineIsEvent("SendAppendEntriesResponse")
408+
/\ \/ /\ l < Len(TraceLog)
409+
/\ l <= BootstrapIndex[logline.event.nid]
410+
\/ /\ LoglineIsEvent("SendAppendEntriesResponse")
389411
/\ logline.event.msg.from # logline.event.msg.to
390412
\/ /\ LoglineIsEvent("SendRequestVoteResponse")
391413
/\ logline.event.msg.from # logline.event.msg.to
@@ -421,7 +443,9 @@ TraceNextNonReceiveActions ==
421443
\/ /\ LoglineIsEvent("ApplyConfChange")
422444
/\ \E i \in Server: ApplySimpleConfChangeIfLogged(i)
423445
\/ /\ LoglineIsEvent("Ready")
424-
/\ \E i \in Server: ReadyIfLogged(i)
446+
/\ \E i \in Server: ReadyBehaviorIfLogged(i)
447+
\/ /\ LoglineIsEvent("Advance")
448+
/\ \E i \in Server: AdvanceIfLogged(i)
425449
\/ /\ LoglineIsEvent("InitState")
426450
/\ \E i \in Server: RestartIfLogged(i)
427451
\/ /\ LoglineIsEvent("BecomeFollower")
@@ -444,7 +468,7 @@ TraceNext ==
444468
\/ TraceNextReceiveActions
445469

446470
TraceSpec ==
447-
TraceInit /\ [][TraceNext]_<<l, pl, vars>>
471+
TraceInit /\ [][TraceNext]_<<l, behaviorProgress, vars>>
448472

449473
-------------------------------------------------------------------------------------
450474

@@ -454,7 +478,7 @@ TraceView ==
454478
\* appears the second time in the trace. Put differently, TraceView causes TLC to
455479
\* consider s_i and s_j , where i and j are the positions of s in the trace,
456480
\* to be different states.
457-
<<vars, l>>
481+
<<vars, l, behaviorProgress>>
458482

459483
-------------------------------------------------------------------------------------
460484

0 commit comments

Comments
 (0)