@@ -153,7 +153,10 @@ impl Decrypter {
153
153
}
154
154
None => {
155
155
let kv = KeyStoreVec :: new ( key_store) ;
156
- ( Arc :: new ( RwLock :: new ( kv) ) , WorkerState :: default ( ) )
156
+ (
157
+ Arc :: new ( RwLock :: new ( kv) ) ,
158
+ WorkerState :: DkgPending ( HashMap :: new ( ) ) ,
159
+ )
157
160
}
158
161
} ;
159
162
@@ -366,33 +369,40 @@ impl Drop for Decrypter {
366
369
367
370
/// The operational state of the Worker.
368
371
///
369
- /// State Machine Flow:
370
- /// - DkgPending -> Running <-> ResharingComplete -> ShuttingDown
371
- /// - HandoverPending -> HandoverComplete -> Running <-> ResharingComplete -> ShuttingDown
372
+ /// # State Machine Flow: (epoch e1 is special, e2 onwards are the same)
373
+ ///
374
+ /// note: ShuttingDown and ResharingComplete will trigger send_handover_msg,
375
+ /// Running is triggered by maybe_switch_committee except in epoch 1
376
+ ///
377
+ /// #1: "e1 -> e2, in C1, but not C2"
378
+ /// DkgPending -> Running -> ShuttingDown
379
+ ///
380
+ /// #2: "e1 -> e2, in C1 and C2"
381
+ /// DkgPending -> Running -> ResharingComplete -> Running (in e2)
382
+ ///
383
+ /// #3: "ex -> ex+1, in Cx, but not Cx+1"
384
+ /// HandoverPending -> HandoverComplete -> Running (in ex) -> ShuttingDown
385
+ ///
386
+ /// #4: "ex -> ex+1, in Cx and Cx+1"
387
+ /// HandoverPending -> HandoverComplete -> Running (in ex) -> ResharingComplete -> Running (in ex+1)
372
388
#[ derive( Debug , Clone ) ]
373
389
#[ allow( clippy:: large_enum_variant) ]
374
390
enum WorkerState {
375
391
/// Awaiting resharing messages from the previous committee.
376
392
HandoverPending ( HashMap < PublicKey , ResharingSubset > ) ,
377
- /// Received enough resharing messages to complete the handover.
378
- HandoverComplete ( DecryptionKey ) ,
393
+ /// Received enough resharing messages to complete the handover, but not yet actively running.
394
+ HandoverComplete ,
379
395
/// Expects to obtain the initial DKG key through DKG bundles.
380
396
///
381
397
/// Upon startup the Worker requests DKG messages from remote nodes
382
398
/// such that, if the local node is behind, it will catchup immediately.
383
399
DkgPending ( HashMap < PublicKey , DkgSubset > ) ,
384
- /// Already completed at least one instance of DKG. Ready for resharing .
385
- Running ( DecryptionKey ) ,
386
- /// Obtained keys for both the current and next committee.
387
- ResharingComplete ( DecryptionKey , DecryptionKey ) ,
400
+ /// Active mode: the decryption key is ready (read from `enc_key`, not stored in this variant).
401
+ Running ,
402
+ /// Resharing is done and this node is also a member of the next committee; holds the decryption key for that committee (see cases #2 and #4).
403
+ ResharingComplete ( DecryptionKey ) ,
388
404
/// Completed resharing and handover but is not a member of next committee.
389
- ShuttingDown ( DecryptionKey ) ,
390
- }
391
-
392
- impl Default for WorkerState {
393
- fn default ( ) -> Self {
394
- Self :: DkgPending ( HashMap :: new ( ) )
395
- }
405
+ ShuttingDown ,
396
406
}
397
407
398
408
/// Worker is responsible for "hatching" ciphertexts.
@@ -493,7 +503,7 @@ impl Worker {
493
503
loop {
494
504
let mut cache_modified = false ;
495
505
// process pending inclusion lists received during catchup
496
- if !self . pending . is_empty ( ) && matches ! ( self . state, WorkerState :: Running ( _ ) ) {
506
+ if !self . pending . is_empty ( ) && matches ! ( self . state, WorkerState :: Running ) {
497
507
for incl in std:: mem:: take ( & mut self . pending ) . into_values ( ) {
498
508
match self . on_decrypt_request ( incl, true ) . await {
499
509
Ok ( ( ) ) => { }
@@ -586,7 +596,7 @@ impl Worker {
586
596
Err ( err) => warn ! ( node = %self . label, %round, %err, "error on hatch" ) ,
587
597
}
588
598
589
- if matches ! ( self . state, WorkerState :: ShuttingDown ( _ ) ) {
599
+ if matches ! ( self . state, WorkerState :: ShuttingDown ) {
590
600
// graceful shut down
591
601
if let Some ( next_committee) = self . next_committee {
592
602
if next_committee. num ( ) - 1 == self . last_hatched_round {
@@ -726,7 +736,7 @@ impl Worker {
726
736
. map_err ( |e| DecrypterError :: Dkg ( e. to_string ( ) ) ) ?;
727
737
728
738
self . enc_key . set ( dec_sk. clone ( ) ) ;
729
- self . state = WorkerState :: Running ( dec_sk ) ;
739
+ self . state = WorkerState :: Running ;
730
740
info ! ( node = %self . label, committee_id = %committee. id( ) , "dkg finished (catchup successful)" ) ;
731
741
}
732
742
@@ -794,7 +804,7 @@ impl Worker {
794
804
. map_err ( |e| DecrypterError :: Dkg ( e. to_string ( ) ) ) ?;
795
805
796
806
info ! ( committee_id = %committee. id( ) , node = %self . label, "handover finished" ) ;
797
- self . state = WorkerState :: HandoverComplete ( next_dec_key . clone ( ) ) ;
807
+ self . state = WorkerState :: HandoverComplete ;
798
808
self . enc_key . set ( next_dec_key) ;
799
809
self . dkg_completed . insert ( committee. id ( ) ) ;
800
810
}
@@ -875,8 +885,8 @@ impl Worker {
875
885
. result ( )
876
886
. map_err ( |e| DecrypterError :: Dkg ( e. to_string ( ) ) ) ?;
877
887
878
- self . enc_key . set ( dec_sk. clone ( ) ) ;
879
- self . state = WorkerState :: Running ( dec_sk ) ;
888
+ self . enc_key . set ( dec_sk) ;
889
+ self . state = WorkerState :: Running ;
880
890
self . dkg_completed . insert ( committee. id ( ) ) ;
881
891
info ! ( committee_id = %committee. id( ) , node = %self . label, "dkg finished" ) ;
882
892
}
@@ -961,10 +971,10 @@ impl Worker {
961
971
)
962
972
. map_err ( |e| DecrypterError :: Dkg ( e. to_string ( ) ) ) ?;
963
973
964
- self . state = WorkerState :: ResharingComplete ( dec_key . clone ( ) , next_dec_key. clone ( ) ) ;
974
+ self . state = WorkerState :: ResharingComplete ( next_dec_key) ;
965
975
} else {
966
976
// resharing complete; node will shut down at next committee switch
967
- self . state = WorkerState :: ShuttingDown ( dec_key . clone ( ) ) ;
977
+ self . state = WorkerState :: ShuttingDown ;
968
978
}
969
979
970
980
trace ! ( committee_id = %committee. id( ) , node = %self . label, "resharing complete; handing over" ) ;
@@ -1100,7 +1110,7 @@ impl Worker {
1100
1110
/// NOTE: when a ciphertext is malformed, we will skip decrypting it (treat as garbage) here.
1101
1111
/// but will later be marked as decrypted during `hatch()`
1102
1112
async fn decrypt ( & mut self , incl : & InclusionList ) -> Result < DecShareBatch > {
1103
- let dec_sk = match & self . state {
1113
+ let dec_sk: DecryptionKey = match & self . state {
1104
1114
WorkerState :: DkgPending ( _) => {
1105
1115
self . pending . insert ( incl. round ( ) , incl. clone ( ) ) ;
1106
1116
return Err ( DecrypterError :: DkgPending ) ;
@@ -1110,10 +1120,16 @@ impl Worker {
1110
1120
"Worker state does not hold decryption key" . to_string ( ) ,
1111
1121
) ) ;
1112
1122
}
1113
- WorkerState :: Running ( dec_key)
1114
- | WorkerState :: ResharingComplete ( dec_key, _)
1115
- | WorkerState :: HandoverComplete ( dec_key)
1116
- | WorkerState :: ShuttingDown ( dec_key) => dec_key,
1123
+ WorkerState :: ResharingComplete ( _) => {
1124
+ return Err ( DecrypterError :: Dkg ( format ! (
1125
+ "resharing completed, but Worker not active: label={}, round={}" ,
1126
+ self . label,
1127
+ incl. round( )
1128
+ ) ) ) ;
1129
+ }
1130
+ _ => self . enc_key . get ( ) . ok_or_else ( || {
1131
+ DecrypterError :: Internal ( "Worker running without dec key" . to_string ( ) )
1132
+ } ) ?,
1117
1133
} ;
1118
1134
1119
1135
let round = Round :: new ( incl. round ( ) , self . current ) ;
@@ -1201,9 +1217,11 @@ impl Worker {
1201
1217
}
1202
1218
1203
1219
let dec_sk = match & self . state {
1204
- WorkerState :: Running ( dec_key)
1205
- | WorkerState :: ResharingComplete ( dec_key, _)
1206
- | WorkerState :: ShuttingDown ( dec_key) => dec_key,
1220
+ WorkerState :: Running
1221
+ | WorkerState :: ResharingComplete ( _)
1222
+ | WorkerState :: ShuttingDown => self . enc_key . get ( ) . ok_or_else ( || {
1223
+ DecrypterError :: Internal ( "Worker running without dec key" . to_string ( ) )
1224
+ } ) ?,
1207
1225
_ => {
1208
1226
return Err ( DecrypterError :: Dkg (
1209
1227
"(hatching) worker state does not hold decryption key" . to_string ( ) ,
@@ -1412,17 +1430,18 @@ impl Worker {
1412
1430
1413
1431
// update state machine
1414
1432
self . state = match & self . state {
1415
- WorkerState :: HandoverComplete ( decryption_key ) => {
1433
+ WorkerState :: HandoverComplete => {
1416
1434
info ! ( node = %self . label, committee = %self . current, "(new node) successful committee switch" ) ;
1417
- WorkerState :: Running ( decryption_key . clone ( ) )
1435
+ WorkerState :: Running
1418
1436
}
1419
- WorkerState :: ResharingComplete ( _ , next_key) => {
1437
+ WorkerState :: ResharingComplete ( next_key) => {
1420
1438
info ! ( node = %self . label, committee = %self . current, "(old node) successful committee switch" ) ;
1421
- WorkerState :: Running ( next_key. clone ( ) )
1439
+ self . enc_key . set ( next_key. clone ( ) ) ;
1440
+ WorkerState :: Running
1422
1441
}
1423
- WorkerState :: ShuttingDown ( dec_key ) => {
1442
+ WorkerState :: ShuttingDown => {
1424
1443
info ! ( "(old node) not a member of new committee. ready for shut down" ) ;
1425
- WorkerState :: ShuttingDown ( dec_key . clone ( ) )
1444
+ WorkerState :: ShuttingDown
1426
1445
}
1427
1446
_ => {
1428
1447
return Err ( DecrypterError :: Internal (
0 commit comments