Skip to content

Commit 9465f5a

Browse files
committed
Fix CommitAdvance behavior for expunged nodes
1 parent ad388eb commit 9465f5a

File tree

4 files changed

+56
-7
lines changed

4 files changed

+56
-7
lines changed

trust-quorum/src/alarm.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize};
88

99
use crate::{Configuration, Epoch, PlatformId};
1010

11+
#[allow(clippy::large_enum_variant)]
1112
#[derive(
1213
Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize,
1314
)]

trust-quorum/src/compute_key_share.rs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ impl KeyShareComputer {
103103
"epoch" => %epoch,
104104
"from" => %from
105105
);
106+
return false;
106107
}
107108

108109
// A valid share was received. Is it new?
@@ -118,12 +119,23 @@ impl KeyShareComputer {
118119
// What index are we in the configuration? This is our "x-coordinate"
119120
// for our key share calculation. We always start indexing from 1, since
120121
// 0 is the rack secret.
121-
let index = self
122-
.config
123-
.members
124-
.keys()
125-
.position(|id| id == ctx.platform_id())
126-
.expect("node exists");
122+
let index =
123+
self.config.members.keys().position(|id| id == ctx.platform_id());
124+
125+
let Some(index) = index else {
126+
let msg = concat!(
127+
"Failed to get index for ourselves in current configuration. ",
128+
"We are not a member, and must have been expunged."
129+
);
130+
error!(
131+
self.log,
132+
"{msg}";
133+
"platform_id" => %ctx.platform_id(),
134+
"config" => ?self.config
135+
);
136+
return false;
137+
};
138+
127139
let x_coordinate =
128140
Gf256::new(u8::try_from(index + 1).expect("index fits in u8"));
129141

trust-quorum/src/node.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ impl Node {
308308
"epoch" => %config.epoch
309309
);
310310
ctx.update_persistent_state(|ps| ps.commits.insert(config.epoch));
311+
return;
311312
}
312313

313314
// Do we have the configuration in our persistent state? If not save it.
@@ -347,6 +348,7 @@ impl Node {
347348
"received_epoch" => %config.epoch
348349
);
349350
self.coordinator_state = None;
351+
// Intentionally fall through
350352
} else if coordinating_epoch == config.epoch {
351353
info!(
352354
self.log,
@@ -402,7 +404,8 @@ impl Node {
402404
}
403405
}
404406

405-
// We either were collectiong shares for an old epoch or haven't started yet.
407+
// We either were collectiong shares for an old epoch or haven't started
408+
// yet.
406409
self.key_share_computer =
407410
Some(KeyShareComputer::new(&self.log, ctx, config));
408411
}
@@ -417,6 +420,18 @@ impl Node {
417420
ctx.persistent_state().latest_committed_configuration()
418421
{
419422
if latest_committed_config.epoch > epoch {
423+
if !latest_committed_config.members.contains_key(&from) {
424+
info!(
425+
self.log,
426+
"Received a GetShare message from expunged node";
427+
"from" => %from,
428+
"latest_committed_epoch" =>
429+
%latest_committed_config.epoch,
430+
"requested_epoch" => %epoch
431+
);
432+
// TODO: Send an expunged message
433+
return;
434+
}
420435
info!(
421436
self.log,
422437
concat!(
@@ -435,6 +450,20 @@ impl Node {
435450
}
436451
}
437452

453+
// Do we have the configuration? Is the requesting peer a member?
454+
if let Some(config) = ctx.persistent_state().configuration(epoch) {
455+
if !config.members.contains_key(&from) {
456+
info!(
457+
self.log,
458+
"Received a GetShare message from expunged node";
459+
"from" => %from,
460+
"epoch" => %epoch
461+
);
462+
// TODO: Send an expunged message
463+
return;
464+
}
465+
}
466+
438467
// If we have the share for the requested epoch, we always return it. We
439468
// know that it is at least as new as the last committed epoch. We might
440469
// not have learned about the configuration being committed yet, but

0 commit comments

Comments
 (0)