Skip to content

Commit fa5845c

Browse files
authored
Merge pull request #5302 from stacks-network/fix/stackerdb-sync
Fix/stackerdb sync for 3.0-rc3
2 parents f427505 + f381d7f commit fa5845c

File tree

4 files changed

+75
-14
lines changed

4 files changed

+75
-14
lines changed

stackslib/src/net/p2p.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4998,7 +4998,7 @@ impl PeerNetwork {
49984998
/// Log our neighbors.
49994999
/// Used for testing and debugging
50005000
fn log_neighbors(&mut self) {
5001-
if self.get_connection_opts().log_neighbors_freq == 0 {
5001+
if !cfg!(test) && self.get_connection_opts().log_neighbors_freq == 0 {
50025002
return;
50035003
}
50045004

stackslib/src/net/stackerdb/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,8 @@ pub struct StackerDBSync<NC: NeighborComms> {
443443
rounds: u128,
444444
/// Round when we last pushed
445445
push_round: u128,
446+
/// Time (in epoch seconds) when we last deliberately evicted a peer
447+
last_eviction_time: u64,
446448
}
447449

448450
impl StackerDBSyncResult {

stackslib/src/net/stackerdb/sync.rs

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
7979
num_attempted_connections: 0,
8080
rounds: 0,
8181
push_round: 0,
82+
last_eviction_time: get_epoch_time_secs(),
8283
};
8384
dbsync.reset(None, config);
8485
dbsync
@@ -217,9 +218,36 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
217218
self.expected_versions.clear();
218219
self.downloaded_chunks.clear();
219220

220-
// reset comms, but keep all connected replicas pinned
221+
// reset comms, but keep all connected replicas pinned.
222+
// Randomly evict one every so often.
221223
self.comms.reset();
222224
if let Some(network) = network {
225+
let mut eviction_index = None;
226+
if self.last_eviction_time + 60 < get_epoch_time_secs() {
227+
self.last_eviction_time = get_epoch_time_secs();
228+
if self.replicas.len() > 0 {
229+
eviction_index = Some(thread_rng().gen_range(0..self.replicas.len()));
230+
}
231+
}
232+
233+
let remove_naddr = eviction_index.and_then(|idx| {
234+
let removed = self.replicas.iter().nth(idx).cloned();
235+
if let Some(naddr) = removed.as_ref() {
236+
debug!(
237+
"{:?}: {}: don't reuse connection for replica {:?}",
238+
network.get_local_peer(),
239+
&self.smart_contract_id,
240+
&naddr,
241+
);
242+
}
243+
removed
244+
});
245+
246+
if let Some(naddr) = remove_naddr {
247+
self.replicas.remove(&naddr);
248+
}
249+
250+
// retain the remaining replica connections
223251
for naddr in self.replicas.iter() {
224252
if let Some(event_id) = network.get_event_id(&naddr.to_neighbor_key(network)) {
225253
self.comms.pin_connection(event_id);
@@ -668,7 +696,8 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
668696
/// We might not be connected to any yet.
669697
/// Clears self.replicas, and fills in self.connected_replicas with already-connected neighbors
670698
/// Returns Ok(true) if we can proceed to sync
671-
/// Returns Ok(false) if we have no known peers
699+
/// Returns Ok(false) if we should try this again
700+
/// Returns Err(NoSuchNeighbor) if we don't have anyone to talk to
672701
/// Returns Err(..) on DB query error
673702
pub fn connect_begin(&mut self, network: &mut PeerNetwork) -> Result<bool, net_error> {
674703
if self.replicas.len() == 0 {
@@ -686,7 +715,7 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
686715
);
687716
if self.replicas.len() == 0 {
688717
// nothing to do
689-
return Ok(false);
718+
return Err(net_error::NoSuchNeighbor);
690719
}
691720

692721
let naddrs = mem::replace(&mut self.replicas, HashSet::new());
@@ -729,11 +758,12 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
729758
);
730759
self.num_attempted_connections += 1;
731760
self.num_connections += 1;
761+
self.connected_replicas.insert(naddr);
732762
}
733763
Ok(false) => {
734764
// need to retry
735-
self.replicas.insert(naddr);
736765
self.num_attempted_connections += 1;
766+
self.replicas.insert(naddr);
737767
}
738768
Err(_e) => {
739769
debug!(
@@ -746,7 +776,7 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
746776
}
747777
}
748778
}
749-
Ok(self.replicas.len() == 0)
779+
Ok(self.connected_replicas.len() > 0)
750780
}
751781

752782
/// Finish up connecting to our replicas.
@@ -1154,7 +1184,8 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
11541184
);
11551185

11561186
// fill up our comms with $capacity requests
1157-
for _i in 0..self.request_capacity {
1187+
let mut num_sent = 0;
1188+
for _i in 0..self.chunk_push_priorities.len() {
11581189
if self.comms.count_inflight() >= self.request_capacity {
11591190
break;
11601191
}
@@ -1173,6 +1204,9 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
11731204
chunk_push.chunk_data.slot_id,
11741205
chunk_push.chunk_data.slot_version,
11751206
);
1207+
1208+
// next-prioritized chunk
1209+
cur_priority = (cur_priority + 1) % self.chunk_push_priorities.len();
11761210
continue;
11771211
};
11781212

@@ -1213,6 +1247,11 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
12131247

12141248
// next-prioritized chunk
12151249
cur_priority = (cur_priority + 1) % self.chunk_push_priorities.len();
1250+
1251+
num_sent += 1;
1252+
if num_sent > self.request_capacity {
1253+
break;
1254+
}
12161255
}
12171256
self.next_chunk_push_priority = cur_priority;
12181257
Ok(self
@@ -1370,14 +1409,22 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
13701409
let mut blocked = true;
13711410
match self.state {
13721411
StackerDBSyncState::ConnectBegin => {
1373-
let done = self.connect_begin(network)?;
1412+
let done = match self.connect_begin(network) {
1413+
Ok(done) => done,
1414+
Err(net_error::NoSuchNeighbor) => {
1415+
// nothing to do
1416+
self.state = StackerDBSyncState::Finished;
1417+
blocked = false;
1418+
false
1419+
}
1420+
Err(e) => {
1421+
return Err(e);
1422+
}
1423+
};
13741424
if done {
13751425
self.state = StackerDBSyncState::ConnectFinish;
1376-
} else {
1377-
// no replicas; try again
1378-
self.state = StackerDBSyncState::Finished;
1426+
blocked = false;
13791427
}
1380-
blocked = false;
13811428
}
13821429
StackerDBSyncState::ConnectFinish => {
13831430
let done = self.connect_try_finish(network)?;

stackslib/src/net/stackerdb/tests/sync.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,12 @@ fn load_stackerdb(peer: &TestPeer, idx: usize) -> Vec<(SlotMetadata, Vec<u8>)> {
183183

184184
fn check_sync_results(network_sync: &NetworkResult) {
185185
for res in network_sync.stacker_db_sync_results.iter() {
186-
assert!(res.num_connections >= res.num_attempted_connections);
186+
assert!(
187+
res.num_connections <= res.num_attempted_connections,
188+
"{} < {}",
189+
res.num_connections,
190+
res.num_attempted_connections
191+
);
187192
}
188193
}
189194

@@ -194,7 +199,14 @@ fn test_reconnect(network: &mut PeerNetwork) {
194199
.expect("FATAL: did not replace stacker dbs");
195200

196201
for (_sc, stacker_db_sync) in stacker_db_syncs.iter_mut() {
197-
stacker_db_sync.connect_begin(network).unwrap();
202+
match stacker_db_sync.connect_begin(network) {
203+
Ok(x) => {}
204+
Err(net_error::PeerNotConnected) => {}
205+
Err(net_error::NoSuchNeighbor) => {}
206+
Err(e) => {
207+
panic!("Failed to connect_begin: {:?}", &e);
208+
}
209+
}
198210
}
199211

200212
network.stacker_db_syncs = Some(stacker_db_syncs);

0 commit comments

Comments
 (0)