-
Notifications
You must be signed in to change notification settings - Fork 1k
Improve statement-store gossiping performance #9912
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
f06dab8
eca57b4
7b8aeb1
3e69999
9a5decc
baeba9f
b0607bc
52fb0aa
f08c2a8
1e3e83b
8cdb31a
a317299
50ecef4
6b89e1c
a705c00
8703601
90c0799
bd3645e
310e05b
ed55be3
514ad77
0adf169
6daaee0
0012b9e
6cf91c4
6891b17
f52598f
5945066
b5b96eb
971fe1e
f47af22
dec3b53
894d00e
10a1fff
b8a1c2f
a3d15d2
3e17ffb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
title: '[WIP] Fix statement-store gossiping' | ||
doc: | ||
- audience: Node Dev | ||
description: |- | ||
Fixes gossiping and scalability issues in the statement-store networking: | ||
reduces traffic by propagating only recent statements, skips duplicate processing, | ||
and splits large batches to stay under MAX_STATEMENT_NOTIFICATION_SIZE. | ||
crates: | ||
- name: sc-network-statement | ||
bump: minor | ||
- name: sc-statement-store | ||
bump: minor | ||
- name: sp-statement-store | ||
bump: minor |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,7 +30,9 @@ use crate::config::*; | |
|
||
use codec::{Decode, Encode}; | ||
use futures::{channel::oneshot, prelude::*, stream::FuturesUnordered, FutureExt}; | ||
use prometheus_endpoint::{register, Counter, PrometheusError, Registry, U64}; | ||
use prometheus_endpoint::{ | ||
prometheus, register, Counter, Gauge, Histogram, HistogramOpts, PrometheusError, Registry, U64, | ||
}; | ||
use sc_network::{ | ||
config::{NonReservedPeerMode, SetConfig}, | ||
error, multiaddr, | ||
|
@@ -88,6 +90,10 @@ const LOG_TARGET: &str = "statement-gossip"; | |
|
||
struct Metrics { | ||
propagated_statements: Counter<U64>, | ||
known_statements_received: Counter<U64>, | ||
skipped_oversized_statements: Counter<U64>, | ||
propagated_statements_chunks: Histogram, | ||
pending_statements: Gauge<U64>, | ||
} | ||
|
||
impl Metrics { | ||
|
@@ -100,6 +106,36 @@ impl Metrics { | |
)?, | ||
r, | ||
)?, | ||
known_statements_received: register( | ||
Counter::new( | ||
"substrate_sync_known_statement_received", | ||
"Number of statements received via gossiping that were already in the statement store", | ||
)?, | ||
r, | ||
)?, | ||
skipped_oversized_statements: register( | ||
Counter::new( | ||
"substrate_sync_skipped_oversized_statements", | ||
"Number of oversized statements that were skipped to be gossiped", | ||
)?, | ||
r, | ||
)?, | ||
propagated_statements_chunks: register( | ||
Histogram::with_opts( | ||
HistogramOpts::new( | ||
"substrate_sync_propagated_statements_chunks", | ||
"Distribution of chunk sizes when propagating statements", | ||
).buckets(prometheus::exponential_buckets(1.0, 2.0, 14)?), | ||
)?, | ||
r, | ||
)?, | ||
pending_statements: register( | ||
Gauge::new( | ||
"substrate_sync_pending_statement_validations", | ||
"Number of pending statement validations", | ||
)?, | ||
r, | ||
)?, | ||
}) | ||
} | ||
} | ||
|
@@ -131,7 +167,7 @@ impl StatementHandlerPrototype { | |
let (config, notification_service) = Net::notification_config( | ||
protocol_name.clone().into(), | ||
Vec::new(), | ||
MAX_STATEMENT_SIZE, | ||
MAX_STATEMENT_NOTIFICATION_SIZE, | ||
None, | ||
SetConfig { | ||
in_peers: 0, | ||
|
@@ -264,6 +300,9 @@ where | |
futures::select! { | ||
_ = self.propagate_timeout.next() => { | ||
self.propagate_statements(); | ||
if let Some(ref metrics) = self.metrics { | ||
metrics.pending_statements.set(self.pending_statements.len() as u64); | ||
} | ||
}, | ||
(hash, result) = self.pending_statements.select_next_some() => { | ||
if let Some(peers) = self.pending_statements_peers.remove(&hash) { | ||
|
@@ -388,6 +427,13 @@ where | |
|
||
self.network.report_peer(who, rep::ANY_STATEMENT); | ||
|
||
if self.statement_store.has_statement(&hash) { | ||
if let Some(ref metrics) = self.metrics { | ||
metrics.known_statements_received.inc(); | ||
} | ||
continue; | ||
} | ||
|
||
match self.pending_statements_peers.entry(hash) { | ||
Entry::Vacant(entry) => { | ||
let (completion_sender, completion_receiver) = oneshot::channel(); | ||
|
@@ -470,9 +516,55 @@ where | |
|
||
propagated_statements += to_send.len(); | ||
|
||
if !to_send.is_empty() { | ||
log::trace!(target: LOG_TARGET, "Sending {} statements to {}", to_send.len(), who); | ||
self.notification_service.send_sync_notification(who, to_send.encode()); | ||
let mut offset = 0; | ||
while offset < to_send.len() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe we still need to tackle this with a FuturesUnordered here. If we have multiple statements, we'll fill the 1 MiB ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can put in FuturesUnordered only cycles to one peer, not every batch, because if the peer disconnects, we don't go further. For current use we have about three peers, so we can postpone this improvement. |
||
// Try to send as many statements as possible in one notification | ||
let chunk_size = to_send.len() - offset; | ||
let mut current_end = to_send.len(); | ||
|
||
loop { | ||
let chunk = &to_send[offset..current_end]; | ||
let encoded_size = chunk.encoded_size(); | ||
|
||
// If chunk fits, send it | ||
if encoded_size <= MAX_STATEMENT_NOTIFICATION_SIZE as usize { | ||
log::trace!( | ||
target: LOG_TARGET, | ||
"Sending {} statements ({} KB) to {}", | ||
chunk.len(), | ||
encoded_size / 1024, | ||
who | ||
); | ||
self.notification_service.send_sync_notification(who, chunk.encode()); | ||
offset = current_end; | ||
if let Some(ref metrics) = self.metrics { | ||
metrics.propagated_statements_chunks.observe(chunk.len() as f64); | ||
} | ||
break; | ||
} | ||
|
||
// Size exceeded - split the chunk | ||
let split_factor = | ||
(encoded.len() / MAX_STATEMENT_NOTIFICATION_SIZE as usize) + 1; | ||
AndreiEres marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
let new_chunk_size = (current_end - offset) / split_factor; | ||
|
||
// Single statement is too large | ||
if new_chunk_size == 0 { | ||
log::warn!( | ||
target: LOG_TARGET, | ||
"Statement too large ({} KB), skipping", | ||
AndreiEres marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
encoded.len() / 1024 | ||
); | ||
if let Some(ref metrics) = self.metrics { | ||
metrics.skipped_oversized_statements.inc(); | ||
} | ||
offset = current_end; | ||
break; | ||
} | ||
|
||
// Reduce chunk size and try again | ||
current_end = offset + new_chunk_size; | ||
} | ||
} | ||
} | ||
|
||
|
@@ -489,7 +581,7 @@ where | |
} | ||
|
||
log::debug!(target: LOG_TARGET, "Propagating statements"); | ||
if let Ok(statements) = self.statement_store.statements() { | ||
if let Ok(statements) = self.statement_store.take_recent_statements() { | ||
self.do_propagate_statements(&statements); | ||
} | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.