Skip to content

Commit c9d0aa2

Browse files
authored
Add Summary stats per epoch (#3612)
* add summary stats per epoch * add timeout count
1 parent a02020e commit c9d0aa2

File tree

2 files changed

+76
-8
lines changed

2 files changed

+76
-8
lines changed

crates/hotshot/task-impls/src/stats.rs

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use std::{collections::BTreeMap, sync::Arc};
1+
use std::{
2+
collections::{BTreeMap, BTreeSet},
3+
sync::Arc,
4+
};
25

36
use async_broadcast::{Receiver, Sender};
47
use async_trait::async_trait;
@@ -7,7 +10,11 @@ use hotshot_task::task::TaskState;
710
use hotshot_types::{
811
consensus::OuterConsensus,
912
epoch_membership::EpochMembershipCoordinator,
10-
traits::node_implementation::{ConsensusTime, NodeType},
13+
traits::{
14+
block_contents::BlockHeader,
15+
node_implementation::{ConsensusTime, NodeType},
16+
BlockPayload,
17+
},
1118
vote::HasViewNumber,
1219
};
1320
use hotshot_utils::{
@@ -38,6 +45,7 @@ pub struct LeaderViewStats<TYPES: NodeType> {
3845
pub struct ReplicaViewStats<TYPES: NodeType> {
3946
pub view: TYPES::View,
4047
pub view_change: Option<i128>,
48+
pub proposal_timestamp: Option<i128>,
4149
pub proposal_recv: Option<i128>,
4250
pub vote_send: Option<i128>,
4351
pub timeout_vote_send: Option<i128>,
@@ -74,6 +82,7 @@ impl<TYPES: NodeType> ReplicaViewStats<TYPES> {
7482
Self {
7583
view,
7684
view_change: None,
85+
proposal_timestamp: None,
7786
proposal_recv: None,
7887
vote_send: None,
7988
timeout_vote_send: None,
@@ -97,6 +106,10 @@ pub struct StatsTaskState<TYPES: NodeType> {
97106
membership_coordinator: EpochMembershipCoordinator<TYPES>,
98107
leader_stats: BTreeMap<TYPES::View, LeaderViewStats<TYPES>>,
99108
replica_stats: BTreeMap<TYPES::View, ReplicaViewStats<TYPES>>,
109+
latencies_by_view: BTreeMap<TYPES::View, i128>,
110+
sizes_by_view: BTreeMap<TYPES::View, i128>,
111+
epoch_start_times: BTreeMap<TYPES::Epoch, i128>,
112+
timeouts: BTreeSet<TYPES::View>,
100113
}
101114

102115
impl<TYPES: NodeType> StatsTaskState<TYPES> {
@@ -115,6 +128,10 @@ impl<TYPES: NodeType> StatsTaskState<TYPES> {
115128
membership_coordinator,
116129
leader_stats: BTreeMap::new(),
117130
replica_stats: BTreeMap::new(),
131+
latencies_by_view: BTreeMap::new(),
132+
sizes_by_view: BTreeMap::new(),
133+
epoch_start_times: BTreeMap::new(),
134+
timeouts: BTreeSet::new(),
118135
}
119136
}
120137
fn leader_entry(&mut self, view: TYPES::View) -> &mut LeaderViewStats<TYPES> {
@@ -130,6 +147,9 @@ impl<TYPES: NodeType> StatsTaskState<TYPES> {
130147
fn garbage_collect(&mut self, view: TYPES::View) {
131148
self.leader_stats = self.leader_stats.split_off(&view);
132149
self.replica_stats = self.replica_stats.split_off(&view);
150+
self.latencies_by_view = self.latencies_by_view.split_off(&view);
151+
self.sizes_by_view = self.sizes_by_view.split_off(&view);
152+
self.timeouts = BTreeSet::new();
133153
}
134154

135155
fn dump_stats(&self) -> Result<()> {
@@ -163,6 +183,29 @@ impl<TYPES: NodeType> StatsTaskState<TYPES> {
163183
);
164184
Ok(())
165185
}
186+
187+
fn log_basic_stats(&self, now: i128, epoch: &TYPES::Epoch) {
188+
let num_views = self.latencies_by_view.len();
189+
let total_latency = self.latencies_by_view.values().sum::<i128>();
190+
let average_latency = total_latency / num_views as i128;
191+
tracing::warn!("Average latency: {}ms", average_latency);
192+
tracing::warn!(
193+
"Number of timeouts in epoch: {}, is {}",
194+
epoch,
195+
self.timeouts.len()
196+
);
197+
let total_size = self.sizes_by_view.values().sum::<i128>();
198+
if total_size == 0 {
199+
// Either no TXNs or we are not in the DA committee and don't know block sizes
200+
return;
201+
}
202+
if let Some(epoch_start_time) = self.epoch_start_times.get(epoch) {
203+
let elapsed_time = now - epoch_start_time;
204+
// multiply by 1000 to convert to seconds
205+
let throughput = (total_size / elapsed_time) * 1000;
206+
tracing::warn!("Throughput: {} bytes/s", throughput);
207+
}
208+
}
166209
}
167210

168211
#[async_trait]
@@ -240,6 +283,9 @@ impl<TYPES: NodeType> TaskState for StatsTaskState<TYPES> {
240283
HotShotEvent::QuorumProposalValidated(proposal, _) => {
241284
self.replica_entry(proposal.data.view_number())
242285
.proposal_validated = Some(now);
286+
self.replica_entry(proposal.data.view_number())
287+
.proposal_timestamp =
288+
Some(proposal.data.block_header().timestamp_millis() as i128);
243289
},
244290
HotShotEvent::DaProposalSend(proposal, _) => {
245291
self.leader_entry(proposal.data.view_number())
@@ -288,6 +334,7 @@ impl<TYPES: NodeType> TaskState for StatsTaskState<TYPES> {
288334
if self.view < *view {
289335
self.view = *view;
290336
}
337+
let prev_epoch = self.epoch;
291338
let mut new_epoch = false;
292339
if self.epoch < *epoch {
293340
self.epoch = *epoch;
@@ -298,6 +345,9 @@ impl<TYPES: NodeType> TaskState for StatsTaskState<TYPES> {
298345
}
299346

300347
if new_epoch {
348+
if let Some(prev_epoch) = prev_epoch {
349+
self.log_basic_stats(now, &prev_epoch);
350+
}
301351
let _ = self.dump_stats();
302352
self.garbage_collect(*view - 1);
303353
}
@@ -312,8 +362,9 @@ impl<TYPES: NodeType> TaskState for StatsTaskState<TYPES> {
312362
self.leader_entry(*view).builder_start = Some(now);
313363
}
314364
},
315-
HotShotEvent::Timeout(..) => {
316-
self.replica_entry(self.view).timeout_triggered = Some(now);
365+
HotShotEvent::Timeout(view, _) => {
366+
self.replica_entry(*view).timeout_triggered = Some(now);
367+
self.timeouts.insert(*view);
317368
},
318369
HotShotEvent::TransactionsRecv(_txns) => {
319370
// TODO: Track transactions by time
@@ -334,6 +385,23 @@ impl<TYPES: NodeType> TaskState for StatsTaskState<TYPES> {
334385
self.replica_entry(proposal.data.view_number())
335386
.proposal_prelim_validated = Some(now);
336387
},
388+
HotShotEvent::LeavesDecided(leaves) => {
389+
for leaf in leaves {
390+
if leaf.view_number() == TYPES::View::genesis() {
391+
continue;
392+
}
393+
let view = leaf.view_number();
394+
let timestamp = leaf.block_header().timestamp_millis() as i128;
395+
let now_millis = now / 1_000_000;
396+
let latency = now_millis - timestamp;
397+
tracing::debug!("View {} Latency: {}ms", view, latency);
398+
self.latencies_by_view.insert(view, latency);
399+
self.sizes_by_view.insert(
400+
view,
401+
leaf.block_payload().map(|p| p.txn_bytes()).unwrap_or(0) as i128,
402+
);
403+
}
404+
},
337405
_ => {},
338406
}
339407
Ok(())

crates/hotshot/types/src/data/ns_table.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ pub fn parse_ns_table(payload_byte_len: usize, bytes: &[u8]) -> Vec<Range<usize>
2222
if bytes.len() < NUM_NSS_BYTE_LEN
2323
|| !(bytes.len() - NUM_NSS_BYTE_LEN).is_multiple_of(NS_OFFSET_BYTE_LEN + NS_ID_BYTE_LEN)
2424
{
25-
tracing::warn!(
25+
tracing::info!(
2626
"Failed to parse the metadata as namespace table. Use a single namespace table \
2727
instead."
2828
);
@@ -34,7 +34,7 @@ pub fn parse_ns_table(payload_byte_len: usize, bytes: &[u8]) -> Vec<Range<usize>
3434
/ NS_ID_BYTE_LEN.saturating_add(NS_OFFSET_BYTE_LEN)
3535
|| (num_entries == 0 && payload_byte_len != 0)
3636
{
37-
tracing::warn!(
37+
tracing::info!(
3838
"Failed to parse the metadata as namespace table. Use a single namespace table \
3939
instead."
4040
);
@@ -53,7 +53,7 @@ pub fn parse_ns_table(payload_byte_len: usize, bytes: &[u8]) -> Vec<Range<usize>
5353
.unwrap(),
5454
) as usize;
5555
if r < l || r > payload_byte_len {
56-
tracing::warn!(
56+
tracing::info!(
5757
"Failed to parse the metadata as namespace table. Use a single namespace table \
5858
instead."
5959
);
@@ -63,7 +63,7 @@ pub fn parse_ns_table(payload_byte_len: usize, bytes: &[u8]) -> Vec<Range<usize>
6363
l = r;
6464
}
6565
if l != payload_byte_len {
66-
tracing::warn!(
66+
tracing::info!(
6767
"Failed to parse the metadata as namespace table. Use a single namespace table \
6868
instead."
6969
);

0 commit comments

Comments
 (0)