feat: improve node syncing reporting#5589
Conversation
ebf05a1 to
a365028
Compare
src/cli/subcommands/sync_cmd.rs
Outdated
| Self::Wait { watch } => { | ||
| let ticker = Ticker::new(0.., Duration::from_secs(1)); | ||
| let mut stdout = stdout(); | ||
| let mut last_lines_printed = 0; |
There was a problem hiding this comment.
either this should be bool type or we can use a better var name to be clear
There was a problem hiding this comment.
Done (Renamed it).
src/cli/subcommands/sync_cmd.rs
Outdated
| print_sync_report_details(&report, &mut current_lines)?; | ||
|
|
||
| last_lines_printed = current_lines; | ||
| // Break if Synced and not watching |
There was a problem hiding this comment.
Exit feels more appropriate than Break as a reader
| // Break if Synced and not watching | |
| // Exit if synced and not in watch mode. |
src/cli/subcommands/sync_cmd.rs
Outdated
| fn format_tipset_cids(cids: &str) -> &str { | ||
| if cids.is_empty() { "[]" } else { cids } | ||
| /// Prints the sync status report details. | ||
| /// `line_count` is mutable and incremented for each line printed, used for clearing in `Wait`. |
There was a problem hiding this comment.
| /// `line_count` is mutable and incremented for each line printed, used for clearing in `Wait`. | |
| /// `line_count` is incremented for each printed line and is used for terminal cleanup (e.g., in `Wait` mode). |
There was a problem hiding this comment.
Completely removed this part, now returning the number of printed lines from the print_sync_report_details fn directly.
src/cli/subcommands/sync_cmd.rs
Outdated
| if cids.is_empty() { "[]" } else { cids } | ||
| /// Prints the sync status report details. | ||
| /// `line_count` is mutable and incremented for each line printed, used for clearing in `Wait`. | ||
| /// Pass `&mut 0` if line counting/clearing is not needed (like in `Status` or final print). |
There was a problem hiding this comment.
| /// Pass `&mut 0` if line counting/clearing is not needed (like in `Status` or final print). | |
| /// Pass `&mut 0` to disable line counting/clearing (like in `status` or final print). |
src/cli/subcommands/sync_cmd.rs
Outdated
| check_snapshot_progress(client, true).await | ||
| } | ||
|
|
||
| /// Handles the initial check for snapshot download if the node is initializing. |
There was a problem hiding this comment.
| /// Handles the initial check for snapshot download if the node is initializing. | |
| /// Checks if snapshot download is required or in progress when the node is initializing. | |
| /// If a snapshot download is in progress, it waits for completion before starting sync monitor. |
src/cli/subcommands/sync_cmd.rs
Outdated
| /// Handles the initial check for snapshot download if the node is initializing. | ||
| async fn handle_initial_snapshot_check(client: &rpc::Client) -> anyhow::Result<()> { | ||
| let initial_report = SyncStatus::call(client, ()).await?; | ||
| // Use the public getter method instead of accessing the private field |
There was a problem hiding this comment.
this comment is not clear to me, like which private field and why ?
There was a problem hiding this comment.
Added the comment as a reminder to use getters instead of directly accessing private fields during refactoring, but forgot to remove it. Will update it.
There was a problem hiding this comment.
Removed the comment.
|
@akaladarshi code changes overall LGTM 👍🏻, requested some minor changes. |
src/chain_sync/chain_follower.rs
Outdated
| let fork_info = ForkSyncInfo { | ||
| target_tipset_key: last_ts.key().clone(), | ||
| target_epoch: last_ts.epoch(), | ||
| // The epoch from which sync activities (fetch/validate) need to start for this fork. |
There was a problem hiding this comment.
this comment makes more sense in ForkSyncInfo struct than here
src/chain_sync/chain_follower.rs
Outdated
| target_sync_epoch_start: first_ts.epoch(), | ||
| stage, | ||
| validated_chain_head_epoch: current_validated_epoch, | ||
| start_time, // Track when this fork's sync task was initiated |
src/chain_sync/chain_follower.rs
Outdated
| stage, | ||
| validated_chain_head_epoch: current_validated_epoch, | ||
| start_time, // Track when this fork's sync task was initiated | ||
| last_updated: Some(now), // Mark the last update time |
There was a problem hiding this comment.
same here, I guess you already have it in ForkSyncInfo
src/cli/subcommands/sync_cmd.rs
Outdated
| } | ||
|
|
||
| // Print the status report once, without line counting for clearing | ||
| print_sync_report_details(&sync_status, &mut 0)?; |
There was a problem hiding this comment.
Using Option instead of &mut 0 to disable line counting is more suitable here
There was a problem hiding this comment.
Refactored it, now returning printed lines from the from function itself.
src/chain_sync/sync_status.rs
Outdated
| /// Node is significantly behind the network head and actively downloading/validating. | ||
| #[strum(to_string = "Syncing")] | ||
| Syncing, | ||
| /// Node is close to the network head (e.g., within a configurable threshold like 5 epochs). |
There was a problem hiding this comment.
is the threshold configurable somewhere?
There was a problem hiding this comment.
No it's not configurable, I don't think it needs to be configurable because its just for changing the sync status to synced from syncing, earlier it was 10.
There was a problem hiding this comment.
right, the comment was misleading
src/chain_sync/sync_status.rs
Outdated
| match stateless_mode { | ||
| true => self.set_status(NodeSyncStatus::Offline), | ||
| false => { | ||
| if time_diff < seconds_per_epoch as u64 * 5 { |
There was a problem hiding this comment.
worth commenting why 5, best if we can avoid hardcoding
There was a problem hiding this comment.
It was a randomly selected value, just like SyncState had 10, I just decrease it to be more closer to node head. will make it a constant but open to suggestion for this value.
There was a problem hiding this comment.
made it a constant.
There was a problem hiding this comment.
@LesnyRumcajs any suggestions for this value here
src/chain_sync/sync_status.rs
Outdated
| self.set_network_head(network_head_epoch as ChainEpoch); | ||
| self.set_epochs_behind(network_head_epoch as i64 - current_chain_head_epoch); |
There was a problem hiding this comment.
as ChainEpoch is same as i64, lets standardise
or handle it when initialising network_head_epoch
There was a problem hiding this comment.
or change calculate_expected_epoch return type to ChainEpoch
There was a problem hiding this comment.
Everywhere we were converting the returned value of calculate_expected_epoch to i64 so updated calculate_expected_epoch return type to i64.
src/cli/subcommands/sync_cmd.rs
Outdated
| if *line_count > 0 { | ||
| // Only increment if we are in the Wait command context | ||
| *line_count += 1; |
There was a problem hiding this comment.
from this it's not clear how we determine we are in Wait context, adding a comment above conditional check will make more sense
There was a problem hiding this comment.
also using Option::None instead of 0 is better
There was a problem hiding this comment.
Removed this part.
| signature for crate::shim::crypto::Signature, | ||
| signature_type for crate::shim::crypto::SignatureType, | ||
| signed_message for crate::message::SignedMessage, | ||
| sync_stage for crate::chain_sync::SyncStage, |
There was a problem hiding this comment.
we had some test in SyncStage, can we have some for SyncStatus?
There was a problem hiding this comment.
Test was there only for checking if the SyncStage is compatible with the LotusJson for comparing with the Lotus API, but since SyncStatus is specific to Forest we don't need those tests.
And to test SyncStatus we need mocks of all the other components it belongs to (ChainStore, StateManager etc.), I don't think we currently have that. We should though.
src/rpc/methods/eth.rs
Outdated
| match sync_status.get_status() == NodeSyncStatus::Syncing { | ||
| true => { |
There was a problem hiding this comment.
when matching on boolean values, I feel If and Else is more suitable and readable
There was a problem hiding this comment.
In general, I agree, though we already have several occurrences of such style in Forest, so it's not a huge deal. For this particular one, I'd avoid comparing booleans and suggest matching on sync_status.get_status() directly. This way, when a new state is introduced, we'll get a compilation error due to an unhandled enum variant.
There was a problem hiding this comment.
I've implemented the match statement. Since we only care about Syncing status and returning error in all other case, I think using wildcard (_) is more appropriate.
|
@akaladarshi some more code feedback |
CHANGELOG.md
Outdated
| ### Breaking | ||
|
|
||
| - [#5559](https://github.com/ChainSafe/forest/pull/5559) Change `Filecoin.ChainGetMinBaseFee` to `Forest.ChainGetMinBaseFee` with read access. | ||
| - [#5589](https://github.com/ChainSafe/forest/pull/5589) Replace exiting `Filecoin.SyncState` API with new `Forest.SyncStatus` to track node syncing progress specific to forest. |
There was a problem hiding this comment.
| - [#5589](https://github.com/ChainSafe/forest/pull/5589) Replace exiting `Filecoin.SyncState` API with new `Forest.SyncStatus` to track node syncing progress specific to forest. | |
| - [#5589](https://github.com/ChainSafe/forest/pull/5589) Replace existing `Filecoin.SyncState` API with new `Forest.SyncStatus` to track node syncing progress specific to Forest. |
src/chain_sync/sync_status.rs
Outdated
| use std::sync::Arc; | ||
| use tracing::log; | ||
|
|
||
| // Node considered synced if the head is within this many epochs |
There was a problem hiding this comment.
| // Node considered synced if the head is within this many epochs | |
| // Node considered synced if the head is within this threshold. |
src/chain_sync/sync_status.rs
Outdated
| } | ||
|
|
||
| pub(crate) fn get_status(&self) -> NodeSyncStatus { | ||
| self.status.clone() |
There was a problem hiding this comment.
clone can be avoided here
There was a problem hiding this comment.
I think this will be negligible since NodeSyncStatus is an enum, anyways added a copy trait in the enum and removed the clone.
src/chain_sync/sync_status.rs
Outdated
| /// Node is significantly behind the network head and actively downloading/validating. | ||
| #[strum(to_string = "Syncing")] | ||
| Syncing, | ||
| /// Node is close to the network head (e.g., 5 epochs). |
There was a problem hiding this comment.
| /// Node is close to the network head (e.g., 5 epochs). | |
| /// Node is close to the network head, within the `SYNCED_EPOCH_THRESHOLD` |
|
I am also planning to remove the existing |
LesnyRumcajs
left a comment
There was a problem hiding this comment.
Overall looks good, the output is more understandable than before.
src/chain_sync/sync_status.rs
Outdated
| // Node considered synced if the head is within this threshold. | ||
| const SYNCED_EPOCH_THRESHOLD: u64 = 5; |
There was a problem hiding this comment.
Hm, that's 2m30s for calibnet and mainnet. Did lower values give you trouble?
There was a problem hiding this comment.
I didn't see any problem with this, I was experimenting with different numbers to see if there could be any issue but didn't. So just choose 5, since last was 10 (@LesnyRumcajs was there any specific reason for choosing 10?)
There was a problem hiding this comment.
I don't remember exactly. Lower values might have given flaky results in the healthcheck endpoint, i.e., frequent transitions between healthy and unhealthy due to forks. I'm okay with both.
There was a problem hiding this comment.
Should I revert back to 10, It's not much of a gap in terms of waiting for node status to change to sync as it's a one time thing while starting up.
There was a problem hiding this comment.
Let's revert to limit the changes. If someone reports that the difference is too big, we can always revisit this.
src/chain_sync/sync_status.rs
Outdated
| #[strum(to_string = "Synced")] | ||
| Synced, | ||
| /// An error occurred during the sync process. | ||
| #[strum(to_string = "error")] |
There was a problem hiding this comment.
| #[strum(to_string = "error")] | |
| #[strum(to_string = "Error")] |
use same case
src/chain_sync/sync_status.rs
Outdated
| #[strum(to_string = "error")] | ||
| Error, | ||
| /// Node is configured to not sync (offline mode). | ||
| #[strum(to_string = "offline")] |
There was a problem hiding this comment.
| #[strum(to_string = "offline")] | |
| #[strum(to_string = "Offline")] |
src/chain_sync/sync_status.rs
Outdated
| } | ||
| } | ||
|
|
||
| pub(crate) fn set_current_chain_head_key(&mut self, tipset_key: TipsetKey) { |
There was a problem hiding this comment.
I'd avoid having a structure with private fields and a bunch of setters and getters. If they are meant to be used outside of the struct, mark the fields as pub(crate). Otherwise, it'd be nice to encapsulate this if possible so that the user doesn't have to know the internal structure of it.
There was a problem hiding this comment.
Will remove the setters and getters and use the pub(crate) directly on the fields.
| let now = Utc::now(); | ||
| let now_ts = now.timestamp() as u64; | ||
| let seconds_per_epoch = state_manager.chain_config().block_delay_secs; | ||
| let network_head_epoch = calculate_expected_epoch( | ||
| now_ts, | ||
| state_manager.chain_store().genesis_block_header().timestamp, | ||
| seconds_per_epoch, | ||
| ); |
There was a problem hiding this comment.
I think this could be a separate method somewhere.
There was a problem hiding this comment.
Everywhere else we are directly using the calculate_expected_epoch and passing the fields, but here we have introduced the variables just so we can reuse them. I don't think we need to introduce a separate method just for this specific use case.
| const PARAM_NAMES: [&'static str; 0] = []; | ||
| const API_PATHS: BitFlags<ApiPaths> = ApiPaths::all(); | ||
| const PERMISSION: Permission = Permission::Read; | ||
|
|
There was a problem hiding this comment.
let's add DESCRIPTION
Summary of changes
Changes introduced in this pull request:
SyncStatusReportfor tracking the syncing progressSyncStatusReportis the replacement of theSyncStateto track forest syncingSyncStateFilecoin.SyncStateAPISyncStatuswith the name ofForest.SyncStatusto return the sync status reportchain_followerto use newSyncStatusReportinstead ofSyncState.Reference issue to close (if applicable)
Closes #5539
Other information and links
Change checklist