
Commit 3721bd0

[forge] for max throughput test, remove configs that reduce blocks/s (aptos-labs#12095)
### Description

Also tune down some of the more extreme overload settings. These configs give us 13K TPS at 4.4 s average end-to-end latency, with > 4 blocks/s and > 900 ms of execution time per second. This compares to the previous 17K TPS at 28 s average end-to-end latency, with < 1 block/s and ~1 s of execution time per second. While the TPS is significantly lower, the blocks/s is much closer to what we would want in an at-capacity deployed network. To track raw TPS, the single-node execution benchmarks should be used. The configs need to be tweaked again if execution time per second dips significantly below 900 ms.

### Test Plan

Run ad-hoc test: https://github.com/aptos-labs/aptos-core/actions/runs/7981224726
1 parent e52145d commit 3721bd0

File tree

1 file changed: +48 −39 lines changed

testsuite/forge-cli/src/main.rs

Lines changed: 48 additions & 39 deletions
```diff
@@ -7,7 +7,7 @@
 use anyhow::{format_err, Context, Result};
 use aptos_config::config::{
     BootstrappingMode, ConsensusConfig, ContinuousSyncingMode, MempoolConfig, NetbenchConfig,
-    NodeConfig, QcAggregatorType, StateSyncConfig,
+    NodeConfig, StateSyncConfig,
 };
 use aptos_forge::{
     args::TransactionTypeArg,
```
```diff
@@ -774,7 +774,7 @@ fn run_consensus_only_realistic_env_max_tps() -> ForgeConfig {
             helm_values["chain"]["epoch_duration_secs"] = (24 * 3600).into();
         }))
         .with_validator_override_node_config_fn(Arc::new(|config, _| {
-            optimize_for_maximum_throughput(config);
+            optimize_for_maximum_throughput(config, 20_000, 4_500, 3.0);
         }))
         // TODO(ibalajiarun): tune these success critiera after we have a better idea of the test behavior
         .with_success_criteria(
```
```diff
@@ -788,47 +788,50 @@ fn run_consensus_only_realistic_env_max_tps() -> ForgeConfig {
         )
 }
 
-fn optimize_for_maximum_throughput(config: &mut NodeConfig) {
-    mempool_config_practically_non_expiring(&mut config.mempool);
-
-    config.consensus.max_sending_block_txns = 30000;
-    config.consensus.max_receiving_block_txns = 40000;
-    config.consensus.max_sending_block_bytes = 10 * 1024 * 1024;
-    config.consensus.max_receiving_block_bytes = 12 * 1024 * 1024;
-    config.consensus.pipeline_backpressure = vec![];
-    config.consensus.chain_health_backoff = vec![];
-
-    config
-        .consensus
-        .quorum_store
-        .back_pressure
-        .backlog_txn_limit_count = 200000;
-    config
-        .consensus
-        .quorum_store
-        .back_pressure
-        .backlog_per_validator_batch_limit_count = 50;
+fn quorum_store_backlog_txn_limit_count(
+    config: &mut NodeConfig,
+    target_tps: usize,
+    vn_latency: f64,
+) {
     config
         .consensus
         .quorum_store
         .back_pressure
-        .dynamic_min_txn_per_s = 2000;
+        .backlog_txn_limit_count = (target_tps as f64 * vn_latency) as u64;
     config
         .consensus
         .quorum_store
         .back_pressure
-        .dynamic_max_txn_per_s = 8000;
+        .dynamic_max_txn_per_s = 4000;
+}
+
+fn optimize_for_maximum_throughput(
+    config: &mut NodeConfig,
+    target_tps: usize,
+    max_txns_per_block: usize,
+    vn_latency: f64,
+) {
+    mempool_config_practically_non_expiring(&mut config.mempool);
+
+    config.consensus.max_sending_block_txns = max_txns_per_block as u64;
+    config.consensus.max_receiving_block_txns = (max_txns_per_block as f64 * 4.0 / 3.0) as u64;
+    config.consensus.max_sending_block_bytes = 10 * 1024 * 1024;
+    config.consensus.max_receiving_block_bytes = 12 * 1024 * 1024;
+    config.consensus.pipeline_backpressure = vec![];
+    config.consensus.chain_health_backoff = vec![];
 
-    config.consensus.quorum_store.sender_max_batch_txns = 1000;
+    quorum_store_backlog_txn_limit_count(config, target_tps, vn_latency);
+
+    config.consensus.quorum_store.sender_max_batch_txns = 500;
     config.consensus.quorum_store.sender_max_batch_bytes = 4 * 1024 * 1024;
     config.consensus.quorum_store.sender_max_num_batches = 100;
     config.consensus.quorum_store.sender_max_total_txns = 4000;
     config.consensus.quorum_store.sender_max_total_bytes = 8 * 1024 * 1024;
     config.consensus.quorum_store.receiver_max_batch_txns = 1000;
-    config.consensus.quorum_store.receiver_max_batch_bytes = 4 * 1024 * 1024;
-    config.consensus.quorum_store.receiver_max_num_batches = 100;
-    config.consensus.quorum_store.receiver_max_total_txns = 4000;
-    config.consensus.quorum_store.receiver_max_total_bytes = 8 * 1024 * 1024;
+    config.consensus.quorum_store.receiver_max_batch_bytes = 8 * 1024 * 1024;
+    config.consensus.quorum_store.receiver_max_num_batches = 200;
+    config.consensus.quorum_store.receiver_max_total_txns = 8000;
+    config.consensus.quorum_store.receiver_max_total_bytes = 16 * 1024 * 1024;
 }
 
 fn large_db_simple_test() -> ForgeConfig {
```
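As a rough aside (not part of the commit), the sizing rule applied by the new `quorum_store_backlog_txn_limit_count` helper is simply target TPS multiplied by the validator latency budget; a minimal standalone sketch with the values the consensus-only suite now passes (`20_000` TPS, `3.0` s), plus the 4/3 send-to-receive block ratio visible in the hunk above:

```rust
// Standalone sketch (illustrative, not from the commit): the backlog sizing
// rule used by the new quorum_store_backlog_txn_limit_count helper.
fn backlog_txn_limit_count(target_tps: usize, vn_latency_s: f64) -> u64 {
    // Allow roughly vn_latency_s seconds' worth of traffic at the target TPS
    // to queue in the quorum store before back pressure applies.
    (target_tps as f64 * vn_latency_s) as u64
}

fn main() {
    // Consensus-only suite: optimize_for_maximum_throughput(config, 20_000, 4_500, 3.0)
    assert_eq!(backlog_txn_limit_count(20_000, 3.0), 60_000);
    // max_receiving_block_txns is sized at 4/3 of max_sending_block_txns:
    assert_eq!((4_500 as f64 * 4.0 / 3.0) as u64, 6_000);
}
```

Compared with the old hard-coded `backlog_txn_limit_count = 200000`, the backlog now scales with each suite's stated latency budget.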
```diff
@@ -1840,32 +1843,38 @@ fn realistic_network_tuned_for_throughput_test() -> ForgeConfig {
     const ENABLE_VFNS: bool = true;
     const VALIDATOR_COUNT: usize = 12;
 
+    // Config is based on these values. The target TPS should be a slight overestimate of
+    // the actual throughput to be able to have reasonable queueing but also so throughput
+    // will improve as performance improves.
+    // Overestimate: causes mempool and/or batch queueing. Underestimate: not enough txns in blocks.
+    const TARGET_TPS: usize = 15_000;
+    // Overestimate: causes blocks to be too small. Underestimate: causes blocks that are too large.
+    // Ideally, want the block size to take 200-250ms of execution time to match broadcast RTT.
+    const MAX_TXNS_PER_BLOCK: usize = 3500;
+    // Overestimate: causes batch queueing. Underestimate: not enough txns in quorum store.
+    // This is validator latency, minus mempool queueing time.
+    const VN_LATENCY_S: f64 = 2.5;
+    // Overestimate: causes mempool queueing. Underestimate: not enough txns incoming.
+    const VFN_LATENCY_S: f64 = 4.0;
+
     let mut forge_config = ForgeConfig::default()
         .with_initial_validator_count(NonZeroUsize::new(VALIDATOR_COUNT).unwrap())
         .add_network_test(MultiRegionNetworkEmulationTest::default())
         .with_emit_job(EmitJobRequest::default().mode(EmitJobMode::MaxLoad {
-            mempool_backlog: 500_000,
+            mempool_backlog: (TARGET_TPS as f64 * VFN_LATENCY_S) as usize,
         }))
         .with_validator_override_node_config_fn(Arc::new(|config, _| {
            // Increase the state sync chunk sizes (consensus blocks are much larger than 1k)
            optimize_state_sync_for_throughput(config);
 
-            // consensus and quorum store configs copied from the consensus-only suite
-            optimize_for_maximum_throughput(config);
+            optimize_for_maximum_throughput(config, TARGET_TPS, MAX_TXNS_PER_BLOCK, VN_LATENCY_S);
 
             // Other consensus / Quroum store configs
-            config
-                .consensus
-                .wait_for_full_blocks_above_recent_fill_threshold = 0.2;
-            config.consensus.wait_for_full_blocks_above_pending_blocks = 8;
             config.consensus.quorum_store_pull_timeout_ms = 200;
 
             // Experimental storage optimizations
             config.storage.rocksdb_configs.enable_storage_sharding = true;
 
-            // Experimental delayed QC aggregation
-            config.consensus.qc_aggregator_type = QcAggregatorType::default_delayed();
-
             // Increase the concurrency level
             if USE_CRAZY_MACHINES {
                 config.execution.concurrency_level = 48;
```
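Similarly, as an aside (not part of the commit), the constants added in this hunk drive both the emit job's mempool backlog and the quorum store backlog; a standalone sketch of the arithmetic, using only the values shown in the diff:

```rust
// Standalone sketch (illustrative, not from the commit): how the new constants
// in realistic_network_tuned_for_throughput_test feed the two backlog budgets.
fn main() {
    const TARGET_TPS: usize = 15_000;
    const VN_LATENCY_S: f64 = 2.5; // validator latency, minus mempool queueing time
    const VFN_LATENCY_S: f64 = 4.0; // VFN latency budget (see the comment in the diff)

    // Emit job mempool backlog (replaces the fixed 500_000):
    let mempool_backlog = (TARGET_TPS as f64 * VFN_LATENCY_S) as usize;
    assert_eq!(mempool_backlog, 60_000);

    // Quorum store backlog before back pressure kicks in:
    let backlog_txn_limit_count = (TARGET_TPS as f64 * VN_LATENCY_S) as u64;
    assert_eq!(backlog_txn_limit_count, 37_500);

    // Block size cap for this suite: MAX_TXNS_PER_BLOCK = 3_500, chosen so a
    // block takes roughly 200-250 ms of execution time (per the diff comment).
}
```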
