Skip to content

Commit 55f3544

Browse files
authored
remote-ext: fix state download stall on slow connections and reduce memory usage (#1295)
Original PR paritytech/substrate#14746 --- ## Fixing stall ### Introduction I experienced an apparent stall downloading state from `https://rococo-try-runtime-node.parity-chains.parity.io:443` which was having networking difficulties, only responding to my JSONRPC requests with 50-200KB/s of bandwidth. This PR fixes the issue causing the stall, and generally improves the performance of remote-ext when it downloads state by greatly reducing the chances of a timeout occurring. ### Description Introduces a new `REQUEST_DURATION_TARGET` constant and modifies `get_storage_data_dynamic_batch_size` to - Increase or decrease the batch size of the next request depending on whether the elapsed time of the last request was greater or less than the target - Reset the batch size to 1 if the request times out This fixes an issue on slow connections that can otherwise cause multiple timeouts and a stalled download when: 1. The batch size increases rapidly as remote-ext downloads keys with small associated storage values 2. remote-ext tries to process a large series of subsequent keys all with extremely large associated storage values (Rococo has a series of keys 1-5MB large) 3. The huge storage values download for 5 minutes until the request times out 4. The partially downloaded keys are thrown out and remote-ext tries again with a smaller batch size, but the batch size is still far too large and takes 5 minutes to be reduced again 5. The download will be essentially stalled for many hours while the above steps cycle After this PR, the request size will - Not grow as large to begin with, as it is regulated downwards as the request duration exceeds the target - Drop immediately to 1 if the request times out. A timeout indicates the keys next in line to download have extremely large storage values compared to previously downloaded keys, and we need to reset the batch size to figure out what our new ideal batch size is. By not resetting down to 1, we risk the next request timing out again. 
## Reducing memory As suggested by @bkchr, I adjusted `get_storage_data_dynamic_batch_size` from being recursive to a loop, which allows removing a bunch of clones that were chewing through a lot of memory. I noticed it was previously using up to 50GB of swap when downloading Polkadot keys on a slow connection, because it needed to recurse and clone a lot. After this change it uses only ~1.5GB of memory.
1 parent 373b8ac commit 55f3544

File tree

3 files changed

+102
-101
lines changed

3 files changed

+102
-101
lines changed

Cargo.lock

Lines changed: 0 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

substrate/utils/frame/remote-externalities/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ sp-runtime = { path = "../../../primitives/runtime" }
2323
tokio = { version = "1.22.0", features = ["macros", "rt-multi-thread"] }
2424
substrate-rpc-client = { path = "../rpc/client" }
2525
futures = "0.3"
26-
async-recursion = "1.0.4"
2726
indicatif = "0.17.3"
2827
spinners = "4.1.0"
2928
tokio-retry = "0.3.0"

substrate/utils/frame/remote-externalities/src/lib.rs

Lines changed: 102 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
//! An equivalent of `sp_io::TestExternalities` that can load its state from a remote substrate
2121
//! based chain, or a local state snapshot file.
2222
23-
use async_recursion::async_recursion;
2423
use codec::{Compact, Decode, Encode};
2524
use indicatif::{ProgressBar, ProgressStyle};
2625
use jsonrpsee::{
@@ -44,7 +43,7 @@ use sp_runtime::{
4443
use sp_state_machine::TestExternalities;
4544
use spinners::{Spinner, Spinners};
4645
use std::{
47-
cmp::max,
46+
cmp::{max, min},
4847
fs,
4948
ops::{Deref, DerefMut},
5049
path::{Path, PathBuf},
@@ -353,10 +352,11 @@ where
353352
const PARALLEL_REQUESTS: usize = 4;
354353
const BATCH_SIZE_INCREASE_FACTOR: f32 = 1.10;
355354
const BATCH_SIZE_DECREASE_FACTOR: f32 = 0.50;
356-
const INITIAL_BATCH_SIZE: usize = 5000;
355+
const REQUEST_DURATION_TARGET: Duration = Duration::from_secs(15);
356+
const INITIAL_BATCH_SIZE: usize = 10;
357357
// nodes by default will not return more than 1000 keys per request
358358
const DEFAULT_KEY_DOWNLOAD_PAGE: u32 = 1000;
359-
const KEYS_PAGE_MAX_RETRIES: usize = 12;
359+
const MAX_RETRIES: usize = 12;
360360
const KEYS_PAGE_RETRY_INTERVAL: Duration = Duration::from_secs(5);
361361

362362
async fn rpc_get_storage(
@@ -411,8 +411,8 @@ where
411411
let keys = loop {
412412
// This loop can hit the node with very rapid requests, occasionally causing it to
413413
// error out in CI (https://github.com/paritytech/substrate/issues/14129), so we retry.
414-
let retry_strategy = FixedInterval::new(Self::KEYS_PAGE_RETRY_INTERVAL)
415-
.take(Self::KEYS_PAGE_MAX_RETRIES);
414+
let retry_strategy =
415+
FixedInterval::new(Self::KEYS_PAGE_RETRY_INTERVAL).take(Self::MAX_RETRIES);
416416
let get_page_closure =
417417
|| self.get_keys_single_page(Some(prefix.clone()), last_key.clone(), at);
418418
let page = Retry::spawn(retry_strategy, get_page_closure).await?;
@@ -448,8 +448,6 @@ where
448448
///
449449
/// * `client` - An `Arc` wrapped `HttpClient` used for making the requests.
450450
/// * `payloads` - A vector of tuples containing a JSONRPC method name and `ArrayParams`
451-
/// * `batch_size` - The initial batch size to use for the request. The batch size will be
452-
/// adjusted dynamically in case of failure.
453451
///
454452
/// # Returns
455453
///
@@ -485,80 +483,107 @@ where
485483
/// }
486484
/// }
487485
/// ```
488-
#[async_recursion]
489486
async fn get_storage_data_dynamic_batch_size(
490487
client: &HttpClient,
491488
payloads: Vec<(String, ArrayParams)>,
492-
batch_size: usize,
493489
bar: &ProgressBar,
494490
) -> Result<Vec<Option<StorageData>>, String> {
495-
// All payloads have been processed
496-
if payloads.is_empty() {
497-
return Ok(vec![])
498-
};
499-
500-
log::debug!(
501-
target: LOG_TARGET,
502-
"Remaining payloads: {} Batch request size: {}",
503-
payloads.len(),
504-
batch_size,
505-
);
491+
let mut all_data: Vec<Option<StorageData>> = vec![];
492+
let mut start_index = 0;
493+
let mut retries = 0usize;
494+
let mut batch_size = Self::INITIAL_BATCH_SIZE;
495+
let total_payloads = payloads.len();
496+
497+
while start_index < total_payloads {
498+
log::debug!(
499+
target: LOG_TARGET,
500+
"Remaining payloads: {} Batch request size: {}",
501+
total_payloads - start_index,
502+
batch_size,
503+
);
506504

507-
// Payloads to attempt to process this batch
508-
let page = payloads.iter().take(batch_size).cloned().collect::<Vec<_>>();
505+
let end_index = usize::min(start_index + batch_size, total_payloads);
506+
let page = &payloads[start_index..end_index];
509507

510-
// Build the batch request
511-
let mut batch = BatchRequestBuilder::new();
512-
for (method, params) in page.iter() {
513-
batch
514-
.insert(method, params.clone())
515-
.map_err(|_| "Invalid batch method and/or params")?
516-
}
517-
let batch_response = match client.batch_request::<Option<StorageData>>(batch).await {
518-
Ok(batch_response) => batch_response,
519-
Err(e) => {
520-
if batch_size < 2 {
521-
return Err(e.to_string())
522-
}
508+
// Build the batch request
509+
let mut batch = BatchRequestBuilder::new();
510+
for (method, params) in page.iter() {
511+
batch
512+
.insert(method, params.clone())
513+
.map_err(|_| "Invalid batch method and/or params")?;
514+
}
523515

524-
log::debug!(
525-
target: LOG_TARGET,
526-
"Batch request failed, trying again with smaller batch size. {}",
527-
e.to_string()
528-
);
516+
let request_started = Instant::now();
517+
let batch_response = match client.batch_request::<Option<StorageData>>(batch).await {
518+
Ok(batch_response) => {
519+
retries = 0;
520+
batch_response
521+
},
522+
Err(e) => {
523+
if retries > Self::MAX_RETRIES {
524+
return Err(e.to_string())
525+
}
526+
527+
retries += 1;
528+
let failure_log = format!(
529+
"Batch request failed ({}/{} retries). Error: {}",
530+
retries,
531+
Self::MAX_RETRIES,
532+
e.to_string()
533+
);
534+
// after 2 subsequent failures something very wrong is happening. log a warning
535+
// and reset the batch size down to 1.
536+
if retries >= 2 {
537+
log::warn!("{}", failure_log);
538+
batch_size = 1;
539+
} else {
540+
log::debug!("{}", failure_log);
541+
// Decrease batch size by DECREASE_FACTOR
542+
batch_size =
543+
(batch_size as f32 * Self::BATCH_SIZE_DECREASE_FACTOR) as usize;
544+
}
545+
continue
546+
},
547+
};
529548

530-
return Self::get_storage_data_dynamic_batch_size(
531-
client,
532-
payloads,
533-
max(1, (batch_size as f32 * Self::BATCH_SIZE_DECREASE_FACTOR) as usize),
534-
bar,
549+
let request_duration = request_started.elapsed();
550+
batch_size = if request_duration > Self::REQUEST_DURATION_TARGET {
551+
// Decrease batch size
552+
max(1, (batch_size as f32 * Self::BATCH_SIZE_DECREASE_FACTOR) as usize)
553+
} else {
554+
// Increase batch size, but not more than the remaining total payloads to process
555+
min(
556+
total_payloads - start_index,
557+
max(
558+
batch_size + 1,
559+
(batch_size as f32 * Self::BATCH_SIZE_INCREASE_FACTOR) as usize,
560+
),
535561
)
536-
.await
537-
},
538-
};
562+
};
563+
564+
log::debug!(
565+
target: LOG_TARGET,
566+
"Request duration: {:?} Target duration: {:?} Last batch size: {} Next batch size: {}",
567+
request_duration,
568+
Self::REQUEST_DURATION_TARGET,
569+
end_index - start_index,
570+
batch_size
571+
);
539572

540-
// Collect the data from this batch
541-
let mut data: Vec<Option<StorageData>> = vec![];
542-
let batch_response_len = batch_response.len();
543-
for item in batch_response.into_iter() {
544-
match item {
545-
Ok(x) => data.push(x),
546-
Err(e) => return Err(e.message().to_string()),
573+
let batch_response_len = batch_response.len();
574+
for item in batch_response.into_iter() {
575+
match item {
576+
Ok(x) => all_data.push(x),
577+
Err(e) => return Err(e.message().to_string()),
578+
}
547579
}
580+
bar.inc(batch_response_len as u64);
581+
582+
// Update the start index for the next iteration
583+
start_index = end_index;
548584
}
549-
bar.inc(batch_response_len as u64);
550585

551-
// Return this data joined with the remaining keys
552-
let remaining_payloads = payloads.iter().skip(batch_size).cloned().collect::<Vec<_>>();
553-
let mut rest = Self::get_storage_data_dynamic_batch_size(
554-
client,
555-
remaining_payloads,
556-
max(batch_size + 1, (batch_size as f32 * Self::BATCH_SIZE_INCREASE_FACTOR) as usize),
557-
bar,
558-
)
559-
.await?;
560-
data.append(&mut rest);
561-
Ok(data)
586+
Ok(all_data)
562587
}
563588

564589
/// Synonym of `getPairs` that uses paged queries to first get the keys, and then
@@ -605,12 +630,7 @@ where
605630
);
606631
let payloads_chunked = payloads.chunks((&payloads.len() / Self::PARALLEL_REQUESTS).max(1));
607632
let requests = payloads_chunked.map(|payload_chunk| {
608-
Self::get_storage_data_dynamic_batch_size(
609-
&client,
610-
payload_chunk.to_vec(),
611-
Self::INITIAL_BATCH_SIZE,
612-
&bar,
613-
)
633+
Self::get_storage_data_dynamic_batch_size(&client, payload_chunk.to_vec(), &bar)
614634
});
615635
// Execute the requests and move the Result outside.
616636
let storage_data_result: Result<Vec<_>, _> =
@@ -683,20 +703,14 @@ where
683703
.collect::<Vec<_>>();
684704

685705
let bar = ProgressBar::new(payloads.len() as u64);
686-
let storage_data = match Self::get_storage_data_dynamic_batch_size(
687-
client,
688-
payloads,
689-
Self::INITIAL_BATCH_SIZE,
690-
&bar,
691-
)
692-
.await
693-
{
694-
Ok(storage_data) => storage_data,
695-
Err(e) => {
696-
log::error!(target: LOG_TARGET, "batch processing failed: {:?}", e);
697-
return Err("batch processing failed")
698-
},
699-
};
706+
let storage_data =
707+
match Self::get_storage_data_dynamic_batch_size(client, payloads, &bar).await {
708+
Ok(storage_data) => storage_data,
709+
Err(e) => {
710+
log::error!(target: LOG_TARGET, "batch processing failed: {:?}", e);
711+
return Err("batch processing failed")
712+
},
713+
};
700714

701715
assert_eq!(child_keys_len, storage_data.len());
702716

0 commit comments

Comments
 (0)