Skip to content

Commit b5a7e3c

Browse files
authored
Merge pull request #2459 from subspace/use-l3-cache-topology
Use L3 cache topology instead of NUMA topology for default plotting threads
2 parents afd74b3 + 20ae3f5 commit b5a7e3c

File tree

2 files changed

+42
-17
lines changed

2 files changed

+42
-17
lines changed

crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ use subspace_farmer::utils::readers_and_pieces::ReadersAndPieces;
3434
use subspace_farmer::utils::ss58::parse_ss58_reward_address;
3535
use subspace_farmer::utils::{
3636
all_cpu_cores, create_plotting_thread_pool_manager, parse_cpu_cores_sets,
37-
run_future_in_dedicated_thread, thread_pool_core_indices, AsyncJoinOnDrop,
37+
recommended_number_of_farming_threads, run_future_in_dedicated_thread,
38+
thread_pool_core_indices, AsyncJoinOnDrop,
3839
};
3940
use subspace_farmer::{Identity, NodeClient, NodeRpcClient};
4041
use subspace_farmer_components::plotting::PlottedSector;
@@ -116,7 +117,8 @@ pub(crate) struct FarmingArgs {
116117
#[arg(long)]
117118
sector_downloading_concurrency: Option<NonZeroUsize>,
118119
/// Defines how many sectors farmer will encode concurrently, defaults to 1 on UMA system and
119-
/// number of NUMA nodes on NUMA system. It is further restricted by
120+
/// number of NUMA nodes on NUMA system or L3 cache groups on large CPUs. It is further
121+
/// restricted by
120122
/// `--sector-downloading-concurrency` and setting this option higher than
121123
/// `--sector-downloading-concurrency` will have no effect.
122124
#[arg(long)]
@@ -133,7 +135,8 @@ pub(crate) struct FarmingArgs {
133135
#[arg(long)]
134136
farming_thread_pool_size: Option<NonZeroUsize>,
135137
/// Size of one thread pool used for plotting, defaults to number of logical CPUs available
136-
/// on UMA system and number of logical CPUs available in NUMA node on NUMA system.
138+
/// on UMA system and number of logical CPUs available in NUMA node on NUMA system or L3 cache
139+
/// groups on large CPUs.
137140
///
138141
/// Number of thread pools is defined by `--sector-encoding-concurrency` option, different
139142
/// thread pools might have different number of threads if NUMA nodes do not have the same size.
@@ -154,7 +157,8 @@ pub(crate) struct FarmingArgs {
154157
plotting_cpu_cores: Option<String>,
155158
/// Size of one thread pool used for replotting, typically smaller pool than for plotting
156159
/// to not affect farming as much, defaults to half of the number of logical CPUs available on
157-
/// UMA system and number of logical CPUs available in NUMA node on NUMA system.
160+
/// UMA system and number of logical CPUs available in NUMA node on NUMA system or L3 cache
161+
/// groups on large CPUs.
158162
///
159163
/// Number of thread pools is defined by `--sector-encoding-concurrency` option, different
160164
/// thread pools might have different number of threads if NUMA nodes do not have the same size.
@@ -498,22 +502,16 @@ where
498502
.unwrap_or(plotting_thread_pool_core_indices.len() + 1),
499503
));
500504

501-
let all_cpu_cores = all_cpu_cores();
502505
let plotting_thread_pool_manager = create_plotting_thread_pool_manager(
503506
plotting_thread_pool_core_indices
504507
.into_iter()
505508
.zip(replotting_thread_pool_core_indices),
506509
)?;
507510
let farming_thread_pool_size = farming_thread_pool_size
508511
.map(|farming_thread_pool_size| farming_thread_pool_size.get())
509-
.unwrap_or_else(|| {
510-
all_cpu_cores
511-
.first()
512-
.expect("Not empty according to function description; qed")
513-
.cpu_cores()
514-
.len()
515-
});
512+
.unwrap_or_else(recommended_number_of_farming_threads);
516513

514+
let all_cpu_cores = all_cpu_cores();
517515
if all_cpu_cores.len() > 1 {
518516
info!(numa_nodes = %all_cpu_cores.len(), "NUMA system detected");
519517

crates/subspace-farmer/src/utils.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::thread_pool_manager::{PlottingThreadPoolManager, PlottingThreadPoolPa
99
use futures::channel::oneshot;
1010
use futures::channel::oneshot::Canceled;
1111
use futures::future::Either;
12+
use hwlocality::object::types::ObjectType;
1213
use rayon::{ThreadBuilder, ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder};
1314
use std::future::Future;
1415
use std::num::{NonZeroUsize, ParseIntError};
@@ -187,7 +188,30 @@ impl CpuCoreSet {
187188
}
188189
}
189190

190-
/// Get all cpu cores, grouped into sets according to NUMA nodes.
191+
/// Recommended number of thread pool size for farming, equal to number of CPU cores in the first
192+
/// NUMA node
193+
pub fn recommended_number_of_farming_threads() -> usize {
194+
#[cfg(feature = "numa")]
195+
match hwlocality::Topology::new().map(std::sync::Arc::new) {
196+
Ok(topology) => {
197+
return topology
198+
// Iterate over NUMA nodes
199+
.objects_at_depth(hwlocality::object::depth::Depth::NUMANode)
200+
// For each NUMA nodes get CPU set
201+
.filter_map(|node| node.cpuset())
202+
// Get number of CPU cores
203+
.map(|cpuset| cpuset.iter_set().count())
204+
.find(|&count| count > 0)
205+
.unwrap_or_else(num_cpus::get);
206+
}
207+
Err(error) => {
208+
warn!(%error, "Failed to get NUMA topology");
209+
}
210+
}
211+
num_cpus::get()
212+
}
213+
214+
/// Get all cpu cores, grouped into sets according to NUMA nodes or L3 cache groups on large CPUs.
191215
///
192216
/// Returned vector is guaranteed to have at least one element and have non-zero number of CPU cores
193217
/// in each set.
@@ -196,8 +220,8 @@ pub fn all_cpu_cores() -> Vec<CpuCoreSet> {
196220
match hwlocality::Topology::new().map(std::sync::Arc::new) {
197221
Ok(topology) => {
198222
let cpu_cores = topology
199-
// Iterate over NUMA nodes
200-
.objects_at_depth(hwlocality::object::depth::Depth::NUMANode)
223+
// Iterate over groups of L3 caches
224+
.objects_with_type(ObjectType::L3Cache)
201225
// For each NUMA nodes get CPU set
202226
.filter_map(|node| node.cpuset())
203227
// For each CPU set extract individual cores
@@ -214,7 +238,7 @@ pub fn all_cpu_cores() -> Vec<CpuCoreSet> {
214238
}
215239
}
216240
Err(error) => {
217-
warn!(%error, "Failed to get CPU topology");
241+
warn!(%error, "Failed to get L3 cache topology");
218242
}
219243
}
220244
vec![CpuCoreSet {
@@ -227,6 +251,9 @@ pub fn all_cpu_cores() -> Vec<CpuCoreSet> {
227251
/// Parse space-separated set of groups of CPU cores (individual cores are coma-separated) into
228252
/// vector of CPU core sets that can be used for creation of plotting/replotting thread pools.
229253
pub fn parse_cpu_cores_sets(s: &str) -> Result<Vec<CpuCoreSet>, ParseIntError> {
254+
#[cfg(feature = "numa")]
255+
let topology = hwlocality::Topology::new().map(std::sync::Arc::new).ok();
256+
230257
s.split(' ')
231258
.map(|s| {
232259
let cores = s
@@ -237,7 +264,7 @@ pub fn parse_cpu_cores_sets(s: &str) -> Result<Vec<CpuCoreSet>, ParseIntError> {
237264
Ok(CpuCoreSet {
238265
cores,
239266
#[cfg(feature = "numa")]
240-
topology: hwlocality::Topology::new().map(std::sync::Arc::new).ok(),
267+
topology: topology.clone(),
241268
})
242269
})
243270
.collect()

0 commit comments

Comments
 (0)