Skip to content

Commit 3992f84

Browse files
claridge-googleCQ Bot
authored andcommitted
[cpu_manager] Mutex-guard mutable state.
CPU boost requests can overlap with thermal updates, so a RefCell is no longer sufficient. Bug: 441817651 Change-Id: Ie485d43d39c4e1d539254a569ba607c6f9477de9 Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/1357456 Commit-Queue: Kevin Lindkvist <[email protected]> Reviewed-by: Kevin Lindkvist <[email protected]>
1 parent 26c6716 commit 3992f84

File tree

1 file changed

+27
-24
lines changed

1 file changed

+27
-24
lines changed

src/power/cpu-manager/src/cpu_manager_main.rs

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ use crate::message::{Message, MessageResult, MessageReturn};
88
use crate::node::Node;
99
use crate::ok_or_default_err;
1010
use crate::types::{NormPerfs, OperatingPoint, ThermalLoad, Watts};
11-
use anyhow::{anyhow, bail, format_err, Error};
11+
use anyhow::{Error, anyhow, bail, format_err};
1212
use async_trait::async_trait;
1313
use async_utils::event::Event as AsyncEvent;
1414
use fuchsia_inspect::{self as inspect, ArrayProperty as _, Property as _};
15+
use futures::lock::{Mutex, MutexGuard};
1516
use serde_derive::Deserialize;
16-
use std::cell::{Cell, RefCell, RefMut};
17+
use std::cell::Cell;
1718
use std::collections::HashMap;
1819
use std::convert::TryInto as _;
1920
use std::fmt::Debug;
@@ -524,7 +525,7 @@ impl<'a> CpuManagerMainBuilder<'a> {
524525
syscall_handler: self.syscall_handler,
525526
min_power_for_boost: self.min_power_for_boost,
526527
inspect,
527-
mutable_inner: RefCell::new(mutable_inner),
528+
mutable_inner: Mutex::new(mutable_inner),
528529
}))
529530
}
530531

@@ -561,8 +562,9 @@ pub struct CpuManagerMain {
561562
/// Minimum available power to boost cpu clusters to the maximum.
562563
min_power_for_boost: Watts,
563564

564-
/// Mutable inner state.
565-
mutable_inner: RefCell<MutableInner>,
565+
/// Mutable inner state. This must be guarded with a Mutex rather than a RefCell because CPU
566+
/// boost requests can overlap with thermal updates.
567+
mutable_inner: Mutex<MutableInner>,
566568
}
567569

568570
/// A performance model for a future time interval.
@@ -601,12 +603,10 @@ impl CpuManagerMain {
601603
// Determines the thermal state that should be used for the given available power and
602604
// performance model, as well as its estimated performance and power.
603605
fn select_thermal_state(
604-
&self,
606+
thermal_states: &Vec<ThermalState>,
605607
available_power: Watts,
606608
performance_model: PerformanceModel,
607609
) -> (usize, PerfAndPower) {
608-
let thermal_states = &self.mutable_inner.borrow().thermal_states;
609-
610610
// State 0 is guaranteed to be admissible. If it meets the power criterion, we return it.
611611
// Otherwise, we use it to initialize the fallback -- the lowest-index admissible state,
612612
// which will be used if no states meet the power criterion.
@@ -642,7 +642,7 @@ impl CpuManagerMain {
642642
"index" => index as u32
643643
);
644644

645-
let mut inner = self.mutable_inner.borrow_mut();
645+
let mut inner = self.mutable_inner.lock().await;
646646

647647
if let Some(current_index) = inner.current_thermal_state {
648648
// Return early if no update is required. We're assuming that opps have not changed.
@@ -737,7 +737,7 @@ impl CpuManagerMain {
737737
self.init_done.wait().await;
738738

739739
self.inspect.available_power.set(available_power.0);
740-
self.mutable_inner.borrow_mut().available_power = available_power.clone();
740+
self.mutable_inner.lock().await.available_power = available_power.clone();
741741

742742
// Gather CPU loads over the last time interval. In the unlikely event of an error, use the
743743
// worst-case CPU load of 1.0 for all CPUs and throttle accordingly.
@@ -749,13 +749,13 @@ impl CpuManagerMain {
749749
e
750750
);
751751
self.inspect.last_error.set(&e.to_string());
752-
vec![1.0; self.mutable_inner.borrow().num_cpus as usize]
752+
vec![1.0; self.mutable_inner.lock().await.num_cpus as usize]
753753
}
754754
};
755755

756756
// Determine the normalized performance over the last interval.
757757
let mut last_performance = NormPerfs(0.0);
758-
for cluster in self.mutable_inner.borrow().clusters.iter() {
758+
for cluster in self.mutable_inner.lock().await.clusters.iter() {
759759
let (load, performance) = cluster.process_fractional_loads(&cpu_loads);
760760
last_performance += performance;
761761

@@ -782,8 +782,11 @@ impl CpuManagerMain {
782782
// Determine the next thermal state, updating if needed. We use the performance over the
783783
// last interval as an estimate of performance over the next interval; in principle a more
784784
// sophisticated estimate could be used.
785-
let (new_thermal_state_index, estimate) =
786-
self.select_thermal_state(available_power, performance_model);
785+
let (new_thermal_state_index, estimate) = Self::select_thermal_state(
786+
&self.mutable_inner.lock().await.thermal_states,
787+
available_power,
788+
performance_model,
789+
);
787790
fuchsia_trace::counter!(
788791
c"cpu_manager",
789792
c"CpuManagerMain new_thermal_state_index",
@@ -820,7 +823,7 @@ impl CpuManagerMain {
820823
c"CpuManagerMain::handle_set_boost",
821824
"enable" => enable
822825
);
823-
let mut inner = self.mutable_inner.borrow_mut();
826+
let mut inner = self.mutable_inner.lock().await;
824827
inner.boost_enabled = enable;
825828
fuchsia_trace::counter!(
826829
c"cpu_manager",
@@ -835,7 +838,7 @@ impl CpuManagerMain {
835838

836839
async fn update_cluster_opps(
837840
&self,
838-
inner: &RefMut<'_, MutableInner>,
841+
inner: &MutexGuard<'_, MutableInner>,
839842
thermal_state: usize,
840843
) -> Result<(), CpuManagerError> {
841844
fuchsia_trace::duration!(
@@ -933,15 +936,15 @@ impl Node for CpuManagerMain {
933936
fuchsia_trace::duration!(c"cpu_manager", c"CpuManagerMain::init");
934937

935938
let cluster_configs =
936-
ok_or_default_err!(self.mutable_inner.borrow_mut().cluster_configs.take())
939+
ok_or_default_err!(self.mutable_inner.lock().await.cluster_configs.take())
937940
.or_debug_panic()?;
938941

939942
let cluster_handlers =
940-
ok_or_default_err!(self.mutable_inner.borrow_mut().cluster_handlers.take())
943+
ok_or_default_err!(self.mutable_inner.lock().await.cluster_handlers.take())
941944
.or_debug_panic()?;
942945

943946
let thermal_state_configs =
944-
ok_or_default_err!(self.mutable_inner.borrow_mut().thermal_state_configs.take())
947+
ok_or_default_err!(self.mutable_inner.lock().await.thermal_state_configs.take())
945948
.or_debug_panic()?;
946949

947950
// Retrieve the total number of CPUs, and ensure that clusters' logical CPU numbers exactly
@@ -1019,7 +1022,7 @@ impl Node for CpuManagerMain {
10191022
self.inspect.set_thermal_states(&thermal_states);
10201023

10211024
{
1022-
let mut inner = self.mutable_inner.borrow_mut();
1025+
let mut inner = self.mutable_inner.lock().await;
10231026
inner.num_cpus = num_cpus;
10241027
inner.clusters = clusters;
10251028
inner.thermal_states = thermal_states;
@@ -1132,7 +1135,7 @@ impl InspectData {
11321135
#[cfg(test)]
11331136
mod tests {
11341137
use super::*;
1135-
use crate::test::mock_node::{create_dummy_node, MessageMatcher, MockNode, MockNodeMaker};
1138+
use crate::test::mock_node::{MessageMatcher, MockNode, MockNodeMaker, create_dummy_node};
11361139
use crate::types::{Hertz, Volts};
11371140
use crate::{msg_eq, msg_ok_return};
11381141
use assert_matches::assert_matches;
@@ -1678,7 +1681,7 @@ mod tests {
16781681

16791682
// We choose a max power above state 1's max and below state 0's.
16801683
let thermal_load = {
1681-
let inner = node.mutable_inner.borrow();
1684+
let inner = node.mutable_inner.lock().await;
16821685
let state = &inner.thermal_states[1];
16831686
let state_1_max_power = state.estimate_power(Saturated);
16841687
let max_power = state_1_max_power + Watts(0.1);
@@ -1738,7 +1741,7 @@ mod tests {
17381741
assert_matches!(result, Ok(_));
17391742

17401743
let estimate =
1741-
node.mutable_inner.borrow().thermal_states[1].estimate_perf_and_power(Saturated);
1744+
node.mutable_inner.lock().await.thermal_states[1].estimate_perf_and_power(Saturated);
17421745

17431746
assert_data_tree!(
17441747
inspector,
@@ -1845,7 +1848,7 @@ mod tests {
18451848
.await;
18461849

18471850
// SetBoost should succeed when no active throttling
1848-
assert!(!node.mutable_inner.borrow().throttling_active());
1851+
assert!(!node.mutable_inner.lock().await.throttling_active());
18491852
handlers.expect_big_opp(0);
18501853
handlers.expect_little_opp(0);
18511854
let mut result = node.handle_message(&Message::SetBoost(true)).await;

0 commit comments

Comments
 (0)