Skip to content

Commit f5ef862

Browse files
committed
Fix some issues detected while testing the system
1 parent 92440a8 commit f5ef862

File tree

4 files changed

+30
-12
lines changed

4 files changed

+30
-12
lines changed

edgeless_cloud_offloading/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,15 @@ security_group_id = "sg-xxxxxxxxxxxxxxxxx"
5656
[scaling.thresholds]
5757
# --- Scale-Up Triggers ---
5858
# Sum of "credits" from overloaded nodes to trigger a scale-up
59-
credit_overload = 1.0
59+
credit_overload = 2.0
6060
# CPU percentage above which a node is considered saturated
61-
cpu_high_percent = 80.0
61+
cpu_high_percent = 75.0
6262
# Memory percentage above which a node is considered saturated
63-
mem_high_percent = 80.0
63+
mem_high_percent = 75.0
6464

6565
# --- Scale-Down Triggers ---
6666
# CPU percentage below which a node is considered underutilized
67-
cpu_low_percent = 10.0
67+
cpu_low_percent = 20.0
6868
# Memory percentage below which a node is considered underutilized
6969
mem_low_percent = 20.0
7070
# Time in seconds a node must remain underutilized before being deleted

edgeless_cloud_offloading/src/config.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,11 @@ impl Default for AwsConfig {
7878
impl Default for ThresholdsConfig {
7979
fn default() -> Self {
8080
Self {
81-
credit_overload: 1.0,
82-
cpu_high_percent: 80.0,
83-
mem_high_percent: 80.0,
84-
cpu_low_percent: 30.0,
85-
mem_low_percent: 40.0,
81+
credit_overload: 2.0,
82+
cpu_high_percent: 75.0,
83+
mem_high_percent: 75.0,
84+
cpu_low_percent: 20.0,
85+
mem_low_percent: 20.0,
8686
delete_cooldown_seconds: 30,
8787
}
8888
}

edgeless_cloud_offloading/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub struct CloudNodeData {
2424
pub node_id: String,
2525
pub active: bool,
2626
pub creation_time: Instant,
27+
pub activation_time: Option<Instant>,
2728
}
2829

2930
fn generate_instance_name() -> String {
@@ -84,6 +85,7 @@ pub async fn create_cloud_node(input_data: CloudNodeInputData) -> Result<CloudNo
8485
node_id,
8586
active: false,
8687
creation_time: Instant::now(),
88+
activation_time: None,
8789
};
8890

8991
Ok(cloud_node)

edgeless_cloud_offloading/src/main.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::time::Duration;
1212
use tokio::time::sleep;
1313

1414
const DEFAULT_CONFIG_FILENAME: &str = "cloud_offloading.toml";
15+
const NODE_STABILIZATION_TIME_SECS: u64 = 120;
1516

1617
#[derive(Parser, Debug)]
1718
#[command(author, about, long_about = None)]
@@ -90,6 +91,7 @@ async fn run_cloud_offloading_delegated_orc(config: Config) -> anyhow::Result<()
9091
for node in cloud_nodes.iter_mut() {
9192
if !node.active && active_orc_nodes.contains(&node.node_id) {
9293
node.active = true;
94+
node.activation_time = Some(std::time::Instant::now());
9395
log::info!("Cloud node {} is now active in the orchestrator!", node.node_id);
9496
// After a node becomes active, we force a rebalance to ensure it receives load
9597
rebalancer.rebalance_cluster();
@@ -138,7 +140,9 @@ async fn run_cloud_offloading_delegated_orc(config: Config) -> anyhow::Result<()
138140
// 2. DECIDE WHETHER TO CREATE A NEW NODE
139141
// Only create a new node if no managed node is still inactive (being created) or was created less than NODE_STABILIZATION_TIME_SECS seconds ago.
140142
// If the total number of active nodes is less than the minimum required, we also create a new node.
141-
let is_creating_node = cloud_nodes.iter().any(|n| !n.active);
143+
let is_creating_node = cloud_nodes.iter().any(|n| {
144+
!n.active || n.creation_time.elapsed().as_secs() < NODE_STABILIZATION_TIME_SECS
145+
});
142146
if !is_creating_node
143147
&& (rebalancer.should_create_node(
144148
config.scaling.thresholds.credit_overload,
@@ -182,9 +186,21 @@ async fn run_cloud_offloading_delegated_orc(config: Config) -> anyhow::Result<()
182186
// We only delete a node if there are more nodes available
183187
if active_orc_nodes.len() > 1 {
184188
// If there's no node being created or emptied, check if we can find an underutilized node to delete
185-
let managed_cloud_node_ids: HashSet<String> = cloud_nodes.iter().map(|n| n.node_id.clone()).collect();
189+
// and consider only nodes that have been active for a certain stabilization time
190+
let stable_managed_node_ids: HashSet<String> = cloud_nodes
191+
.iter()
192+
.filter(|n| {
193+
if let Some(activation_time) = n.activation_time {
194+
activation_time.elapsed().as_secs() >= NODE_STABILIZATION_TIME_SECS
195+
} else {
196+
true // If activation_time is None, consider it stable (it was never active)
197+
}
198+
})
199+
.map(|n| n.node_id.clone())
200+
.collect();
201+
186202
if let Some(victim_id) = rebalancer.find_node_to_delete(
187-
&managed_cloud_node_ids,
203+
&stable_managed_node_ids,
188204
config.scaling.thresholds.cpu_low_percent,
189205
config.scaling.thresholds.mem_low_percent,
190206
) {

0 commit comments

Comments
 (0)