Skip to content

Commit c978522

Browse files
committed
graph: Add a setting to just simulate load manager decisions
1 parent f5c1c5a commit c978522

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

docs/environment-variables.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,7 @@ those.
127127
is resolved) If this variable is not set, no queries will ever be jailed,
128128
but they will still be subject to normal load management when the system
129129
is overloaded.
130+
- `GRAPH_LOAD_SIMULATE`: Perform all the steps that the load manager would take
131+
given the other load management configuration settings, but never
132+
actually decline to run a query; instead, just log the load management
133+
decisions. Set to `true` to turn simulation on; defaults to `false`.

graph/src/data/graphql/effort.rs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::time::{Duration, Instant};
1212
use crate::components::metrics::{Gauge, MetricsRegistry};
1313
use crate::components::store::PoolWaitStats;
1414
use crate::data::graphql::shape_hash::shape_hash;
15-
use crate::prelude::{info, o, warn, Logger};
15+
use crate::prelude::{debug, info, o, warn, Logger};
1616
use crate::util::stats::{MovingStats, BIN_SIZE, WINDOW_SIZE};
1717

1818
const ZERO_DURATION: Duration = Duration::from_millis(0);
@@ -51,6 +51,8 @@ lazy_static! {
5151
static ref LOAD_MANAGEMENT_DISABLED: bool = *LOAD_THRESHOLD == ZERO_DURATION;
5252

5353
static ref KILL_RATE_UPDATE_INTERVAL: Duration = Duration::from_millis(1000);
54+
55+
static ref SIMULATE: bool = env::var("GRAPH_LOAD_SIMULATE").is_ok();
5456
}
5557

5658
struct QueryEffort {
@@ -297,7 +299,7 @@ impl LoadManager {
297299
}
298300

299301
if self.jailed_queries.read().unwrap().contains(&shape_hash) {
300-
return true;
302+
return !*SIMULATE;
301303
}
302304

303305
let (overloaded, wait_ms) = self.overloaded();
@@ -326,17 +328,30 @@ impl LoadManager {
326328
// effort in an overload situation gets killed
327329
warn!(self.logger, "Jailing query";
328330
"query" => query,
331+
"wait_ms" => wait_ms.as_millis(),
329332
"query_effort_ms" => query_effort,
330333
"total_effort_ms" => total_effort,
331334
"ratio" => format!("{:.4}", query_effort/total_effort));
332335
self.jailed_queries.write().unwrap().insert(shape_hash);
333-
return true;
336+
return !*SIMULATE;
334337
}
335338

336339
// Kill random queries in case we have no queries, or not enough queries
337340
// that cause at least 20% of the effort
338341
let kill_rate = self.update_kill_rate(kill_rate, last_update, overloaded, wait_ms);
339-
thread_rng().gen_bool((kill_rate * query_effort / total_effort).min(1.0).max(0.0))
342+
let decline =
343+
thread_rng().gen_bool((kill_rate * query_effort / total_effort).min(1.0).max(0.0));
344+
if *SIMULATE && decline {
345+
debug!(self.logger, "Declining query";
346+
"query" => query,
347+
"wait_ms" => wait_ms.as_millis(),
348+
"query_effort_ms" => query_effort,
349+
"total_effort_ms" => total_effort,
350+
"kill_rate" => format!("{:.4}", query_effort/total_effort),
351+
);
352+
return false;
353+
}
354+
return decline;
340355
}
341356

342357
fn overloaded(&self) -> (bool, Duration) {

0 commit comments

Comments
 (0)