Skip to content

feat: add resource utilisation middleware to monitor CPU and memory. #1352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jun 21, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
c3bdb12
feat: add resource utilization middleware to monitor CPU and memory u…
vkhinvasara Jun 17, 2025
4fb2da7
refactor: changed thresholds to 90%
vkhinvasara Jun 17, 2025
5ce1ab9
added resource monitoring with configurable CPU and memory thresholds
vkhinvasara Jun 18, 2025
b58212f
fix: use blocking task for resource usage retrieval and update memory…
vkhinvasara Jun 18, 2025
f6a2517
fix: update memory unit logging from GiB to GB for consistency
vkhinvasara Jun 18, 2025
891e881
refactor: replace RwLock with LazyLock and AtomicBool for speeeedddd
vkhinvasara Jun 18, 2025
f90714d
fix: add resource utilization middleware to ONLY the ingest routes
vkhinvasara Jun 18, 2025
cd8c029
Added resource check interval option and updated the resource monitor…
vkhinvasara Jun 19, 2025
4578d0c
refactor: changed the default interval to 15 seconds.
vkhinvasara Jun 19, 2025
53a4532
refactor: remove resource monitor initialization from ingest, query, …
vkhinvasara Jun 19, 2025
0279f36
Merge branch 'main' into main
nikhilsinhaparseable Jun 19, 2025
88b4596
refactor: add resource utilization middleware to logstream routes, de…
vkhinvasara Jun 19, 2025
5b142b7
refactor: removed resource_check from the PUT stream.
vkhinvasara Jun 19, 2025
5f0fa34
Merge branch 'main' into main
nikhilsinhaparseable Jun 20, 2025
da3583a
refactor: simplify uptime retrieval in Report::new method
vkhinvasara Jun 20, 2025
305ad25
refactor: glazing clippy
vkhinvasara Jun 20, 2025
1469507
empty push
vkhinvasara Jun 20, 2025
b083724
nit: checking if the build runs
vkhinvasara Jun 20, 2025
63e7a58
version change to not use cache for this build
vkhinvasara Jun 20, 2025
3262815
Merge branch 'main' into main
nikhilsinhaparseable Jun 21, 2025
4f1ae5c
fix: update cache key format in build workflow
vkhinvasara Jun 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/handlers/http/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub mod cluster;
pub mod correlation;
pub mod health_check;
pub mod ingest;
pub mod resource_check;
mod kinesis;
pub mod llm;
pub mod logstream;
Expand Down
3 changes: 2 additions & 1 deletion src/handlers/http/modal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use crate::{
utils::get_node_id,
};

use super::{audit, cross_origin_config, health_check, API_BASE_PATH, API_VERSION};
use super::{audit, cross_origin_config, health_check, resource_check, API_BASE_PATH, API_VERSION};

pub mod ingest;
pub mod ingest_server;
Expand Down Expand Up @@ -113,6 +113,7 @@ pub trait ParseableServer {
.wrap(prometheus.clone())
.configure(|config| Self::configure_routes(config, oidc_client.clone()))
.wrap(from_fn(health_check::check_shutdown_middleware))
.wrap(from_fn(resource_check::check_resource_utilization_middleware))
.wrap(from_fn(audit::audit_log_middleware))
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
Expand Down
76 changes: 76 additions & 0 deletions src/handlers/http/resource_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Parseable Server (C) 2022 - 2024 Parseable, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

use actix_web::{
body::MessageBody,
dev::{ServiceRequest, ServiceResponse},
error::Error,
error::ErrorServiceUnavailable,
middleware::Next,
};
use sysinfo::System;
use tracing::warn;

const CPU_UTILIZATION_THRESHOLD: f32 = 90.0;
const MEMORY_UTILIZATION_THRESHOLD: f32 = 90.0;

/// Middleware to check system resource utilization before processing requests
/// Returns 503 Service Unavailable if CPU or memory usage exceeds thresholds
pub async fn check_resource_utilization_middleware(
req: ServiceRequest,
next: Next<impl MessageBody>,
) -> Result<ServiceResponse<impl MessageBody>, Error> {

let mut sys = System::new_all();
sys.refresh_cpu_usage();
sys.refresh_memory();

let used_memory = sys.used_memory() as f32;
let total_memory = sys.total_memory() as f32;

// Check memory utilization
if total_memory > 0.0 {
let memory_usage = (used_memory / total_memory) * 100.0;
if memory_usage > MEMORY_UTILIZATION_THRESHOLD {
let error_msg = format!("Memory is over-utilized: {:.1}%", memory_usage);
warn!(
"Rejecting request to {} due to high memory usage: {:.1}% (threshold: {:.1}%)",
req.path(),
memory_usage,
MEMORY_UTILIZATION_THRESHOLD
);
return Err(ErrorServiceUnavailable(error_msg));
}
}

// Check CPU utilization
let cpu_usage = sys.global_cpu_usage();
if cpu_usage > CPU_UTILIZATION_THRESHOLD {
let error_msg = format!("CPU is over-utilized: {:.1}%", cpu_usage);
warn!(
"Rejecting request to {} due to high CPU usage: {:.1}% (threshold: {:.1}%)",
req.path(),
cpu_usage,
CPU_UTILIZATION_THRESHOLD
);
return Err(ErrorServiceUnavailable(error_msg));
}

// Continue processing the request if resource utilization is within limits
next.call(req).await
}
Loading