Skip to content

FEAT: Resource Utilization Checks before ingestion. #1351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/handlers/http/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub mod cluster;
pub mod correlation;
pub mod health_check;
pub mod ingest;
pub mod resource_check;
mod kinesis;
pub mod llm;
pub mod logstream;
Expand Down
3 changes: 2 additions & 1 deletion src/handlers/http/modal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use crate::{
utils::get_node_id,
};

use super::{audit, cross_origin_config, health_check, API_BASE_PATH, API_VERSION};
use super::{audit, cross_origin_config, health_check, resource_check, API_BASE_PATH, API_VERSION};

pub mod ingest;
pub mod ingest_server;
Expand Down Expand Up @@ -113,6 +113,7 @@ pub trait ParseableServer {
.wrap(prometheus.clone())
.configure(|config| Self::configure_routes(config, oidc_client.clone()))
.wrap(from_fn(health_check::check_shutdown_middleware))
.wrap(from_fn(resource_check::check_resource_utilization_middleware))
.wrap(from_fn(audit::audit_log_middleware))
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
Expand Down
76 changes: 76 additions & 0 deletions src/handlers/http/resource_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Parseable Server (C) 2022 - 2024 Parseable, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

use actix_web::{
body::MessageBody,
dev::{ServiceRequest, ServiceResponse},
error::Error,
error::ErrorServiceUnavailable,
middleware::Next,
};
use sysinfo::System;
use tracing::warn;

const CPU_UTILIZATION_THRESHOLD: f32 = 50.0;
const MEMORY_UTILIZATION_THRESHOLD: f32 = 50.0;

/// Middleware to check system resource utilization before processing requests
/// Returns 503 Service Unavailable if CPU or memory usage exceeds thresholds
pub async fn check_resource_utilization_middleware(
req: ServiceRequest,
next: Next<impl MessageBody>,
) -> Result<ServiceResponse<impl MessageBody>, Error> {

let mut sys = System::new_all();
sys.refresh_cpu_usage();
sys.refresh_memory();

let used_memory = sys.used_memory() as f32;
let total_memory = sys.total_memory() as f32;

// Check memory utilization
if total_memory > 0.0 {
let memory_usage = (used_memory / total_memory) * 100.0;
if memory_usage > MEMORY_UTILIZATION_THRESHOLD {
let error_msg = format!("Memory is over-utilized: {:.1}%", memory_usage);
warn!(
"Rejecting request to {} due to high memory usage: {:.1}% (threshold: {:.1}%)",
req.path(),
memory_usage,
MEMORY_UTILIZATION_THRESHOLD
);
return Err(ErrorServiceUnavailable(error_msg));
}
}

// Check CPU utilization
let cpu_usage = sys.global_cpu_usage();
if cpu_usage > CPU_UTILIZATION_THRESHOLD {
let error_msg = format!("CPU is over-utilized: {:.1}%", cpu_usage);
warn!(
"Rejecting request to {} due to high CPU usage: {:.1}% (threshold: {:.1}%)",
req.path(),
cpu_usage,
CPU_UTILIZATION_THRESHOLD
);
return Err(ErrorServiceUnavailable(error_msg));
}

// Continue processing the request if resource utilization is within limits
next.call(req).await
}
Loading