#issue10: Rate Limiting

virjilakrum · virjilakrum · commit f18c1acb73ec · 2025-01-28T11:00:15.000+03:00
Tiered rate limiting strategy:
- General API limits (100 requests/minute)
- Tighter limits for GPU operations (30 requests/minute)
- Special limits for Auth endpoints (10 requests/minute)
Dynamic configuration:
- All limits can be set via configuration files
- Different limits for different endpoint groups
diff --git a/Cargo.toml b/Cargo.toml
@@ -15,14 +15,15 @@ async-trait = "0.1"
 config = "0.15.6"
 axum = { version = "0.8", features = ["macros"] }
 hyper = { version = "1.0", features = ["full"] }
-tower = "0.5.2"
-tower-http = { version = "0.6.2", features = ["trace"] }
+tower = { version = "0.5.2", features = ["limit", "util"] }
+tower-http = { version = "0.6.2", features = ["trace", "limit"] }
 clap = { version = "4.4", features = ["derive"] }
 colored = "3.0"
 thiserror = "2.0.11"
 chrono = "0.4"
 uuid = { version = "1.8.0", features = ["v4"] }
 libvirt = "0.1.0"
+governor = { version = "0.8.0", features = ["std", "nohashmap"] }
 
 [lib]
 name = "gpu_share_vm_manager"
diff --git a/src/api/middleware/rate_limit.rs b/src/api/middleware/rate_limit.rs
@@ -0,0 +1,78 @@
+use axum::{
+    http::StatusCode,
+    response::{IntoResponse, Response},
+};
+use std::{num::NonZeroU32, time::Duration};
+use tower::{
+    layer::util::{Stack, LayerFn},
+    Limit, RateLimitLayer,
+};
+
+/// Request budget for one endpoint group: `requests` calls per `per_seconds`-second window.
+#[derive(Debug, Clone)]
+pub struct RateLimitConfig {
+    pub requests: NonZeroU32,
+    pub per_seconds: u64,
+}
+
+impl RateLimitConfig {
+    /// Builds the tower [`RateLimitLayer`] that enforces this budget.
+    pub fn layer(&self) -> RateLimitLayer {
+        // tower's constructor takes the count as `u64`; widening from `u32` is lossless.
+        RateLimitLayer::new(u64::from(self.requests.get()), Duration::from_secs(self.per_seconds))
+    }
+}
+
+/// Rate-limit budgets for every endpoint group, one [`RateLimitConfig`] per group.
+/// Derives `Debug` and `Clone` to match [`RateLimitConfig`].
+#[derive(Debug, Clone)]
+pub struct GlobalRateLimit {
+    /// General API rate limits
+    pub api: RateLimitConfig,
+    /// Stricter limits for GPU operations
+    pub gpu_operations: RateLimitConfig,
+    /// Authentication-specific limits
+    pub auth: RateLimitConfig,
+}
+
+impl Default for GlobalRateLimit {
+    /// Built-in budgets: 100 (general API), 30 (GPU operations) and 10 (auth) requests
+    /// per 60-second window.
+    fn default() -> Self {
+        // All built-in budgets share the same one-minute window.
+        let per_minute = |requests: u32| RateLimitConfig {
+            requests: NonZeroU32::new(requests).expect("built-in rate limits are non-zero"),
+            per_seconds: 60,
+        };
+        Self {
+            api: per_minute(100),
+            gpu_operations: per_minute(30),
+            auth: per_minute(10),
+        }
+    }
+}
+
+/// Error signalling an exhausted rate-limit budget; rendered as HTTP 429.
+///
+/// Implements `std::error::Error` so it can be boxed and later recognised via `is::<Self>()`.
+#[derive(Debug, thiserror::Error)]
+#[error("Rate limit exceeded. Please try again later.")]
+pub struct RateLimitExceeded;
+
+impl IntoResponse for RateLimitExceeded {
+    /// Responds with 429 and the error's display text as the body.
+    fn into_response(self) -> Response {
+        (StatusCode::TOO_MANY_REQUESTS, self.to_string()).into_response()
+    }
+}
+
+/// Builds the rate-limit layer for `config`, followed by an error mapping to [`RateLimitExceeded`].
+pub fn rate_limit_layer(
+    config: RateLimitConfig,
+) -> Stack<LayerFn<fn(Limit) -> Limit>, RateLimitLayer> {
+    let layer = config.layer();
+    tower::ServiceBuilder::new()
+        .layer(layer)
+        .map_err(|_| RateLimitExceeded) // the original error value is discarded
+        .into_inner() // NOTE(review): confirm this yields the declared return type
+} 
diff --git a/src/api/routes.rs b/src/api/routes.rs
@@ -63,6 +63,7 @@ use axum::{
     extract::{Path, State},
     Json,
     http::StatusCode,
+    response::{IntoResponse},
 };
 use serde::{Deserialize, Serialize};
 use std::sync::Arc;
@@ -74,6 +75,7 @@ use crate::core::libvirt::LibvirtManager;
 use crate::core::vm::{VMStatus, VMConfig};
 use crate::gpu::device::{GPUManager, GPUDevice, GPUConfig};
 use crate::monitoring::metrics::{MetricsCollector, ResourceMetrics};
+use crate::api::middleware::rate_limit::{rate_limit_layer, GlobalRateLimit, RateLimitExceeded};
 
 fn handle_error(err: impl std::fmt::Display) -> StatusCode {
     error!("Operation failed: {}", err);
@@ -112,17 +114,39 @@ pub struct AttachGPURequest {
 }
 
 pub fn create_router(state: Arc<AppState>) -> Router {
+    let GlobalRateLimit { api, gpu_operations, auth } = GlobalRateLimit::default();
+    // `Router::layer` wraps every route registered before it, so each limited group is
+    // built as its own router and merged in only after the general limit is applied.
+    let auth_routes = Router::new()
+        .route("/api/v1/auth/login", post(login))
+        .layer(rate_limit_layer(auth));
+    let gpu_routes = Router::new()
+        .route("/api/v1/gpus", get(list_gpus))
+        .route("/api/v1/vms/:id/attach_gpu", post(attach_gpu))
+        .layer(rate_limit_layer(gpu_operations));
+
     Router::new()
         .route("/api/v1/vms", post(create_vm))
         .route("/api/v1/vms", get(list_vms))
         .route("/api/v1/vms/:id", get(get_vm))
         .route("/api/v1/vms/:id", delete(delete_vm))
         .route("/api/v1/vms/:id/start", post(start_vm))
         .route("/api/v1/vms/:id/stop", post(stop_vm))
-        .route("/api/v1/gpus", get(list_gpus))
-        .route("/api/v1/vms/:id/attach_gpu", post(attach_gpu))
         .route("/api/v1/metrics/:id", get(get_metrics))
+        .layer(rate_limit_layer(api))
+        .merge(auth_routes)
+        .merge(gpu_routes)
         .with_state(state)
+        .fallback(fallback_handler)
+        .layer(axum::error_handling::HandleErrorLayer::new(handle_layer_error))
+}
+
+/// Renders a boxed middleware error: 429 for `RateLimitExceeded`, otherwise via `handle_error`.
+async fn handle_layer_error(error: Box<dyn std::error::Error + Send + Sync>) -> impl IntoResponse {
+    if error.is::<RateLimitExceeded>() {
+        return RateLimitExceeded.into_response();
+    }
+    handle_error(error).into_response()
 }
 
 #[axum::debug_handler]
diff --git a/src/config.rs b/src/config.rs
@@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize};
 pub struct Config {
     pub server: ServerConfig,
     pub metrics: MetricsConfig,
+    pub rate_limits: RateLimitConfig,
 }
 
 #[derive(Debug, Serialize, Deserialize)]
@@ -16,4 +17,11 @@ pub struct ServerConfig {
 pub struct MetricsConfig {
     pub collection_interval_secs: u64,
     pub retention_hours: u64,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct RateLimitConfig { // NOTE(review): same fields as `RateLimitSettings` in src/config/settings.rs — confirm both are needed
+    pub api_requests_per_minute: u32, // budget for general API requests
+    pub gpu_requests_per_minute: u32, // budget for GPU-related requests
+    pub auth_requests_per_minute: u32, // budget for authentication-related requests
 } 
diff --git a/src/config/settings.rs b/src/config/settings.rs
@@ -42,6 +42,11 @@
 *    - vm_image_path: Where VM images go to hibernate
 *    - max_storage_gb: Because someone will try to store their entire Steam library
 *
+* 5. RateLimitSettings:
+*    - api_requests_per_minute: Rate limit for general API requests
+*    - gpu_requests_per_minute: Rate limit for GPU-related requests
+*    - auth_requests_per_minute: Rate limit for authentication-related requests
+*
 * Implementation Details:
 * --------------------
 * - Using serde for serialization (because writing parsers is so 1990s)
@@ -84,6 +89,7 @@ pub struct Settings {
     pub libvirt: LibvirtSettings,
     pub monitoring: MonitoringSettings,
     pub storage: StorageSettings,
+    pub rate_limits: RateLimitSettings,
 }
 
 #[derive(Debug, Serialize, Deserialize)]
@@ -114,6 +120,23 @@ pub struct StorageSettings {
     pub max_storage_gb: u64,
 }
 
+#[derive(Debug, Serialize, Deserialize)]
+pub struct RateLimitSettings { // per-minute request budgets, one field per endpoint group
+    pub api_requests_per_minute: u32, // general API requests
+    pub gpu_requests_per_minute: u32, // GPU-related requests
+    pub auth_requests_per_minute: u32, // authentication-related requests
+}
+
+impl Default for RateLimitSettings {
+    fn default() -> Self { // same values as the `rate_limits` literal in `generate_default_config`
+        Self {
+            api_requests_per_minute: 100,
+            gpu_requests_per_minute: 30,
+            auth_requests_per_minute: 10,
+        }
+    }
+}
+
 impl Settings {
     pub fn new() -> Result<Self, ConfigError> {
         let config_path = std::env::var("CONFIG_PATH")
@@ -161,5 +184,10 @@ pub fn generate_default_config() -> Settings {
             vm_image_path: PathBuf::from("/var/lib/gpu-share/images"),
             max_storage_gb: 100,
         },
+        rate_limits: RateLimitSettings {
+            api_requests_per_minute: 100,
+            gpu_requests_per_minute: 30,
+            auth_requests_per_minute: 10,
+        },
     }
 }
diff --git a/src/core/errors/handlers.rs b/src/core/errors/handlers.rs
@@ -0,0 +1,24 @@
+#[derive(Clone)] // NOTE(review): this new file has no `use` lines — confirm `Arc`, `Mutex`, `Duration`, `CircuitState` resolve
+pub struct CircuitBreaker {
+    state: Arc<Mutex<CircuitState>>, // initialised to `CircuitState::Closed` by `new`
+    failure_threshold: u32, // stored by `new`; not read anywhere in this file yet
+    reset_timeout: Duration, // stored by `new`; not read anywhere in this file yet
+}
+
+impl CircuitBreaker {
+    pub fn new(failure_threshold: u32, reset_timeout: Duration) -> Self {
+        Self {
+            state: Arc::new(Mutex::new(CircuitState::Closed)),
+            failure_threshold,
+            reset_timeout,
+        }
+    }
+    /// Runs `operation`, converting its error; TODO(@virjilakrum): add the breaker logic.
+    pub async fn execute<F, T, E>(&self, mut operation: F) -> Result<T, GpuShareError>
+    where
+        F: FnMut() -> Result<T, E>,
+        E: Into<GpuShareError>,
+    {
+        operation().map_err(Into::into)
+    }
+}