ChrsMark
diff --git a/‎.chloggen/adaptive-window-sizing.yaml‎
Lines changed: 32 additions & 0 deletions b/‎.chloggen/adaptive-window-sizing.yaml‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎processor/isolationforestprocessor/README.md‎
Lines changed: 28 additions & 5 deletions b/‎processor/isolationforestprocessor/README.md‎
Lines changed: 28 additions & 5 deletions
diff --git a/‎processor/isolationforestprocessor/config.go‎
Lines changed: 93 additions & 0 deletions b/‎processor/isolationforestprocessor/config.go‎
Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,32 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: isolationforestprocessor
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Add adaptive window sizing feature that automatically adjusts window size based on traffic patterns, memory usage, and model stability
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [42751]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext: |
+  The adaptive window sizing feature enables dynamic adjustment of the isolation forest sliding window size based on:
+  - Traffic velocity and throughput patterns
+  - Memory usage and resource constraints
+  - Model stability and performance metrics
+  This enhancement improves resource utilization and anomaly detection accuracy for varying workload patterns.
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
@@ -24,6 +24,7 @@ The **Isolation Forest processor** adds inline, unsupervised anomaly detection t
 | **Realtime Isolation Forest** | Builds an ensemble of random trees over a sliding window of recent data and assigns a 0–1 anomaly score on ingestion (≈ *O(log n)* per point).      |
 | **Multi‑signal support**      | Can be inserted into **traces**, **metrics**, **logs** pipelines – one config powers all three.                                                     |
 | **Per‑entity modelling**      | `features` config lets you maintain a separate model per unique combination of resource / attribute keys (e.g. per‑pod, per‑service).               |
+| **Adaptive Window Sizing**    | Automatically adjusts window size based on traffic patterns, memory usage, and model stability for optimal performance and resource utilization.    |
 | **Flexible output**           | • Add an attribute `iforest.is_anomaly=true` <br>• Emit a gauge metric `iforest.anomaly_score` <br>• Drop anomalous telemetry entirely.             |
 | **Config‑driven**             | Tune tree count, subsample size, contamination rate, sliding‑window length, retraining interval, target metrics, and more – all in `collector.yml`. |
 | **Zero external deps**        | Pure Go implementation; runs wherever the Collector does (edge, gateway, or backend).                                                               |
@@ -35,7 +36,8 @@ The **Isolation Forest processor** adds inline, unsupervised anomaly detection t
 1. **Training window** – The processor keeps up to `window_size` of the most recent data points for every feature‑group.
 2. **Periodic (re‑)training** – Every `training_interval`, it draws `subsample_size` points from that window and grows `forest_size` random isolation trees.
 3. **Scoring** – Each new point is pushed through the forest. Shorter average path length ⇒ higher anomaly score.
-4. **Post‑processing** –
+4. **Adaptive sizing** – When enabled, window size automatically adjusts based on traffic velocity, memory usage, and model stability.
+5. **Post‑processing** –
 
    * If `add_anomaly_score: true`, a gauge metric `iforest.anomaly_score` is emitted with identical attributes/timestamp.
    * If the score ≥ `anomaly_threshold`, the original span/metric/log is flagged with `iforest.is_anomaly=true`.
@@ -61,6 +63,21 @@ Performance is linear in `forest_size` and logarithmic in `window_size`; a defau
 | `metrics_to_analyze`  | \[]string   | `[]`      | Only these metric names are scored (metrics pipeline only). Blank ⇒ all.       |
 | `add_anomaly_score`   | bool        | `false`   | Emit `iforest.anomaly_score` metric.                                           |
 | `drop_anomalous_data` | bool        | `false`   | Remove anomalous items from the batch instead of forwarding.                   |
+| `adaptive_window`     | object      | see below | Adaptive window sizing settings; disabled by default (see section below).      |
+
+### 🔄 Adaptive Window Configuration
+
+When enabled, the processor automatically adjusts window size based on traffic patterns and resource constraints:
+
+| Field                      | Type     | Default | Notes                                                    |
+| -------------------------- | -------- | ------- | -------------------------------------------------------- |
+| `enabled`                  | bool     | `false` | Enable adaptive window sizing.                          |
+| `min_window_size`          | int      | `1000`  | Minimum window size (safety bound).                     |
+| `max_window_size`          | int      | `100000`| Maximum window size (memory protection).                |
+| `memory_limit_mb`          | int      | `256`   | Shrink window when memory usage exceeds this limit.     |
+| `adaptation_rate`          | float    | `0.1`   | Rate of window size changes (0.0-1.0).                  |
+| `velocity_threshold`       | float    | `50.0`  | Samples/sec threshold for triggering window growth.     |
+| `stability_check_interval` | duration | `5m`    | How often to evaluate model stability for expansion.    |
 
 See the sample below for context.
 
@@ -120,7 +137,7 @@ service:
  
 ### What the example does
 
-| Signal      | What’s scored                                              | Feature grouping               | Output                                    | Notes                                                                                            |
+| Signal      | What’s scored                                              | Feature grouping               | Output                                    | Notes                                                                                          |
 | ----------- | ---------------------------------------------------------- | ------------------------------ | ----------------------------------------- | ------------------------------------------------------------------------------------------------ |
 | **Traces**  | Span **duration** (ns)                                     | `service.name`, `k8s.pod.name` | `iforest.is_anomaly` attr + optional drop | Use a span/trace exporter to route anomalies.                                                    |
 | **Metrics** | Only `system.cpu.utilization`, `system.memory.utilization` | Same                           | Attribute + score metric                  | The score appears as `iforest.anomaly_score` gauge.                                              |
@@ -133,6 +150,7 @@ service:
 * **Tune `forest_size` vs. latency** – start with 100 trees; raise to 200–300 if scores look noisy.
 * **Use per‑entity models** – add `features` (service, pod, host) to avoid global comparisons across very different series.
 * **Let contamination drive threshold** – set `contamination_rate` to the % of traffic you’re comfortable labelling outlier; avoid hand‑tuning `anomaly_threshold`.
+* **Use adaptive window sizing** – enable for dynamic workloads; the processor will automatically grow windows during high traffic and shrink under memory pressure.
 * **Route anomalies** – keep `drop_anomalous_data=false` and add a simple \[routing‑processor] downstream to ship anomalies to a dedicated exporter or topic.
 * **Monitor model health** – the emitted `iforest.anomaly_score` metric is perfect for a Grafana panel; watch its distribution and adapt window / contamination accordingly.
 
@@ -147,21 +165,26 @@ service:
                │  • Sliding window (per feature‑group)             │
                │  • Forest of N trees (per feature‑group)          │
 Telemetry ───▶ │  • Score calculator & anomaly decision            │ ───▶  Next processor/exporter
+               │  • Adaptive window sizing (optional)              │
                └───────────────────────────────────────────────────┘
 ```
 
-*Training cost*: **O(window\_size × forest\_size × log subsample\_size)** every `training_interval`
-*Scoring cost*: **O(forest\_size × log subsample\_size)** per item
+
+*Training cost*: **O(current_window_size × forest_size × log subsample_size)** every `training_interval`
+*Scoring cost*: **O(forest_size × log subsample_size)** per item
+
+**Note:** With adaptive window sizing enabled, `current_window_size` dynamically adjusts between `min_window_size` and `max_window_size` based on traffic patterns and memory constraints, making training costs adaptive to workload conditions.
+
 
 ---
 
 ## 🤝 Contributing
 
 * **Bugs / Questions** – please open an issue in the fork first.
+* **Recently added** – adaptive window sizing for dynamic traffic patterns.
 * **Planned enhancements**
 
   * Multivariate scoring (multiple numeric attributes per point).
-  * Adaptive window size.
   * Expose Prometheus counters for training time / CPU cost.
 
 PRs welcome – please include unit tests and doc updates.
 
@@ -28,6 +28,23 @@ type Config struct {
 	Features                FeatureConfig     `mapstructure:"features"`
 	Models                  []ModelConfig     `mapstructure:"models"`
 	Performance             PerformanceConfig `mapstructure:"performance"`
+
+	// Adaptive window sizing configuration
+	AdaptiveWindow *AdaptiveWindowConfig `mapstructure:"adaptive_window"`
+}
+
+// AdaptiveWindowConfig configures automatic window size adjustment based on traffic patterns.
+//
+// createDefaultConfig populates every field and leaves Enabled set to false.
+// The size bounds, memory limit, adaptation rate, velocity threshold and
+// interval are checked by validateAdaptiveWindow.
+type AdaptiveWindowConfig struct {
+	// Core configuration
+	Enabled        bool    `mapstructure:"enabled"`         // Enable adaptive sizing (default false)
+	MinWindowSize  int     `mapstructure:"min_window_size"` // Minimum samples to keep; positive and not below min_samples
+	MaxWindowSize  int     `mapstructure:"max_window_size"` // Maximum samples (memory protection); strictly above MinWindowSize
+	MemoryLimitMB  int     `mapstructure:"memory_limit_mb"` // Auto-shrink when exceeded; positive and at most performance.max_memory_mb
+	AdaptationRate float64 `mapstructure:"adaptation_rate"` // Adjustment speed (0.0-1.0)
+
+	// Optional parameters with defaults
+	VelocityThreshold      float64 `mapstructure:"velocity_threshold"`       // Grow when >N samples/sec; must be non-negative
+	StabilityCheckInterval string  `mapstructure:"stability_check_interval"` // Duration string (e.g. "5m") between stability checks; parsed with time.ParseDuration
 }
 
 type FeatureConfig struct {
@@ -79,6 +96,17 @@ func createDefaultConfig() component.Config {
 			BatchSize:       1000,
 			ParallelWorkers: 4,
 		},
+
+		// Default adaptive window configuration (disabled by default for backward compatibility)
+		AdaptiveWindow: &AdaptiveWindowConfig{
+			Enabled:                false,  // Disabled by default - backward compatibility
+			MinWindowSize:          1000,   // Match MinSamples for consistency
+			MaxWindowSize:          100000, // Reasonable upper bound
+			MemoryLimitMB:          256,    // Half of total processor memory
+			AdaptationRate:         0.1,    // Conservative adjustment speed
+			VelocityThreshold:      50,     // Default growth threshold
+			StabilityCheckInterval: "5m",   // Check model stability every 5 minutes
+		},
 	}
 }
 
@@ -118,9 +146,74 @@ func (cfg *Config) Validate() error {
 		return errors.New("at least one feature type must be configured")
 	}
 
+	// Validate adaptive window configuration
+	if cfg.AdaptiveWindow != nil {
+		if err := cfg.validateAdaptiveWindow(); err != nil {
+			return fmt.Errorf("adaptive_window validation failed: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// validateAdaptiveWindow checks the adaptive_window section for internal
+// consistency and for agreement with the rest of the configuration.
+//
+// A missing or disabled section is accepted without further checks. The
+// defaults installed by createDefaultConfig are always present, so validating
+// them while the feature is switched off would reject configurations that
+// never use it (for example a min_samples above the default min_window_size,
+// or a performance.max_memory_mb below the default memory_limit_mb).
+//
+// It returns nil when the section is acceptable, and an error describing the
+// first violated constraint otherwise.
+func (cfg *Config) validateAdaptiveWindow() error {
+	aw := cfg.AdaptiveWindow
+	if aw == nil || !aw.Enabled {
+		return nil
+	}
+
+	if aw.MinWindowSize <= 0 {
+		return errors.New("min_window_size must be positive")
+	}
+
+	if aw.MaxWindowSize <= aw.MinWindowSize {
+		return errors.New("max_window_size must be greater than min_window_size")
+	}
+
+	// Ensure consistency with main config
+	if aw.MinWindowSize < cfg.MinSamples {
+		return fmt.Errorf("adaptive_window.min_window_size (%d) should be >= min_samples (%d) for consistency",
+			aw.MinWindowSize, cfg.MinSamples)
+	}
+
+	if aw.MemoryLimitMB <= 0 {
+		return errors.New("memory_limit_mb must be positive")
+	}
+
+	// Memory limit should be reasonable compared to total processor memory
+	if aw.MemoryLimitMB > cfg.Performance.MaxMemoryMB {
+		return fmt.Errorf("adaptive_window.memory_limit_mb (%d) should not exceed performance.max_memory_mb (%d)",
+			aw.MemoryLimitMB, cfg.Performance.MaxMemoryMB)
+	}
+
+	// The float checks are written as negated "in range" tests so that NaN,
+	// for which every comparison is false, is rejected as well.
+	if !(aw.AdaptationRate >= 0.0 && aw.AdaptationRate <= 1.0) {
+		return errors.New("adaptation_rate must be between 0.0 and 1.0")
+	}
+
+	if !(aw.VelocityThreshold >= 0) {
+		return errors.New("velocity_threshold must be non-negative")
+	}
+
+	// An empty interval is allowed; GetStabilityCheckInterval substitutes a default.
+	if aw.StabilityCheckInterval != "" {
+		if _, err := time.ParseDuration(aw.StabilityCheckInterval); err != nil {
+			return fmt.Errorf("stability_check_interval is not a valid duration: %w", err)
+		}
+	}
+
 	return nil
 }
 
+// IsAdaptiveWindowEnabled reports whether an adaptive_window section is
+// present and has its enabled flag set.
+func (cfg *Config) IsAdaptiveWindowEnabled() bool {
+	if aw := cfg.AdaptiveWindow; aw != nil {
+		return aw.Enabled
+	}
+	return false
+}
+
+// GetStabilityCheckInterval returns the stability check interval duration
+func (cfg *Config) GetStabilityCheckInterval() (time.Duration, error) {
+	if cfg.AdaptiveWindow == nil || cfg.AdaptiveWindow.StabilityCheckInterval == "" {
+		return 5 * time.Minute, nil // Default
+	}
+	return time.ParseDuration(cfg.AdaptiveWindow.StabilityCheckInterval)
+}
+
+// GetTrainingWindowDuration parses cfg.TrainingWindow with time.ParseDuration
+// and returns the resulting duration together with any parse error.
 func (cfg *Config) GetTrainingWindowDuration() (time.Duration, error) {
 	return time.ParseDuration(cfg.TrainingWindow)
 }