fix(stats): Complete PR review fixes for statistics implementation

TexasCoding · TexasCoding · commit f3da28a1ea0f · 2025-08-17T21:19:19.000-05:00
- Added full try-catch wrapper to _calculate_cross_metrics with safe defaults
- Enhanced health score bounds checking with explicit min/max clamping (0-100)
- Optimized memory calculation with sampling for large collections (&gt;100 items)
- Fixed cache_hit_rate safe division to prevent division by zero
- Fixed linting issues (variable naming convention)

These changes complete the high-priority fixes identified in PR review that were
partially implemented in the previous commit.
diff --git a/src/project_x_py/utils/enhanced_stats_tracking.py b/src/project_x_py/utils/enhanced_stats_tracking.py
@@ -460,9 +460,10 @@ def _calculate_memory_usage(self) -> float:
             Memory usage in MB
         """
         size = 0
+        max_items_to_sample = 100  # Sample limit for large collections
 
-        # Check common attributes
-        attrs_to_check = [
+        # Priority attributes to always check
+        priority_attrs = [
             "_error_history",
             "_error_types",
             "_api_timings",
@@ -471,7 +472,10 @@ def _calculate_memory_usage(self) -> float:
             "_network_stats",
             "_data_quality",
             "_component_stats",
-            # Component-specific attributes
+        ]
+
+        # Component-specific attributes (check only if they exist)
+        component_attrs = [
             "tracked_orders",
             "order_status_cache",
             "position_orders",
@@ -486,19 +490,52 @@ def _calculate_memory_usage(self) -> float:
             "_position_history",
         ]
 
-        for attr_name in attrs_to_check:
+        # Check priority attributes fully
+        for attr_name in priority_attrs:
             if hasattr(self, attr_name):
                 attr = getattr(self, attr_name)
                 size += sys.getsizeof(attr)
 
-                # For collections, also count items
+                # For small collections, count all items
                 if isinstance(attr, list | dict | set | deque):
                     try:
-                        for item in attr.values() if isinstance(attr, dict) else attr:
-                            size += sys.getsizeof(item)
+                        items = attr.values() if isinstance(attr, dict) else attr
+                        item_count = len(items) if hasattr(items, "__len__") else 0
+
+                        if item_count <= max_items_to_sample:
+                            # Count all items for small collections
+                            for item in items:
+                                size += sys.getsizeof(item)
+                        else:
+                            # Sample for large collections
+                            sample_size = 0
+                            for i, item in enumerate(items):
+                                if i >= max_items_to_sample:
+                                    break
+                                sample_size += sys.getsizeof(item)
+                            # Estimate total size based on sample
+                            avg_item_size = sample_size / max_items_to_sample
+                            size += int(avg_item_size * item_count)
                     except (AttributeError, TypeError):
                         pass
 
+        # For component-specific attributes, use sampling for performance
+        for attr_name in component_attrs:
+            if hasattr(self, attr_name):
+                attr = getattr(self, attr_name)
+                size += sys.getsizeof(attr)
+
+                # Only sample large component collections
+                if isinstance(attr, dict) and len(attr) > max_items_to_sample:
+                    # Sample a subset
+                    sample_size = 0
+                    for i, (k, v) in enumerate(attr.items()):
+                        if i >= 10:  # Small sample for component attrs
+                            break
+                        sample_size += sys.getsizeof(k) + sys.getsizeof(v)
+                    # Rough estimate
+                    size += (sample_size // 10) * len(attr)
+
         return size / (1024 * 1024)
 
     def _calculate_percentile(
diff --git a/src/project_x_py/utils/statistics_aggregator.py b/src/project_x_py/utils/statistics_aggregator.py
@@ -548,45 +548,59 @@ async def _calculate_cross_metrics(
         Returns:
             Statistics with cross-component metrics added
         """
-        # Calculate total memory usage across all components
-        total_memory = sum(
-            comp.get("memory_usage_mb", 0)
-            for comp in stats.get("components", {}).values()
-        )
-        stats["memory_usage_mb"] = total_memory
+        try:
+            # Calculate total memory usage across all components
+            total_memory = sum(
+                comp.get("memory_usage_mb", 0)
+                for comp in stats.get("components", {}).values()
+            )
+            stats["memory_usage_mb"] = max(0, total_memory)  # Ensure non-negative
 
-        # Calculate total error count
-        total_errors = sum(
-            comp.get("error_count", 0) for comp in stats.get("components", {}).values()
-        )
-        stats["total_errors"] = total_errors
+            # Calculate total error count
+            total_errors = sum(
+                comp.get("error_count", 0)
+                for comp in stats.get("components", {}).values()
+            )
+            stats["total_errors"] = max(0, total_errors)  # Ensure non-negative
 
-        # Calculate overall health score (0-100)
-        health_score = 100.0
+            # Calculate overall health score (0-100) with bounds checking
+            health_score = 100.0
 
-        # Deduct for errors
-        if total_errors > 0:
-            health_score -= min(20, total_errors * 2)
+            # Deduct for errors (max 20 points)
+            if total_errors > 0:
+                health_score -= min(20, total_errors * 2)
 
-        # Deduct for disconnected components
-        disconnected = sum(
-            1
-            for comp in stats.get("components", {}).values()
-            if comp.get("status") != "connected" and comp.get("status") != "active"
-        )
-        if disconnected > 0:
-            health_score -= disconnected * 10
+            # Deduct for disconnected components (max 30 points)
+            disconnected = sum(
+                1
+                for comp in stats.get("components", {}).values()
+                if comp.get("status") != "connected" and comp.get("status") != "active"
+            )
+            if disconnected > 0:
+                health_score -= min(30, disconnected * 10)
 
-        # Deduct for high memory usage (>500MB total)
-        if total_memory > 500:
-            health_score -= min(20, (total_memory - 500) / 50)
+            # Deduct for high memory usage (>500MB total, max 20 points)
+            if total_memory > 500:
+                memory_penalty = min(20, (total_memory - 500) / 50)
+                health_score -= memory_penalty
 
-        # Deduct for poor cache performance
-        cache_hit_rate = stats.get("cache_hit_rate", 0)
-        if cache_hit_rate < 0.5:
-            health_score -= (0.5 - cache_hit_rate) * 20
+            # Deduct for poor cache performance (max 10 points)
+            cache_hit_rate = stats.get("cache_hit_rate", 0)
+            # Ensure cache_hit_rate is between 0 and 1
+            cache_hit_rate = max(0.0, min(1.0, cache_hit_rate))
+            if cache_hit_rate < 0.5:
+                cache_penalty = min(10, (0.5 - cache_hit_rate) * 20)
+                health_score -= cache_penalty
 
-        stats["health_score"] = max(0, health_score)
+            # Ensure health score is within bounds [0, 100]
+            stats["health_score"] = max(0.0, min(100.0, health_score))
+
+        except Exception as e:
+            logger.error(f"Error calculating cross-component metrics: {e}")
+            # Set safe defaults on error
+            stats["health_score"] = 0.0
+            stats["total_errors"] = stats.get("total_errors", 0)
+            stats["memory_usage_mb"] = stats.get("memory_usage_mb", 0.0)
 
         return stats