gh-138122: Fix sample counting for filtered profiling modes (#142677)

pablogsal · web-flow · commit 52daab111b60 · 2025-12-14T03:31:51.000Z
diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py
@@ -395,11 +395,9 @@ def collect(self, stack_frames):
         if has_gc_frame:
             self.gc_frame_samples += 1
 
-        # Only count as successful if we actually processed frames
-        # This is important for modes like --mode exception where most samples
-        # may be filtered out at the C level
-        if frames_processed:
-            self.successful_samples += 1
+        # Count as successful - the sample worked even if no frames matched the filter
+        # (e.g., in --mode exception when no thread has an active exception)
+        self.successful_samples += 1
         self.total_samples += 1
 
         # Handle input on every sample for instant responsiveness
diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py
@@ -308,31 +308,21 @@ def draw_sample_stats(self, line, width, elapsed):
 
     def draw_efficiency_bar(self, line, width):
         """Draw sample efficiency bar showing success/failure rates."""
-        success_pct = (
-            self.collector.successful_samples
-            / max(1, self.collector.total_samples)
-        ) * 100
-        failed_pct = (
-            self.collector.failed_samples
-            / max(1, self.collector.total_samples)
-        ) * 100
+        # total_samples = successful_samples + failed_samples, so percentages add to 100%
+        total = max(1, self.collector.total_samples)
+        success_pct = (self.collector.successful_samples / total) * 100
+        failed_pct = (self.collector.failed_samples / total) * 100
 
         col = 0
         self.add_str(line, col, "Efficiency:", curses.A_BOLD)
         col += 11
 
-        label = f" {success_pct:>5.2f}% good, {failed_pct:>4.2f}% failed"
+        label = f" {success_pct:>5.2f}% good, {failed_pct:>5.2f}% failed"
         available_width = width - col - len(label) - 3
 
         if available_width >= MIN_BAR_WIDTH:
             bar_width = min(MAX_EFFICIENCY_BAR_WIDTH, available_width)
-            success_fill = int(
-                (
-                    self.collector.successful_samples
-                    / max(1, self.collector.total_samples)
-                )
-                * bar_width
-            )
+            success_fill = int((self.collector.successful_samples / total) * bar_width)
             failed_fill = bar_width - success_fill
 
             self.add_str(line, col, "[", curses.A_NORMAL)
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_core.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_core.py
@@ -267,21 +267,59 @@ def test_collect_with_frames(self):
         self.assertEqual(collector.failed_samples, 0)
 
     def test_collect_with_empty_frames(self):
-        """Test collect with empty frames."""
+        """Test collect with empty frames counts as successful.
+
+        A sample is considered successful if the profiler could read from the
+        target process, even if no frames matched the current filter (e.g.,
+        --mode exception when no thread has an active exception). The sample
+        itself worked; it just didn't produce frame data.
+        """
         collector = LiveStatsCollector(1000)
         thread_info = MockThreadInfo(123, [])
         interpreter_info = MockInterpreterInfo(0, [thread_info])
         stack_frames = [interpreter_info]
 
         collector.collect(stack_frames)
 
-        # Empty frames do NOT count as successful - this is important for
-        # filtered modes like --mode exception where most samples may have
-        # no matching data. Only samples with actual frame data are counted.
-        self.assertEqual(collector.successful_samples, 0)
+        # Empty frames still count as successful - the sample worked even
+        # though no frames matched the filter
+        self.assertEqual(collector.successful_samples, 1)
         self.assertEqual(collector.total_samples, 1)
         self.assertEqual(collector.failed_samples, 0)
 
+    def test_sample_counts_invariant(self):
+        """Test that total_samples == successful_samples + failed_samples.
+
+        Empty frame data (e.g., from --mode exception with no active exception)
+        still counts as successful since the profiler could read process state.
+        """
+        collector = LiveStatsCollector(1000)
+
+        # Mix of samples with and without frame data
+        frames = [MockFrameInfo("test.py", 10, "func")]
+        thread_with_frames = MockThreadInfo(123, frames)
+        thread_empty = MockThreadInfo(456, [])
+        interp_with_frames = MockInterpreterInfo(0, [thread_with_frames])
+        interp_empty = MockInterpreterInfo(0, [thread_empty])
+
+        # Collect various samples
+        collector.collect([interp_with_frames])  # Has frames
+        collector.collect([interp_empty])         # No frames (filtered)
+        collector.collect([interp_with_frames])  # Has frames
+        collector.collect([interp_empty])         # No frames (filtered)
+        collector.collect([interp_empty])         # No frames (filtered)
+
+        # All 5 samples are successful (profiler could read process state)
+        self.assertEqual(collector.total_samples, 5)
+        self.assertEqual(collector.successful_samples, 5)
+        self.assertEqual(collector.failed_samples, 0)
+
+        # Invariant must hold
+        self.assertEqual(
+            collector.total_samples,
+            collector.successful_samples + collector.failed_samples
+        )
+
     def test_collect_skip_idle_threads(self):
         """Test that idle threads are skipped when skip_idle=True."""
         collector = LiveStatsCollector(1000, skip_idle=True)
@@ -327,9 +365,10 @@ def test_collect_multiple_threads(self):
     def test_collect_filtered_mode_percentage_calculation(self):
         """Test that percentages use successful_samples, not total_samples.
 
-        This is critical for filtered modes like --mode exception where most
-        samples may be filtered out at the C level. The percentages should
-        be relative to samples that actually had frame data, not all attempts.
+        With the current behavior, all samples are considered successful
+        (the profiler could read from the process), even when filters result
+        in no frame data. This means percentages are relative to all sampling
+        attempts that succeeded in reading process state.
         """
         collector = LiveStatsCollector(1000)
 
@@ -338,35 +377,30 @@ def test_collect_filtered_mode_percentage_calculation(self):
         thread_with_data = MockThreadInfo(123, frames_with_data)
         interpreter_with_data = MockInterpreterInfo(0, [thread_with_data])
 
-        # Empty thread simulates filtered-out data
+        # Empty thread simulates filtered-out data at C level
         thread_empty = MockThreadInfo(456, [])
         interpreter_empty = MockInterpreterInfo(0, [thread_empty])
 
         # 2 samples with data
         collector.collect([interpreter_with_data])
         collector.collect([interpreter_with_data])
 
-        # 8 samples without data (filtered out)
+        # 8 samples without data (filtered out at C level, but sample still succeeded)
         for _ in range(8):
             collector.collect([interpreter_empty])
 
-        # Verify counts
+        # All 10 samples are successful - the profiler could read from the process
         self.assertEqual(collector.total_samples, 10)
-        self.assertEqual(collector.successful_samples, 2)
+        self.assertEqual(collector.successful_samples, 10)
 
         # Build stats and check percentage
         stats_list = collector.build_stats_list()
         self.assertEqual(len(stats_list), 1)
 
-        # The function appeared in 2 out of 2 successful samples = 100%
-        # NOT 2 out of 10 total samples = 20%
+        # The function appeared in 2 out of 10 successful samples = 20%
         location = ("test.py", 10, "exception_handler")
         self.assertEqual(collector.result[location]["direct_calls"], 2)
 
-        # Verify the percentage calculation in build_stats_list
-        # direct_calls / successful_samples * 100 = 2/2 * 100 = 100%
-        # This would be 20% if using total_samples incorrectly
-
     def test_percentage_values_use_successful_samples(self):
         """Test that percentages are calculated from successful_samples.