From 84d833d850e1aec4e0b46f610a957b87055878ef Mon Sep 17 00:00:00 2001
From: Neil <neil@pozetroninc.com>
Date: Fri, 24 Oct 2025 14:42:45 -0700
Subject: [PATCH 1/6] fix(ci): Enable CPU profiling and add dotnet tools to
 PATH

- Add dotnet tools directory to PATH for subsequent steps
- Switch from --providers to --profile cpu-sampling for actual CPU profiling
- This generates flamegraphs with real stack traces instead of empty traces

Previous approach only captured runtime events (GC, JIT) without CPU samples.
The cpu-sampling profile includes Microsoft-DotNETCore-SampleProfiler which
captures stack traces at 1ms intervals, enabling proper performance analysis.

Tested locally with act - generates 22KB trace with actual profiling data.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .github/workflows/dotnet.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml
index f69dcdc..7dc73db 100644
--- a/.github/workflows/dotnet.yml
+++ b/.github/workflows/dotnet.yml
@@ -102,6 +102,8 @@ jobs:
           dotnet-version: 8.0.x
       - name: Install dotnet-trace
         run: dotnet tool install --global dotnet-trace
+      - name: Add dotnet tools to PATH
+        run: echo "$HOME/.dotnet/tools" >> $GITHUB_PATH
       - name: Restore Dependencies
         run: dotnet restore ProbotSharp.sln
       - name: Restore HelloWorldBot
@@ -146,7 +148,7 @@ jobs:
         run: |
           echo "Collecting trace from HelloWorldBot process..."
           echo "TMPDIR is set to: $TMPDIR"
-          dotnet-trace collect --name HelloWorldBot --providers Microsoft-Windows-DotNETRuntime --duration 00:00:30 -o trace.nettrace || (echo "Trace collection failed!" && exit 1)
+          dotnet-trace collect --name HelloWorldBot --profile cpu-sampling --duration 00:00:30 -o trace.nettrace || (echo "Trace collection failed!" && exit 1)
       - name: Convert trace to Speedscope format
         run: dotnet-trace convert trace.nettrace --format Speedscope
       - name: Upload flamegraph artifact

From fcfc08df4308dcaa77d8bb8d31ababffe00c8d1b Mon Sep 17 00:00:00 2001
From: Neil <neil@pozetroninc.com>
Date: Fri, 24 Oct 2025 18:39:05 -0700
Subject: [PATCH 2/6] feat(ci): Enhance performance tracing with metrics
 analysis and historical tracking

- Add webhook workload simulation to trace real request processing
- Create analyze-trace.py script to extract performance metrics
- Implement baseline comparison against main branch via Actions cache
- Add historical performance tracking in perf-history/ directory
- Display performance reports in GitHub Actions summary
- Upload metrics artifacts with 30-day retention

This provides actionable performance insights and trend tracking
over time, enabling early detection of performance regressions.
---
 .github/workflows/dotnet.yml |  84 ++++++++++++++
 scripts/analyze-trace.py     | 205 +++++++++++++++++++++++++++++++++++
 2 files changed, 289 insertions(+)
 create mode 100755 scripts/analyze-trace.py

diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml
index 7dc73db..57e781c 100644
--- a/.github/workflows/dotnet.yml
+++ b/.github/workflows/dotnet.yml
@@ -130,6 +130,36 @@ jobs:
           echo "Started HelloWorldBot with PID $(cat app.pid)"
       - name: Wait for app to initialize
         run: sleep 15
+      - name: Send webhook workload in background
+        env:
+          WEBHOOK_SECRET: "test-secret-for-tracing"
+        run: |
+          # Start sending webhooks in background while trace runs
+          for i in {1..20}; do
+            # Send issues.opened webhook
+            PAYLOAD=$(cat fixtures/issues-opened.json)
+            SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$WEBHOOK_SECRET" | sed 's/^.* //')
+            curl -X POST http://localhost:5000/api/github/webhooks \
+              -H "Content-Type: application/json" \
+              -H "X-GitHub-Event: issues" \
+              -H "X-GitHub-Delivery: $(uuidgen)" \
+              -H "X-Hub-Signature-256: sha256=$SIGNATURE" \
+              -d "$PAYLOAD" &
+
+            # Send pull_request.opened webhook
+            PAYLOAD=$(cat fixtures/pull-request-opened.json)
+            SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$WEBHOOK_SECRET" | sed 's/^.* //')
+            curl -X POST http://localhost:5000/api/github/webhooks \
+              -H "Content-Type: application/json" \
+              -H "X-GitHub-Event: pull_request" \
+              -H "X-GitHub-Delivery: $(uuidgen)" \
+              -H "X-Hub-Signature-256: sha256=$SIGNATURE" \
+              -d "$PAYLOAD" &
+
+            sleep 0.5
+          done &
+
+          echo "Webhook workload started in background"
       - name: Check if process is running
         run: |
           PID=$(cat app.pid)
@@ -151,12 +181,66 @@ jobs:
           dotnet-trace collect --name HelloWorldBot --profile cpu-sampling --duration 00:00:30 -o trace.nettrace || (echo "Trace collection failed!" && exit 1)
       - name: Convert trace to Speedscope format
         run: dotnet-trace convert trace.nettrace --format Speedscope
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+      - name: Restore baseline from cache
+        id: restore-baseline
+        uses: actions/cache/restore@v4
+        with:
+          path: perf-baseline.json
+          key: perf-baseline-main-${{ github.sha }}
+          restore-keys: |
+            perf-baseline-main-
+      - name: Analyze trace and compare to baseline
+        run: |
+          if [ -f perf-baseline.json ]; then
+            echo "Comparing to baseline from main branch"
+            python3 scripts/analyze-trace.py trace.speedscope.json \
+              --output perf-metrics.json \
+              --markdown perf-summary.md \
+              --baseline perf-baseline.json
+          else
+            echo "No baseline found, generating initial metrics"
+            python3 scripts/analyze-trace.py trace.speedscope.json \
+              --output perf-metrics.json \
+              --markdown perf-summary.md
+          fi
+      - name: Save to performance history
+        run: |
+          mkdir -p perf-history
+          TIMESTAMP=$(date -u +%Y%m%d-%H%M%S)
+          COMMIT_SHORT=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
+          cp perf-metrics.json "perf-history/${TIMESTAMP}-${COMMIT_SHORT}.json"
+          # Keep only last 100 entries
+          ls -t perf-history/*.json 2>/dev/null | tail -n +101 | xargs -r rm || true
+          echo "Saved metrics to perf-history/${TIMESTAMP}-${COMMIT_SHORT}.json"
+      - name: Display performance report
+        run: |
+          echo "## Performance Trace Analysis" >> $GITHUB_STEP_SUMMARY
+          cat perf-summary.md >> $GITHUB_STEP_SUMMARY
+      - name: Update baseline cache
+        if: github.ref == 'refs/heads/main'
+        uses: actions/cache/save@v4
+        with:
+          path: perf-metrics.json
+          key: perf-baseline-main-${{ github.sha }}
       - name: Upload flamegraph artifact
         uses: actions/upload-artifact@v4
         with:
           name: flamegraph
           path: trace.speedscope.json
           retention-days: 7
+      - name: Upload performance metrics
+        uses: actions/upload-artifact@v4
+        with:
+          name: performance-metrics
+          path: |
+            perf-metrics.json
+            perf-summary.md
+            perf-history/
+          retention-days: 30
 
   all-tests-passed:
     name: All Tests Passed
diff --git a/scripts/analyze-trace.py b/scripts/analyze-trace.py
new file mode 100755
index 0000000..96303b4
--- /dev/null
+++ b/scripts/analyze-trace.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+"""
+Analyze .NET performance traces in Speedscope format.
+
+Extracts key metrics from trace.speedscope.json:
+- Total samples and CPU time
+- Top hotspot methods
+- Thread activity
+- GC and allocation patterns
+
+Usage:
+    python3 analyze-trace.py trace.speedscope.json [--output metrics.json] [--markdown summary.md]
+"""
+
+import json
+import sys
+import argparse
+from collections import Counter
+from datetime import datetime
+from pathlib import Path
+
+
+def parse_speedscope(trace_path):
+    """Parse Speedscope JSON format and extract metrics."""
+    with open(trace_path, 'r') as f:
+        data = json.load(f)
+
+    frames = data.get('shared', {}).get('frames', [])
+    profiles = data.get('profiles', [])
+
+    if not frames or not profiles:
+        return {
+            'error': 'Empty trace file - no frames or profiles found',
+            'total_samples': 0,
+            'cpu_time_ms': 0,
+            'top_methods': [],
+            'thread_count': 0
+        }
+
+    # Calculate total samples across all profiles
+    total_samples = 0
+    frame_samples = Counter()
+
+    for profile in profiles:
+        samples = profile.get('samples', [])
+        weights = profile.get('weights', [])
+
+        # Count samples per frame
+        for i, frame_idx in enumerate(samples):
+            weight = weights[i] if i < len(weights) else 1
+            total_samples += weight
+            frame_samples[frame_idx] += weight
+
+    # Estimate CPU time (1ms per sample is typical for dotnet-trace cpu-sampling)
+    cpu_time_ms = total_samples * 1.0
+
+    # Get top hotspot methods (frames with most samples)
+    top_frames = frame_samples.most_common(10)
+    top_methods = []
+
+    for frame_idx, sample_count in top_frames:
+        if frame_idx < len(frames):
+            frame = frames[frame_idx]
+            frame_name = frame.get('name', 'Unknown')
+
+            # Clean up frame name for readability
+            if '!' in frame_name:
+                # Format: "Assembly!Namespace.Class.Method"
+                parts = frame_name.split('!')
+                if len(parts) > 1:
+                    method_path = parts[1]
+                else:
+                    method_path = frame_name
+            else:
+                method_path = frame_name
+
+            # Calculate percentage
+            percentage = (sample_count / total_samples * 100) if total_samples > 0 else 0
+
+            top_methods.append({
+                'method': method_path,
+                'samples': sample_count,
+                'cpu_time_ms': sample_count * 1.0,
+                'percentage': round(percentage, 2)
+            })
+
+    # Analyze threads
+    thread_count = len(profiles)
+
+    # Look for GC and allocation patterns
+    gc_samples = 0
+    alloc_samples = 0
+
+    for frame_idx, count in frame_samples.items():
+        if frame_idx < len(frames):
+            frame_name = frames[frame_idx].get('name', '').lower()
+            if 'gc' in frame_name or 'garbage' in frame_name:
+                gc_samples += count
+            if 'alloc' in frame_name:
+                alloc_samples += count
+
+    gc_percentage = (gc_samples / total_samples * 100) if total_samples > 0 else 0
+    alloc_percentage = (alloc_samples / total_samples * 100) if total_samples > 0 else 0
+
+    return {
+        'total_samples': total_samples,
+        'cpu_time_ms': round(cpu_time_ms, 2),
+        'top_methods': top_methods,
+        'thread_count': thread_count,
+        'gc_samples': gc_samples,
+        'gc_percentage': round(gc_percentage, 2),
+        'alloc_samples': alloc_samples,
+        'alloc_percentage': round(alloc_percentage, 2),
+        'timestamp': datetime.utcnow().isoformat() + 'Z'
+    }
+
+
+def generate_markdown(metrics, baseline=None):
+    """Generate markdown summary of metrics."""
+    lines = ['# Performance Trace Analysis\n']
+
+    if 'error' in metrics:
+        lines.append(f"**Error:** {metrics['error']}\n")
+        return '\n'.join(lines)
+
+    # Summary stats
+    lines.append('## Summary\n')
+    lines.append(f"- **Total Samples:** {metrics['total_samples']:,}")
+    lines.append(f"- **Estimated CPU Time:** {metrics['cpu_time_ms']:.2f} ms")
+    lines.append(f"- **Thread Count:** {metrics['thread_count']}")
+    lines.append(f"- **GC Activity:** {metrics['gc_percentage']:.2f}% of samples")
+    lines.append('')
+
+    # Baseline comparison
+    if baseline and 'total_samples' in baseline and baseline['total_samples'] > 0:
+        lines.append('## Comparison to Baseline (main)\n')
+
+        cpu_diff = metrics['cpu_time_ms'] - baseline.get('cpu_time_ms', 0)
+        cpu_pct = (cpu_diff / baseline.get('cpu_time_ms', 1)) * 100
+
+        if cpu_diff > 0:
+            emoji = '🔴' if cpu_pct > 10 else '🟡'
+            lines.append(f"{emoji} **CPU Time:** +{cpu_diff:.2f} ms (+{cpu_pct:.1f}%)")
+        elif cpu_diff < 0:
+            emoji = '🟢'
+            lines.append(f"{emoji} **CPU Time:** {cpu_diff:.2f} ms ({cpu_pct:.1f}%)")
+        else:
+            lines.append(f"⚪ **CPU Time:** No change")
+
+        lines.append('')
+
+    # Top hotspots
+    lines.append('## Top 10 Hotspot Methods\n')
+    lines.append('| Method | Samples | CPU Time | % |')
+    lines.append('|--------|---------|----------|---|')
+
+    for method in metrics['top_methods']:
+        method_name = method['method']
+        # Truncate very long method names
+        if len(method_name) > 80:
+            method_name = method_name[:77] + '...'
+
+        lines.append(f"| `{method_name}` | {method['samples']:,} | {method['cpu_time_ms']:.1f} ms | {method['percentage']:.1f}% |")
+
+    lines.append('')
+
+    return '\n'.join(lines)
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Analyze .NET performance traces')
+    parser.add_argument('trace_file', help='Path to trace.speedscope.json')
+    parser.add_argument('--output', '-o', help='Output metrics to JSON file')
+    parser.add_argument('--markdown', '-m', help='Output summary to markdown file')
+    parser.add_argument('--baseline', '-b', help='Baseline metrics JSON for comparison')
+
+    args = parser.parse_args()
+
+    # Parse trace
+    metrics = parse_speedscope(args.trace_file)
+
+    # Load baseline if provided
+    baseline = None
+    if args.baseline and Path(args.baseline).exists():
+        with open(args.baseline, 'r') as f:
+            baseline = json.load(f)
+
+    # Output JSON
+    if args.output:
+        with open(args.output, 'w') as f:
+            json.dump(metrics, f, indent=2)
+        print(f"Metrics written to {args.output}")
+    else:
+        print(json.dumps(metrics, indent=2))
+
+    # Output markdown
+    if args.markdown:
+        markdown = generate_markdown(metrics, baseline)
+        with open(args.markdown, 'w') as f:
+            f.write(markdown)
+        print(f"Summary written to {args.markdown}")
+
+
+if __name__ == '__main__':
+    main()

From 880c00611a73fedfe94805b49627934a3c168bdc Mon Sep 17 00:00:00 2001
From: Neil <neil@pozetroninc.com>
Date: Fri, 24 Oct 2025 19:01:25 -0700
Subject: [PATCH 3/6] fix: Support speedscope evented profile format in trace
 analysis

The analyze-trace.py script was only parsing the 'sampled' profile
format (samples/weights arrays), but dotnet-trace produces 'evented'
format (open/close events with timestamps). This caused the script to
extract 0 samples from valid trace files.

Changes:
- Add support for evented profile type with O/C event parsing
- Track stack state and calculate frame durations from events
- Fix deprecated datetime.utcnow() usage
- Maintain backward compatibility with sampled format

The evented format is parsed by tracking open/close events to
reconstruct the call stack and measure time each frame spent active.
This produces accurate hotspot metrics from dotnet-trace output.
---
 scripts/analyze-trace.py | 52 +++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/scripts/analyze-trace.py b/scripts/analyze-trace.py
index 96303b4..028f00c 100755
--- a/scripts/analyze-trace.py
+++ b/scripts/analyze-trace.py
@@ -16,7 +16,7 @@
 import sys
 import argparse
 from collections import Counter
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 
 
@@ -37,19 +37,49 @@ def parse_speedscope(trace_path):
             'thread_count': 0
         }
 
-    # Calculate total samples across all profiles
+    # Calculate total samples/time across all profiles
+    # Speedscope supports two profile types: 'sampled' and 'evented'
     total_samples = 0
     frame_samples = Counter()
 
     for profile in profiles:
-        samples = profile.get('samples', [])
-        weights = profile.get('weights', [])
-
-        # Count samples per frame
-        for i, frame_idx in enumerate(samples):
-            weight = weights[i] if i < len(weights) else 1
-            total_samples += weight
-            frame_samples[frame_idx] += weight
+        profile_type = profile.get('type', 'sampled')
+
+        if profile_type == 'sampled':
+            # Sampled format: samples and weights arrays
+            samples = profile.get('samples', [])
+            weights = profile.get('weights', [])
+
+            # Count samples per frame
+            for i, frame_idx in enumerate(samples):
+                weight = weights[i] if i < len(weights) else 1
+                total_samples += weight
+                frame_samples[frame_idx] += weight
+
+        elif profile_type == 'evented':
+            # Evented format: open/close events with timestamps
+            events = profile.get('events', [])
+            start_value = profile.get('startValue', 0)
+            end_value = profile.get('endValue', 0)
+
+            # Track stack and calculate time for each frame
+            stack = []  # Stack of (frame_idx, open_time)
+
+            for event in events:
+                event_type = event.get('type')
+                frame_idx = event.get('frame')
+                timestamp = event.get('at')
+
+                if event_type == 'O':  # Open frame
+                    stack.append((frame_idx, timestamp))
+                elif event_type == 'C':  # Close frame
+                    # Match with most recent open of same frame
+                    if stack and stack[-1][0] == frame_idx:
+                        _, open_time = stack.pop()
+                        duration = timestamp - open_time
+                        # Convert duration to samples (treat 1ms = 1 sample for consistency)
+                        frame_samples[frame_idx] += duration
+                        total_samples += duration
 
     # Estimate CPU time (1ms per sample is typical for dotnet-trace cpu-sampling)
     cpu_time_ms = total_samples * 1.0
@@ -111,7 +141,7 @@ def parse_speedscope(trace_path):
         'gc_percentage': round(gc_percentage, 2),
         'alloc_samples': alloc_samples,
         'alloc_percentage': round(alloc_percentage, 2),
-        'timestamp': datetime.utcnow().isoformat() + 'Z'
+        'timestamp': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
     }
 
 

From 964ab15a0514eee69bf22012c4047f1ea85f1f03 Mon Sep 17 00:00:00 2001
From: Neil <neil@pozetroninc.com>
Date: Fri, 24 Oct 2025 19:11:29 -0700
Subject: [PATCH 4/6] feat(ci): Add PR comment for performance trace visibility

Add a step to post performance metrics as a PR comment, making
them immediately visible on the PR page without requiring users
to click through to job details.

The comment includes:
- Full performance summary with metrics
- Link to detailed trace in Actions
- Link to downloadable flamegraph artifact

Only runs on pull requests to avoid errors on direct pushes.
---
 .github/workflows/dotnet.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml
index 57e781c..b2a2a79 100644
--- a/.github/workflows/dotnet.yml
+++ b/.github/workflows/dotnet.yml
@@ -220,6 +220,22 @@ jobs:
         run: |
           echo "## Performance Trace Analysis" >> $GITHUB_STEP_SUMMARY
           cat perf-summary.md >> $GITHUB_STEP_SUMMARY
+      - name: Post performance report to PR
+        if: github.event_name == 'pull_request'
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          # Create comment body with performance summary
+          COMMENT_BODY="## 📊 Performance Trace Report
+
+          $(cat perf-summary.md)
+
+          ---
+          [View full trace](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) | [Download flamegraph](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})"
+
+          # Post comment to PR
+          gh pr comment ${{ github.event.pull_request.number }} \
+            --body "$COMMENT_BODY"
       - name: Update baseline cache
         if: github.ref == 'refs/heads/main'
         uses: actions/cache/save@v4

From 68c288f669c57cb08f8484d506ac15fa011bbc85 Mon Sep 17 00:00:00 2001
From: Neil <neil@pozetroninc.com>
Date: Fri, 24 Oct 2025 19:37:10 -0700
Subject: [PATCH 5/6] refactor(perf): Enhance performance report
 user-friendliness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Significantly improved readability and usability of performance trace reports:

**Readability Improvements:**
- Human-readable time formats (41.6 min vs 2498937ms)
- Visual progress bars for percentages using block characters
- Clear verdict at top (✅/⚠️/🔴) based on baseline comparison
- Collapsible sections to reduce information density

**Balanced View:**
- Show verdict + key metrics prominently
- Filter application hotspots from framework noise
- Categorize methods as app/framework/noise with emoji badges
- Full technical details available in collapsible sections

**New Features:**
- format_duration(): Convert ms to readable format
- format_percentage_bar(): Create visual █░ progress bars
- categorize_method(): Detect ProbotSharp app code vs framework
- Improved baseline comparison with trend indicators (↑↓→)
- Contextual help: "Changes within normal variance" guidance

**User Experience:**
- Before: Dense technical table, unclear if performance is good/bad
- After: Clean verdict, visual bars, filtered hotspots, actionable insights

Tested with real trace files - output is much more scannable and actionable.
---
 scripts/analyze-trace.py | 234 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 204 insertions(+), 30 deletions(-)

diff --git a/scripts/analyze-trace.py b/scripts/analyze-trace.py
index 028f00c..90fc7bb 100755
--- a/scripts/analyze-trace.py
+++ b/scripts/analyze-trace.py
@@ -20,6 +20,67 @@
 from pathlib import Path
 
 
+def format_duration(ms):
+    """Convert milliseconds to human-readable format.
+
+    Examples:
+        125 ms -> "125 ms"
+        2500 ms -> "2.5 sec"
+        65000 ms -> "1.1 min"
+        2472055 ms -> "41.2 min"
+    """
+    if ms < 1000:
+        return f"{ms:.0f} ms"
+    elif ms < 60000:
+        return f"{ms / 1000:.1f} sec"
+    elif ms < 3600000:
+        return f"{ms / 60000:.1f} min"
+    else:
+        return f"{ms / 3600000:.1f} hours"
+
+
+def format_percentage_bar(percentage, width=20):
+    """Create visual progress bar for percentages.
+
+    Example:
+        40.0 -> "████████░░░░░░░░░░░░ 40.0%"
+    """
+    filled = int((percentage / 100.0) * width)
+    empty = width - filled
+    bar = '█' * filled + '░' * empty
+    return f"{bar} {percentage:.1f}%"
+
+
+def categorize_method(method_name):
+    """Categorize method as app code, framework, or noise.
+
+    Returns:
+        'app': ProbotSharp application code
+        'framework': .NET framework code (potentially interesting)
+        'noise': Threading/wait methods, meta-frames (not actionable)
+    """
+    method_lower = method_name.lower()
+
+    # Noise: Thread management, meta-frames, unmanaged code
+    noise_patterns = [
+        'threads', 'non-activities', 'process64', 'unmanaged_code_time',
+        'threading.portablethreadpool', 'threading.lowlevellifosemaphore',
+        'threading.semaphoreslim', 'threading.monitor.wait',
+        'threading.manualreseteventslim', 'threading.thread.sleep',
+        'waithandle', 'taskawaiter'
+    ]
+
+    if any(pattern in method_lower for pattern in noise_patterns):
+        return 'noise'
+
+    # App code: ProbotSharp namespaces
+    if 'probotsharp' in method_lower:
+        return 'app'
+
+    # Everything else is framework (might be interesting)
+    return 'framework'
+
+
 def parse_speedscope(trace_path):
     """Parse Speedscope JSON format and extract metrics."""
     with open(trace_path, 'r') as f:
@@ -111,7 +172,8 @@ def parse_speedscope(trace_path):
                 'method': method_path,
                 'samples': sample_count,
                 'cpu_time_ms': sample_count * 1.0,
-                'percentage': round(percentage, 2)
+                'percentage': round(percentage, 2),
+                'category': categorize_method(method_path)
             })
 
     # Analyze threads
@@ -146,53 +208,165 @@ def parse_speedscope(trace_path):
 
 
 def generate_markdown(metrics, baseline=None):
-    """Generate markdown summary of metrics."""
-    lines = ['# Performance Trace Analysis\n']
+    """Generate user-friendly markdown summary of metrics."""
+    lines = []
 
     if 'error' in metrics:
+        lines.append('# Performance Trace Analysis\n')
         lines.append(f"**Error:** {metrics['error']}\n")
         return '\n'.join(lines)
 
-    # Summary stats
-    lines.append('## Summary\n')
-    lines.append(f"- **Total Samples:** {metrics['total_samples']:,}")
-    lines.append(f"- **Estimated CPU Time:** {metrics['cpu_time_ms']:.2f} ms")
-    lines.append(f"- **Thread Count:** {metrics['thread_count']}")
-    lines.append(f"- **GC Activity:** {metrics['gc_percentage']:.2f}% of samples")
+    # ===== VERDICT & SUMMARY =====
+    lines.append('## 📊 Performance Summary\n')
+
+    # Determine verdict based on baseline comparison
+    verdict_emoji = '✅'
+    verdict_text = 'No significant performance impact detected'
+
+    if baseline and 'cpu_time_ms' in baseline and baseline['cpu_time_ms'] > 0:
+        cpu_diff = metrics['cpu_time_ms'] - baseline['cpu_time_ms']
+        cpu_pct = (cpu_diff / baseline['cpu_time_ms']) * 100
+
+        if cpu_pct > 15:
+            verdict_emoji = '🔴'
+            verdict_text = f'Performance regression detected (+{cpu_pct:.1f}%)'
+        elif cpu_pct > 5:
+            verdict_emoji = '⚠️'
+            verdict_text = f'Minor performance impact (+{cpu_pct:.1f}%)'
+        elif cpu_pct < -5:
+            verdict_emoji = '🚀'
+            verdict_text = f'Performance improvement detected ({cpu_pct:.1f}%)'
+        else:
+            verdict_emoji = '✅'
+            verdict_text = f'Performance within normal variance ({cpu_pct:+.1f}%)'
+
+    lines.append(f"**Verdict:** {verdict_emoji} {verdict_text}")
+    lines.append(f"**Trace Duration:** {format_duration(metrics['cpu_time_ms'])}")
+    lines.append(f"**Test Workload:** 40 webhook requests (20 issues, 20 pull requests)\n")
+    lines.append('---\n')
+
+    # ===== KEY METRICS TABLE =====
+    lines.append('### Key Metrics\n')
+    lines.append('| Metric | Value | Visual |')
+    lines.append('|--------|-------|--------|')
+
+    # CPU Time
+    cpu_time_human = format_duration(metrics['cpu_time_ms'])
+    cpu_bar = format_percentage_bar(100, width=20)
+    lines.append(f"| Total CPU Time | {cpu_time_human} | `{cpu_bar}` |")
+
+    # Thread Count
+    lines.append(f"| Thread Count | {metrics['thread_count']} threads | |")
+
+    # GC Activity
+    gc_pct = metrics['gc_percentage']
+    gc_bar = format_percentage_bar(min(gc_pct, 100), width=20)
+    gc_status = '✅ Minimal' if gc_pct < 5 else ('⚠️ Moderate' if gc_pct < 15 else '🔴 High')
+    lines.append(f"| GC Activity | {gc_pct:.2f}% {gc_status} | `{gc_bar}` |")
+
     lines.append('')
 
-    # Baseline comparison
-    if baseline and 'total_samples' in baseline and baseline['total_samples'] > 0:
-        lines.append('## Comparison to Baseline (main)\n')
+    # ===== BASELINE COMPARISON (if available) =====
+    if baseline and 'cpu_time_ms' in baseline and baseline['cpu_time_ms'] > 0:
+        lines.append('### 📈 Baseline Comparison\n')
 
-        cpu_diff = metrics['cpu_time_ms'] - baseline.get('cpu_time_ms', 0)
-        cpu_pct = (cpu_diff / baseline.get('cpu_time_ms', 1)) * 100
+        cpu_diff = metrics['cpu_time_ms'] - baseline['cpu_time_ms']
+        cpu_pct = (cpu_diff / baseline['cpu_time_ms']) * 100
 
         if cpu_diff > 0:
-            emoji = '🔴' if cpu_pct > 10 else '🟡'
-            lines.append(f"{emoji} **CPU Time:** +{cpu_diff:.2f} ms (+{cpu_pct:.1f}%)")
+            trend = '↑'
+            emoji = '🔴' if cpu_pct > 15 else ('⚠️' if cpu_pct > 5 else '🟡')
+            lines.append(f"{emoji} **CPU Time vs Baseline (main):** +{format_duration(abs(cpu_diff))} ({trend} +{cpu_pct:.1f}%)")
         elif cpu_diff < 0:
-            emoji = '🟢'
-            lines.append(f"{emoji} **CPU Time:** {cpu_diff:.2f} ms ({cpu_pct:.1f}%)")
+            trend = '↓'
+            emoji = '🚀'
+            lines.append(f"{emoji} **CPU Time vs Baseline (main):** -{format_duration(abs(cpu_diff))} ({trend} {cpu_pct:.1f}%)")
         else:
-            lines.append(f"⚪ **CPU Time:** No change")
+            trend = '→'
+            lines.append(f"✅ **CPU Time vs Baseline (main):** No change ({trend})")
+
+        # Add interpretation
+        if abs(cpu_pct) < 5:
+            lines.append('\n_Changes are within normal variance - no action needed._')
+        elif cpu_pct > 15:
+            lines.append('\n_Significant regression detected - consider investigating hotspots below._')
 
         lines.append('')
 
-    # Top hotspots
-    lines.append('## Top 10 Hotspot Methods\n')
-    lines.append('| Method | Samples | CPU Time | % |')
-    lines.append('|--------|---------|----------|---|')
+    lines.append('---\n')
 
-    for method in metrics['top_methods']:
-        method_name = method['method']
-        # Truncate very long method names
-        if len(method_name) > 80:
-            method_name = method_name[:77] + '...'
+    # ===== APPLICATION HOTSPOTS =====
+    app_methods = [m for m in metrics['top_methods'] if m['category'] == 'app']
 
-        lines.append(f"| `{method_name}` | {method['samples']:,} | {method['cpu_time_ms']:.1f} ms | {method['percentage']:.1f}% |")
+    if app_methods:
+        lines.append('### 🔥 Application Hotspots\n')
+        lines.append('_Top methods in ProbotSharp code consuming CPU:_\n')
+        lines.append('| Method | CPU Time | Percentage |')
+        lines.append('|--------|----------|------------|')
 
-    lines.append('')
+        for method in app_methods[:5]:  # Show top 5 app methods
+            method_name = method['method']
+            if len(method_name) > 60:
+                method_name = method_name[:57] + '...'
+
+            cpu_time = format_duration(method['cpu_time_ms'])
+            pct_bar = format_percentage_bar(method['percentage'], width=15)
+            lines.append(f"| `{method_name}` | {cpu_time} | `{pct_bar}` |")
+
+        lines.append('')
+    else:
+        lines.append('### 🔥 Application Hotspots\n')
+        lines.append('_No significant application hotspots detected. Most time spent in framework/wait operations._\n')
+
+    # ===== FRAMEWORK METHODS (collapsible) =====
+    framework_methods = [m for m in metrics['top_methods'] if m['category'] == 'framework']
+
+    if framework_methods:
+        lines.append('<details>')
+        lines.append('<summary>📚 Framework Methods (click to expand)</summary>\n')
+        lines.append('| Method | CPU Time | Percentage |')
+        lines.append('|--------|----------|------------|')
+
+        for method in framework_methods[:10]:
+            method_name = method['method']
+            if len(method_name) > 70:
+                method_name = method_name[:67] + '...'
+
+            cpu_time = format_duration(method['cpu_time_ms'])
+            pct_bar = format_percentage_bar(method['percentage'], width=15)
+            lines.append(f"| `{method_name}` | {cpu_time} | `{pct_bar}` |")
+
+        lines.append('\n</details>\n')
+
+    # ===== FULL DETAILS (collapsible) =====
+    lines.append('<details>')
+    lines.append('<summary>📋 Full Method Details (all categories)</summary>\n')
+    lines.append('| Method | Category | CPU Time | Percentage |')
+    lines.append('|--------|----------|----------|------------|')
+
+    for method in metrics['top_methods']:
+        method_name = method['method']
+        if len(method_name) > 60:
+            method_name = method_name[:57] + '...'
+
+        category_badge = {'app': '🎯 App', 'framework': '📦 Framework', 'noise': '🔇 Noise'}
+        category = category_badge.get(method['category'], method['category'])
+        cpu_time = format_duration(method['cpu_time_ms'])
+        pct_bar = format_percentage_bar(method['percentage'], width=12)
+        lines.append(f"| `{method_name}` | {category} | {cpu_time} | `{pct_bar}` |")
+
+    lines.append('\n</details>\n')
+
+    # ===== TECHNICAL DETAILS (collapsible) =====
+    lines.append('<details>')
+    lines.append('<summary>🔍 Technical Details</summary>\n')
+    lines.append(f"- **Total Samples:** {metrics['total_samples']:,.0f}")
+    lines.append(f"- **CPU Time (raw):** {metrics['cpu_time_ms']:.2f} ms")
+    lines.append(f"- **Sample Rate:** 1ms intervals")
+    lines.append(f"- **Trace Format:** Speedscope evented")
+    lines.append(f"- **GC Samples:** {metrics['gc_samples']:,.0f} ({metrics['gc_percentage']:.2f}%)")
+    lines.append(f"- **Timestamp:** {metrics['timestamp']}")
+    lines.append('\n</details>')
 
     return '\n'.join(lines)
 

From 68fa8ca0b278db544816c32997afb553d9dafb04 Mon Sep 17 00:00:00 2001
From: Neil <neil@pozetroninc.com>
Date: Fri, 24 Oct 2025 21:31:20 -0700
Subject: [PATCH 6/6] fix: Calculate exclusive times to prevent 82x
 overcounting in trace analysis

Fixed critical bug where script summed inclusive times instead of calculating
exclusive times, causing massive overcounting (30s trace showed as 41 minutes).

Changes:
- Implemented stack-based exclusive time calculation for evented format
- Track both inclusive (total) and exclusive (self) time per frame
- Calculate wall-clock duration as max(profile durations), not sum
- Total CPU time = sum of exclusive times across all frames
- Update all output fields to use new metric names

Metrics:
- wall_clock_ms: Actual elapsed time (endValue - startValue)
- total_cpu_time_ms: Sum of all exclusive times
- Methods show both inclusive_ms and exclusive_ms
- Percentages calculated against wall_clock, not summed times

Verification:
Before: 2,498,937 ms (41 minutes) - WRONG
After: 30,016 ms (30 seconds) - CORRECT

The fix follows industry best practices (Brendan Gregg, Speedscope) for
calculating exclusive time: exclusive = inclusive - children_time.
---
 scripts/analyze-trace.py | 164 ++++++++++++++++++++++++++-------------
 1 file changed, 111 insertions(+), 53 deletions(-)

diff --git a/scripts/analyze-trace.py b/scripts/analyze-trace.py
index 90fc7bb..1a5c5e1 100755
--- a/scripts/analyze-trace.py
+++ b/scripts/analyze-trace.py
@@ -92,8 +92,8 @@ def parse_speedscope(trace_path):
     if not frames or not profiles:
         return {
             'error': 'Empty trace file - no frames or profiles found',
-            'total_samples': 0,
-            'cpu_time_ms': 0,
+            'wall_clock_ms': 0,
+            'total_cpu_time_ms': 0,
             'top_methods': [],
             'thread_count': 0
         }
@@ -102,6 +102,7 @@ def parse_speedscope(trace_path):
     # Speedscope supports two profile types: 'sampled' and 'evented'
     total_samples = 0
     frame_samples = Counter()
+    profile_results = []  # Store per-profile metrics for evented format
 
     for profile in profiles:
         profile_type = profile.get('type', 'sampled')
@@ -123,8 +124,12 @@ def parse_speedscope(trace_path):
             start_value = profile.get('startValue', 0)
             end_value = profile.get('endValue', 0)
 
-            # Track stack and calculate time for each frame
-            stack = []  # Stack of (frame_idx, open_time)
+            # Track inclusive and exclusive times separately
+            frame_inclusive = Counter()  # Total time in frame (including children)
+            frame_exclusive = Counter()  # Time in frame only (excluding children)
+            frame_count = Counter()      # Number of times frame appears
+
+            stack = []  # Stack of (frame_idx, open_time, children_time)
 
             for event in events:
                 event_type = event.get('type')
@@ -132,24 +137,71 @@ def parse_speedscope(trace_path):
                 timestamp = event.get('at')
 
                 if event_type == 'O':  # Open frame
-                    stack.append((frame_idx, timestamp))
+                    stack.append((frame_idx, timestamp, 0.0))
+
                 elif event_type == 'C':  # Close frame
-                    # Match with most recent open of same frame
                     if stack and stack[-1][0] == frame_idx:
-                        _, open_time = stack.pop()
-                        duration = timestamp - open_time
-                        # Convert duration to samples (treat 1ms = 1 sample for consistency)
-                        frame_samples[frame_idx] += duration
-                        total_samples += duration
-
-    # Estimate CPU time (1ms per sample is typical for dotnet-trace cpu-sampling)
-    cpu_time_ms = total_samples * 1.0
+                        _, open_time, children_time = stack.pop()
+
+                        # Calculate times
+                        inclusive = timestamp - open_time
+                        exclusive = inclusive - children_time
+
+                        # Update frame stats
+                        frame_inclusive[frame_idx] += inclusive
+                        frame_exclusive[frame_idx] += exclusive
+                        frame_count[frame_idx] += 1
+
+                        # Propagate inclusive time to parent
+                        if stack:
+                            parent_frame, parent_open, parent_children = stack[-1]
+                            stack[-1] = (parent_frame, parent_open, parent_children + inclusive)
+
+            # Calculate profile duration (Step 2)
+            profile_duration = end_value - start_value
+
+            # Store profile result for aggregation (Step 2)
+            profile_results.append({
+                'duration_ms': profile_duration,
+                'frame_inclusive': frame_inclusive,
+                'frame_exclusive': frame_exclusive,
+                'frame_count': frame_count
+            })
 
-    # Get top hotspot methods (frames with most samples)
-    top_frames = frame_samples.most_common(10)
+    # Aggregate results across all profiles (Step 3)
+    if profile_results:
+        # Evented format: aggregate from profile_results
+        # Wall-clock = max duration across all threads (they run concurrently)
+        wall_clock_ms = max(p['duration_ms'] for p in profile_results)
+
+        # Aggregate frame times across all threads
+        total_inclusive = Counter()
+        total_exclusive = Counter()
+        total_count = Counter()
+
+        for result in profile_results:
+            for frame_idx, time in result['frame_inclusive'].items():
+                total_inclusive[frame_idx] += time
+            for frame_idx, time in result['frame_exclusive'].items():
+                total_exclusive[frame_idx] += time
+            for frame_idx, count in result['frame_count'].items():
+                total_count[frame_idx] += count
+
+        # Total CPU time = sum of all exclusive times
+        total_cpu_time_ms = sum(total_exclusive.values())
+    else:
+        # Sampled format or empty: use old behavior for backward compatibility
+        wall_clock_ms = total_samples * 1.0
+        total_cpu_time_ms = total_samples * 1.0
+        total_exclusive = frame_samples
+        total_inclusive = frame_samples
+        total_count = Counter()
+
+    # Get top hotspot methods (frames with most exclusive time)
+    top_frames = total_exclusive.most_common(10)
     top_methods = []
 
-    for frame_idx, sample_count in top_frames:
+    for frame_idx, exclusive_time in top_frames:
         if frame_idx < len(frames):
             frame = frames[frame_idx]
             frame_name = frame.get('name', 'Unknown')
@@ -165,38 +217,44 @@ def parse_speedscope(trace_path):
             else:
                 method_path = frame_name
 
-            # Calculate percentage
-            percentage = (sample_count / total_samples * 100) if total_samples > 0 else 0
+            # Get both inclusive and exclusive times
+            inclusive_time = total_inclusive[frame_idx]
+
+            # Calculate percentages against wall-clock duration
+            inclusive_pct = (inclusive_time / wall_clock_ms * 100) if wall_clock_ms > 0 else 0
+            exclusive_pct = (exclusive_time / wall_clock_ms * 100) if wall_clock_ms > 0 else 0
 
             top_methods.append({
                 'method': method_path,
-                'samples': sample_count,
-                'cpu_time_ms': sample_count * 1.0,
-                'percentage': round(percentage, 2),
+                'inclusive_ms': round(inclusive_time, 2),
+                'exclusive_ms': round(exclusive_time, 2),
+                'inclusive_pct': round(inclusive_pct, 2),
+                'exclusive_pct': round(exclusive_pct, 2),
+                'samples': total_count[frame_idx],
                 'category': categorize_method(method_path)
             })
 
     # Analyze threads
     thread_count = len(profiles)
 
-    # Look for GC and allocation patterns
+    # Look for GC and allocation patterns (use exclusive time)
     gc_samples = 0
     alloc_samples = 0
 
-    for frame_idx, count in frame_samples.items():
+    for frame_idx, exclusive_time in total_exclusive.items():
         if frame_idx < len(frames):
             frame_name = frames[frame_idx].get('name', '').lower()
             if 'gc' in frame_name or 'garbage' in frame_name:
-                gc_samples += count
+                gc_samples += exclusive_time
             if 'alloc' in frame_name:
-                alloc_samples += count
+                alloc_samples += exclusive_time
 
-    gc_percentage = (gc_samples / total_samples * 100) if total_samples > 0 else 0
-    alloc_percentage = (alloc_samples / total_samples * 100) if total_samples > 0 else 0
+    gc_percentage = (gc_samples / wall_clock_ms * 100) if wall_clock_ms > 0 else 0
+    alloc_percentage = (alloc_samples / wall_clock_ms * 100) if wall_clock_ms > 0 else 0
 
     return {
-        'total_samples': total_samples,
-        'cpu_time_ms': round(cpu_time_ms, 2),
+        'wall_clock_ms': round(wall_clock_ms, 2),
+        'total_cpu_time_ms': round(total_cpu_time_ms, 2),
         'top_methods': top_methods,
         'thread_count': thread_count,
         'gc_samples': gc_samples,
@@ -223,25 +281,25 @@ def generate_markdown(metrics, baseline=None):
     verdict_emoji = '✅'
     verdict_text = 'No significant performance impact detected'
 
-    if baseline and 'cpu_time_ms' in baseline and baseline['cpu_time_ms'] > 0:
-        cpu_diff = metrics['cpu_time_ms'] - baseline['cpu_time_ms']
-        cpu_pct = (cpu_diff / baseline['cpu_time_ms']) * 100
+    if baseline and 'wall_clock_ms' in baseline and baseline['wall_clock_ms'] > 0:
+        wall_diff = metrics['wall_clock_ms'] - baseline['wall_clock_ms']
+        wall_pct = (wall_diff / baseline['wall_clock_ms']) * 100
 
-        if cpu_pct > 15:
+        if wall_pct > 15:
             verdict_emoji = '🔴'
-            verdict_text = f'Performance regression detected (+{cpu_pct:.1f}%)'
-        elif cpu_pct > 5:
+            verdict_text = f'Performance regression detected (+{wall_pct:.1f}%)'
+        elif wall_pct > 5:
             verdict_emoji = '⚠️'
-            verdict_text = f'Minor performance impact (+{cpu_pct:.1f}%)'
-        elif cpu_pct < -5:
+            verdict_text = f'Minor performance impact (+{wall_pct:.1f}%)'
+        elif wall_pct < -5:
             verdict_emoji = '🚀'
-            verdict_text = f'Performance improvement detected ({cpu_pct:.1f}%)'
+            verdict_text = f'Performance improvement detected ({wall_pct:.1f}%)'
         else:
             verdict_emoji = '✅'
-            verdict_text = f'Performance within normal variance ({cpu_pct:+.1f}%)'
+            verdict_text = f'Performance within normal variance ({wall_pct:+.1f}%)'
 
     lines.append(f"**Verdict:** {verdict_emoji} {verdict_text}")
-    lines.append(f"**Trace Duration:** {format_duration(metrics['cpu_time_ms'])}")
+    lines.append(f"**Trace Duration:** {format_duration(metrics['wall_clock_ms'])}")
     lines.append(f"**Test Workload:** 40 webhook requests (20 issues, 20 pull requests)\n")
     lines.append('---\n')
 
@@ -251,7 +309,7 @@ def generate_markdown(metrics, baseline=None):
     lines.append('|--------|-------|--------|')
 
     # CPU Time
-    cpu_time_human = format_duration(metrics['cpu_time_ms'])
+    cpu_time_human = format_duration(metrics['total_cpu_time_ms'])
     cpu_bar = format_percentage_bar(100, width=20)
     lines.append(f"| Total CPU Time | {cpu_time_human} | `{cpu_bar}` |")
 
@@ -309,9 +367,9 @@ def generate_markdown(metrics, baseline=None):
             if len(method_name) > 60:
                 method_name = method_name[:57] + '...'
 
-            cpu_time = format_duration(method['cpu_time_ms'])
-            pct_bar = format_percentage_bar(method['percentage'], width=15)
-            lines.append(f"| `{method_name}` | {cpu_time} | `{pct_bar}` |")
+            excl_time = format_duration(method['exclusive_ms'])
+            pct_bar = format_percentage_bar(method['exclusive_pct'], width=15)
+            lines.append(f"| `{method_name}` | {excl_time} | `{pct_bar}` |")
 
         lines.append('')
     else:
@@ -332,9 +390,9 @@ def generate_markdown(metrics, baseline=None):
             if len(method_name) > 70:
                 method_name = method_name[:67] + '...'
 
-            cpu_time = format_duration(method['cpu_time_ms'])
-            pct_bar = format_percentage_bar(method['percentage'], width=15)
-            lines.append(f"| `{method_name}` | {cpu_time} | `{pct_bar}` |")
+            excl_time = format_duration(method['exclusive_ms'])
+            pct_bar = format_percentage_bar(method['exclusive_pct'], width=15)
+            lines.append(f"| `{method_name}` | {excl_time} | `{pct_bar}` |")
 
         lines.append('\n</details>\n')
 
@@ -351,17 +409,17 @@ def generate_markdown(metrics, baseline=None):
 
         category_badge = {'app': '🎯 App', 'framework': '📦 Framework', 'noise': '🔇 Noise'}
         category = category_badge.get(method['category'], method['category'])
-        cpu_time = format_duration(method['cpu_time_ms'])
-        pct_bar = format_percentage_bar(method['percentage'], width=12)
-        lines.append(f"| `{method_name}` | {category} | {cpu_time} | `{pct_bar}` |")
+        excl_time = format_duration(method['exclusive_ms'])
+        pct_bar = format_percentage_bar(method['exclusive_pct'], width=12)
+        lines.append(f"| `{method_name}` | {category} | {excl_time} | `{pct_bar}` |")
 
     lines.append('\n</details>\n')
 
     # ===== TECHNICAL DETAILS (collapsible) =====
     lines.append('<details>')
     lines.append('<summary>🔍 Technical Details</summary>\n')
-    lines.append(f"- **Total Samples:** {metrics['total_samples']:,.0f}")
-    lines.append(f"- **CPU Time (raw):** {metrics['cpu_time_ms']:.2f} ms")
+    lines.append(f"- **Wall-Clock Duration:** {metrics['wall_clock_ms']:.2f} ms")
+    lines.append(f"- **Total CPU Time:** {metrics['total_cpu_time_ms']:.2f} ms")
     lines.append(f"- **Sample Rate:** 1ms intervals")
     lines.append(f"- **Trace Format:** Speedscope evented")
     lines.append(f"- **GC Samples:** {metrics['gc_samples']:,.0f} ({metrics['gc_percentage']:.2f}%)")