
Commit c9cc407

feat: add experiment level throughput (#9)
* feat: add experiment level throughput

  - Add monitoring dashboard and power efficiency plots to assets
  - Update analysis guide with new visualization documentation
  - Clarify throughput metrics definition in reports and schemas
  - Add system throughput and batch efficiency properties to BenchmarkResult
  - Enhance report generation with detailed throughput analysis section
  - Improve documentation comments in analysis code

  Signed-off-by: cmontemuino <[email protected]>

* refactor: redesign the batch efficiency ratio metric

  Signed-off-by: cmontemuino <[email protected]>

* chore: use proper formatting for percentages

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* fix: throughput_scaling_efficiency metric

  Signed-off-by: cmontemuino <[email protected]>

* refactor: improve batch efficiency calculation

  - Remove the flawed `batch_efficiency_ratio` and related properties from the
    `BenchmarkResult` schema; these methods used circular logic and produced
    misleading results.
  - Implement `BatchEfficiencyAnalyzer` to perform the batch efficiency ratio
    calculation.

  Signed-off-by: cmontemuino <[email protected]>

---------

Signed-off-by: cmontemuino <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 25b4b3d commit c9cc407

File tree

15 files changed: +436 −10 lines

Binary asset changes (image previews not shown): -67 KB, 1.05 KB, 616 Bytes, 403 KB, 202 KB.

docs/user-guide/analysis-guide.md

Lines changed: 12 additions & 4 deletions
@@ -26,18 +26,23 @@ After running the analysis pipeline, you'll get comprehensive performance insights
 analysis/sample-output/
 ├── plots/                                # Visual performance analysis
 │   ├── batch_size_scaling.png            # Batch size vs performance
-│   ├── batch_size_scaling_by_memory.png
-│   ├── memory_efficiency.png             # Memory utilization effects
+│   ├── batch_size_scaling_by_memory.png  # Same as batch_size_scaling.png, but with a split per memory utilization
 │   ├── latency_analysis.png              # Latency distribution analysis
+│   ├── memory_efficiency.png             # Memory utilization effects
+│   ├── monitoring_dashboard.png          # Dashboard with power consumption + GPU temp distribution + CPU-GPU power relationship
+│   ├── power_efficiency_analysis.png     # Power consumption analysis + power stability vs. efficiency
 │   └── throughput_comparison.png         # Throughput comparisons
 ├── reports/                              # Comprehensive analysis reports
 │   ├── analysis_summary.json             # Machine-readable summary
 │   └── benchmark_analysis_report.md      # Human-readable report
 └── tables/                               # Statistical summaries (CSV)
     ├── batch_size_analysis.csv
+    ├── gpu_allocation_summary.csv
     ├── memory_utilization_analysis.csv
     ├── model_performance_summary.csv
-    └── raw_results.csv
+    ├── monitoring_summary.csv
+    ├── raw_results.csv
+    └── thermal_analysis.csv
 ```

 ### Visual Analysis Guide

@@ -180,10 +185,13 @@ analysis/sample-output/

 #### 5. Throughput Analysis

-![Througput Analysis](../assets/img/sample-analysis/throughput_comparison.png)
+![Throughput Analysis](../assets/img/sample-analysis/throughput_comparison.png)

 **What it shows**: Throughput performance across different configuration parameters, helping identify optimal settings for maximum system utilization

+> ℹ️ **Note**: **Throughput** is defined as the **average latency** expressed as a rate (1 / avg_latency),
+> representing **how frequently a single request completes**.
+
 **How to interpret**:

 - **Left plot (Throughput by Model and Batch Size)**: **Grouped bars** for each model showing different batch sizes
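
The note added above defines throughput as the inverse of average latency. A minimal sketch of that arithmetic (the latency value is hypothetical, chosen only for illustration):

```python
# Hypothetical value, for illustration only.
avg_latency = 0.25  # seconds per request

# Per-request completion rate: how frequently a single request completes.
throughput = 1.0 / avg_latency

print(f"{throughput:.2f} requests/second")  # 4.00 requests/second
```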

src/amd_bench/core/analysis.py

Lines changed: 68 additions & 4 deletions
@@ -1115,7 +1115,6 @@ def _extract_vllm_latency_metrics(data: Dict[str, Any]) -> BenchmarkMetrics:

     - **Per-Request Completion Rate**: 1 / avg_latency (requests/second/experiment)
       - Measures how frequently individual requests complete
-      - Industry standard for latency benchmarks
       - Lower values for larger batch sizes due to queueing delays

     - **Batch-Level Throughput**: batch_size / avg_latency (theoretical max)

@@ -1203,8 +1202,9 @@ def get_percentile(p: int) -> float:
         """Extract percentile with flexible key handling."""
         return float(percentiles.get(str(p), percentiles.get(p, 0.0)))

-    # Calculate per-request completion rate (industry standard for latency benchmarks)
-    per_request_completion_rate = 1.0 / avg_latency
+    # Calculate request completion rate (requests per second for single-request processing)
+    # This represents the inverse of latency: how frequently one request completes
+    throughput = 1.0 / avg_latency

     return BenchmarkMetrics(
         # Core latency metrics

@@ -1217,7 +1217,7 @@ def get_percentile(p: int) -> float:
         p99_latency=get_percentile(99),
         # Per-request completion rate (requests/second per experiment)
         # Note: This is NOT system-level throughput for batch processing
-        throughput=per_request_completion_rate,
+        throughput=throughput,
         # Token-level metrics (not available in latency-only benchmarks)
         tokens_per_second=0.0,
         # Experimental metadata

@@ -1250,3 +1250,67 @@ def _generate_experiment_id(params: Dict[str, str]) -> str:
         params.get("timestamp", "unknown"),
     ]
     return "_".join(str(p).replace("/", "-") for p in key_params)
+
+
+class BatchEfficiencyAnalyzer:
+    """Analyze batch efficiency across multiple batch size configurations."""
+
+    def __init__(self, results: List[BenchmarkResult]):
+        self.results = results
+        self.by_batch_size = self._group_by_batch_size()
+
+    def _group_by_batch_size(self) -> Dict[int, List[BenchmarkResult]]:
+        """Group results by batch size for comparison."""
+        from collections import defaultdict
+
+        groups: Dict[int, List[BenchmarkResult]] = defaultdict(list)
+        for result in self.results:
+            groups[result.config.batch_size].append(result)
+        return dict(groups)
+
+    def calculate_scaling_efficiency(self, baseline_batch_size: int = 1) -> Dict[int, float]:
+        """
+        Calculate how efficiently each batch size scales compared to baseline.
+
+        Returns efficiency ratios where:
+        - 1.0 = same efficiency as baseline
+        - >1.0 = better than baseline
+        - <1.0 = worse than baseline
+        """
+        if baseline_batch_size not in self.by_batch_size:
+            raise ValueError(f"No data for baseline batch size {baseline_batch_size}")
+
+        baseline_results = self.by_batch_size[baseline_batch_size]
+        baseline_throughput = sum(r.metrics.throughput for r in baseline_results) / len(
+            baseline_results
+        )
+
+        efficiencies = {}
+        for batch_size, results in self.by_batch_size.items():
+            avg_system_throughput = sum(r.system_throughput for r in results) / len(results)
+            theoretical_throughput = batch_size * baseline_throughput
+            efficiencies[batch_size] = avg_system_throughput / theoretical_throughput
+
+        return efficiencies
+
+    def get_scaling_grades(self, baseline_batch_size: int = 1) -> Dict[int, str]:
+        """Generate a human-readable performance grade for each batch size."""
+
+        efficiency_ratios = self.calculate_scaling_efficiency(baseline_batch_size)
+        grades = {}
+
+        for batch_size, ratio in efficiency_ratios.items():
+            if ratio >= 1.1:
+                grades[batch_size] = "A+ (Excellent)"
+            elif ratio >= 1.0:
+                grades[batch_size] = "A (Very Good)"
+            elif ratio >= 0.9:
+                grades[batch_size] = "B (Good)"
+            elif ratio >= 0.8:
+                grades[batch_size] = "C (Fair)"
+            elif ratio >= 0.7:
+                grades[batch_size] = "D (Poor)"
+            else:
+                grades[batch_size] = "F (Very Poor)"
+
+        return grades
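
A hedged usage sketch for the new `BatchEfficiencyAnalyzer`. The stand-in result objects below mimic only the attributes the analyzer reads (`config.batch_size`, `metrics.throughput`, and `system_throughput`); the real `BenchmarkResult` is a richer Pydantic model, and the numbers are invented for illustration:

```python
from dataclasses import dataclass

from amd_bench.core.analysis import BatchEfficiencyAnalyzer

# Stand-ins mimicking only the fields BatchEfficiencyAnalyzer touches.
@dataclass
class _Config:
    batch_size: int

@dataclass
class _Metrics:
    throughput: float  # per-request completion rate, 1 / avg_latency

@dataclass
class _Result:
    config: _Config
    metrics: _Metrics

    @property
    def system_throughput(self) -> float:
        # Same definition as BenchmarkResult.system_throughput in this commit.
        return self.config.batch_size * self.metrics.throughput

# Hypothetical runs: batch size 8 raises per-request latency, so its
# completion rate drops from 0.50 to 0.45 req/s.
results = [_Result(_Config(1), _Metrics(0.50)), _Result(_Config(8), _Metrics(0.45))]

analyzer = BatchEfficiencyAnalyzer(results)
print(analyzer.calculate_scaling_efficiency())  # {1: 1.0, 8: ~0.9}
print(analyzer.get_scaling_grades())            # {1: 'A (Very Good)', 8: 'B (Good)'}
```

For batch size 8, the average system throughput is 8 × 0.45 = 3.6 req/s against a theoretical 8 × 0.50 = 4.0 req/s, giving the 0.9 efficiency ratio above.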

src/amd_bench/core/reporters.py

Lines changed: 50 additions & 1 deletion
@@ -113,6 +113,10 @@ def _create_markdown_report(
         f.write("\n## Model Performance Overview\n\n")
         self._write_model_performance_section(f)

+        # Throughput Analysis
+        f.write("\n## Throughput Analysis\n\n")
+        self._write_throughput_analysis_section(f)
+
         # Configuration Analysis
         f.write("\n## Configuration Analysis\n\n")
         self._write_configuration_analysis(f)

@@ -143,9 +147,14 @@ def _write_executive_summary(self, file: TextIO) -> None:
             f"This analysis covers **{len(models)} models** across **{len(self.results)} experiments**.\n\n"
         )
         file.write(f"- **Average Latency**: {avg_latency:.4f} seconds\n")
-        file.write(f"- **Average Throughput**: {avg_throughput:.2f} requests/second\n")
+        file.write(f"- **Throughput**: {avg_throughput:.2f} requests/second\n")
         file.write(f"- **Models Tested**: {', '.join(sorted(models))}\n")

+        file.write(
+            """> ℹ️ **Note**: **Throughput** is defined as the **average latency** expressed as a rate
+(1 / avg_latency), representing **how frequently a single request completes**.\n"""
+        )
+
     def _write_model_performance_section(self, file: TextIO) -> None:
         """Write model performance section to markdown report."""
         if not self.results:

@@ -178,6 +187,46 @@ def _write_model_performance_section(self, file: TextIO) -> None:

         file.write("\n")

+    def _write_throughput_analysis_section(self, file: TextIO) -> None:
+        """Write enhanced throughput analysis with proper metric distinctions."""
+
+        file.write("**Important**: This analysis reports two different throughput metrics:\n\n")
+        file.write(
+            "- **Per-Request Completion Rate**: How frequently individual requests complete\n"
+        )
+        file.write(
+            "- **System Throughput**: Total system processing capacity (batch_size × completion_rate)\n\n"
+        )
+
+        file.write(
+            "| Batch Size | Avg Latency (s) | Completion Rate (req/s) | System Throughput (req/s) | Input Length | Output Length | Mem Util (%) |\n"
+        )
+        file.write(
+            "|------------|-----------------|-------------------------|---------------------------|--------------|---------------|--------------|\n"
+        )
+
+        # Sort by batch_size first, then by latency within each batch size
+        sorted_results = sorted(
+            self.results, key=lambda r: (r.config.batch_size, r.metrics.avg_latency)
+        )
+
+        for result in sorted_results:
+            system_throughput = result.config.batch_size * result.metrics.throughput
+            file.write(
+                f"| {result.config.batch_size} | "
+                f"{result.metrics.avg_latency:.3f} | {result.metrics.throughput:.3f} | "
+                f"{system_throughput:.3f} | {result.config.input_length} | {result.config.output_length} | {result.config.memory_util * 100:.1f} |\n"
+            )
+
+        file.write("\n**Key Insights:**\n")
+        file.write("- Larger batch sizes reduce per-request completion rates due to queueing\n")
+        file.write(
+            "- System throughput may still increase with batch size despite higher latency\n"
+        )
+        file.write(
+            "- Choose batch size based on your use case: latency-sensitive vs. throughput-optimized\n\n"
+        )
+
     def _write_configuration_analysis(self, file: TextIO) -> None:
         """Write configuration analysis section."""
         if not self.results:
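
To see why the report distinguishes the two metrics, here is a small sketch mirroring the per-row arithmetic of `_write_throughput_analysis_section` (the latencies are hypothetical, not measured data): the completion rate falls as batch size grows, while system throughput can still rise.

```python
# Hypothetical (batch_size, avg_latency-in-seconds) pairs, for illustration only.
runs = [(1, 0.500), (4, 0.650), (8, 0.900)]

for batch_size, avg_latency in runs:
    completion_rate = 1.0 / avg_latency               # per-request completion rate
    system_throughput = batch_size * completion_rate  # batch_size x completion_rate
    print(f"| {batch_size} | {avg_latency:.3f} | {completion_rate:.3f} | {system_throughput:.3f} |")

# | 1 | 0.500 | 2.000 | 2.000 |
# | 4 | 0.650 | 1.538 | 6.154 |
# | 8 | 0.900 | 1.111 | 8.889 |
```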

src/amd_bench/schemas/benchmark.py

Lines changed: 19 additions & 0 deletions
@@ -115,6 +115,25 @@ def efficiency_score(self) -> float:
             return self.metrics.throughput / self.metrics.avg_latency
         return 0.0

+    @property
+    def system_throughput(self) -> float:
+        """Calculate system-level throughput accounting for batch processing.
+
+        This property calculates the actual system throughput by considering
+        the batch size used in the experiment, providing a more accurate measure
+        of system processing capacity for batch workloads.
+
+        Returns:
+            float: System throughput in requests/second
+                (batch_size × `self.metrics.throughput`)
+
+        Example:
+            For an experiment with batch_size=8 and avg_latency=2.0s:
+            - per_request_completion_rate = 1/2.0 = 0.5 req/s
+            - system_throughput = 8 * 0.5 = 4.0 req/s
+        """
+        return self.config.batch_size * self.metrics.throughput
+

 class ExperimentFiles(BaseModel):
     """File paths for a complete experiment."""

tests/integration/schema/__init__.py

Whitespace-only changes.
