"""Performance benchmark framework for mesa-llm"""
import csv
import os
import statistics
import time


1511class PerformanceBenchmark :
1612 """Performance testing and analysis framework"""
17-
13+
1814 def __init__ (self ):
19- self .results : List [Dict ] = []
20-
21- def run_single_test (self , n_agents : int , runs : int = 3 , test_model_class = None ) -> Dict :
15+ self .results : list [dict ] = []
16+
17+ def run_single_test (
18+ self , n_agents : int , runs : int = 3 , test_model_class = None
19+ ) -> dict :
2220 """Run performance test for specific agent count"""
2321 print (f"\n 🔬 Testing { n_agents } agents..." )
24-
22+
2523 # Import test models if not provided
2624 if test_model_class is None :
2725 from tests .test_models import PerformanceTestModel
26+
2827 test_model_class = PerformanceTestModel
29-
28+
3029 sequential_times = []
3130 parallel_times = []
32-
31+
3332 for run in range (runs ):
3433 print (f" Run { run + 1 } /{ runs } ..." )
35-
34+
3635 # Test sequential execution
3736 start_time = time .time ()
3837 model_seq = test_model_class (n_agents = n_agents , enable_parallel = False )
3938 creation_time = time .time () - start_time
40-
39+
4140 step_start = time .time ()
4241 model_seq .step_sequential ()
4342 step_time = time .time () - step_start
4443 sequential_times .append (step_time )
45-
44+
4645 # Test parallel execution
4746 start_time = time .time ()
4847 model_par = test_model_class (n_agents = n_agents , enable_parallel = True )
4948 step_start = time .time ()
5049 model_par .step_parallel ()
5150 step_time = time .time () - step_start
5251 parallel_times .append (step_time )
53-
54- print (f" Sequential: { sequential_times [- 1 ]:.2f} s, Parallel: { parallel_times [- 1 ]:.2f} s" )
55-
52+
53+ print (
54+ f" Sequential: { sequential_times [- 1 ]:.2f} s, Parallel: { parallel_times [- 1 ]:.2f} s"
55+ )
56+
5657 # Calculate statistics
5758 avg_seq = statistics .mean (sequential_times )
5859 avg_par = statistics .mean (parallel_times )
59- speedup = avg_seq / avg_par if avg_par > 0 else float (' inf' )
60-
60+ speedup = avg_seq / avg_par if avg_par > 0 else float (" inf" )
61+
6162 result = {
62- ' n_agents' : n_agents ,
63- ' sequential_time' : avg_seq ,
64- ' parallel_time' : avg_par ,
65- ' speedup' : speedup ,
66- ' per_agent_seq' : avg_seq / n_agents ,
67- ' per_agent_par' : avg_par / n_agents
63+ " n_agents" : n_agents ,
64+ " sequential_time" : avg_seq ,
65+ " parallel_time" : avg_par ,
66+ " speedup" : speedup ,
67+ " per_agent_seq" : avg_seq / n_agents ,
68+ " per_agent_par" : avg_par / n_agents ,
6869 }
69-
70- print (f" 📊 Results: Sequential { avg_seq :.2f} s, Parallel { avg_par :.2f} s, Speedup { speedup :.2f} x" )
70+
71+ print (
72+ f" 📊 Results: Sequential { avg_seq :.2f} s, Parallel { avg_par :.2f} s, Speedup { speedup :.2f} x"
73+ )
7174 return result
72-
73- def run_benchmark (self , agent_counts : List [int ] = None , test_model_class = None ) -> List [Dict ]:
75+
76+ def run_benchmark (
77+ self , agent_counts : list [int ] = None , test_model_class = None
78+ ) -> list [dict ]:
7479 """Run comprehensive performance benchmark"""
7580 if agent_counts is None :
7681 agent_counts = [5 , 10 , 15 , 20 , 25 , 30 , 40 , 50 ]
77-
82+
7883 self .results = []
79-
84+
8085 print ("🚀 Mesa-LLM Performance Benchmark" )
8186 print ("=" * 50 )
8287 print ("📋 Testing parallel vs sequential execution" )
8388 print ("⚠️ Using 10ms simulated LLM work per agent" )
8489 print ("" )
85-
90+
8691 for n_agents in agent_counts :
87- result = self .run_single_test (n_agents , runs = 3 , test_model_class = test_model_class )
92+ result = self .run_single_test (
93+ n_agents , runs = 3 , test_model_class = test_model_class
94+ )
8895 self .results .append (result )
89-
96+
9097 return self .results
91-
98+
9299 def print_summary (self ):
93100 """Print comprehensive performance analysis"""
94101 print ("\n 📈 PERFORMANCE BENCHMARK RESULTS" )
95102 print ("=" * 80 )
96-
97- print (f"{ 'Agents' :<8} { 'Sequential' :<12} { 'Parallel' :<12} { 'Speedup' :<10} { 'Efficiency' :<12} " )
103+
104+ print (
105+ f"{ 'Agents' :<8} { 'Sequential' :<12} { 'Parallel' :<12} { 'Speedup' :<10} { 'Efficiency' :<12} "
106+ )
98107 print ("-" * 80 )
99-
108+
100109 for result in self .results :
101- n_agents = result ['n_agents' ]
102- seq_time = result ['sequential_time' ]
103- par_time = result ['parallel_time' ]
104- speedup = result ['speedup' ]
105- efficiency = speedup / n_agents if speedup != float ('inf' ) else 0
106-
107- print (f"{ n_agents :<8} { seq_time :<12.2f} { par_time :<12.2f} "
108- f"{ speedup :<10.2f} x { efficiency :<12.4f} " )
109-
110+ n_agents = result ["n_agents" ]
111+ seq_time = result ["sequential_time" ]
112+ par_time = result ["parallel_time" ]
113+ speedup = result ["speedup" ]
114+ efficiency = speedup / n_agents if speedup != float ("inf" ) else 0
115+
116+ print (
117+ f"{ n_agents :<8} { seq_time :<12.2f} { par_time :<12.2f} "
118+ f"{ speedup :<10.2f} x { efficiency :<12.4f} "
119+ )
120+
110121 print ("\n 🔍 Performance Analysis:" )
111-
122+
112123 # Check scaling characteristics
113124 if len (self .results ) >= 3 :
114125 first_result = self .results [0 ]
115126 last_result = self .results [- 1 ]
116-
117- seq_scaling = ( last_result [' per_agent_seq' ] / first_result [' per_agent_seq' ])
118- par_scaling = ( last_result [' per_agent_par' ] / first_result [' per_agent_par' ])
119-
127+
128+ seq_scaling = last_result [" per_agent_seq" ] / first_result [" per_agent_seq" ]
129+ par_scaling = last_result [" per_agent_par" ] / first_result [" per_agent_par" ]
130+
120131 print (f"Sequential scaling factor: { seq_scaling :.2f} x (1.0 = ideal)" )
121132 print (f"Parallel scaling factor: { par_scaling :.2f} x (1.0 = ideal)" )
122-
133+
123134 # Evaluate sequential scaling
124135 if seq_scaling > 2.0 :
125136 print ("⚠️ SEQUENTIAL: Exponential scaling detected!" )
126137 elif seq_scaling > 1.5 :
127138 print ("⚠️ SEQUENTIAL: Sub-linear scaling" )
128139 else :
129140 print ("✅ SEQUENTIAL: Perfect linear scaling" )
130-
141+
131142 # Evaluate parallel scaling
132143 if par_scaling > 2.0 :
133144 print ("⚠️ PARALLEL: Exponential scaling detected!" )
134145 elif par_scaling > 1.5 :
135146 print ("⚠️ PARALLEL: Sub-linear scaling" )
136147 else :
137148 print ("✅ PARALLEL: Good linear scaling" )
138-
149+
139150 # Evaluate speedup
140- valid_speedups = [r ['speedup' ] for r in self .results if r ['speedup' ] != float ('inf' )]
151+ valid_speedups = [
152+ r ["speedup" ] for r in self .results if r ["speedup" ] != float ("inf" )
153+ ]
141154 if valid_speedups :
142155 avg_speedup = statistics .mean (valid_speedups )
143156 print (f"Average speedup: { avg_speedup :.2f} x" )
144-
157+
145158 if avg_speedup > 5.0 :
146159 print ("🎉 EXCELLENT: Parallel provides outstanding speedup!" )
147160 elif avg_speedup > 3.0 :
@@ -152,37 +165,46 @@ def print_summary(self):
152165 print ("⚠️ MINIMAL: Parallel provides small speedup" )
153166 else :
154167 print ("❌ POOR: Parallel provides no speedup" )
155-
168+
156169 print ("\n 💡 Key Insights:" )
157170 print (" • Each agent simulates 10ms LLM API response time" )
158171 print (" • Parallel execution processes agents concurrently" )
159172 print (" • Speedup demonstrates effectiveness of optimizations" )
160173 print (" • Linear scaling confirms no performance bottlenecks" )
161-
174+
162175 print ("\n 📝 Notes:" )
163176 print (" • This benchmark tests parallel stepping infrastructure" )
164177 print (" • Real-world performance depends on actual API response times" )
165178 print (" • Results demonstrate performance optimizations work correctly" )
166-
179+
167180 def save_results (self , filename : str = "benchmark_results.csv" ):
168181 """Save benchmark results to CSV file"""
169182 if not self .results :
170183 print ("No results to save!" )
171184 return
172-
185+
173186 # Save to results directory
174- results_dir = os .path .join (os .path .dirname (os .path .dirname (__file__ )), "results" )
187+ results_dir = os .path .join (
188+ os .path .dirname (os .path .dirname (__file__ )), "results"
189+ )
175190 filepath = os .path .join (results_dir , filename )
176-
191+
177192 # Ensure results directory exists
178193 os .makedirs (results_dir , exist_ok = True )
179-
180- with open (filepath , 'w' , newline = '' ) as csvfile :
181- fieldnames = ['n_agents' , 'sequential_time' , 'parallel_time' , 'speedup' , 'per_agent_seq' , 'per_agent_par' ]
194+
195+ with open (filepath , "w" , newline = "" ) as csvfile :
196+ fieldnames = [
197+ "n_agents" ,
198+ "sequential_time" ,
199+ "parallel_time" ,
200+ "speedup" ,
201+ "per_agent_seq" ,
202+ "per_agent_par" ,
203+ ]
182204 writer = csv .DictWriter (csvfile , fieldnames = fieldnames )
183-
205+
184206 writer .writeheader ()
185207 for result in self .results :
186208 writer .writerow (result )
187-
209+
188210 print (f"💾 Results saved to { filepath } " )