"""Performance benchmark framework for mesa-llm"""
import csv
import os
import statistics
import time


1511class PerformanceBenchmark :
1612 """Performance testing and analysis framework"""
17-
13+
1814 def __init__ (self ):
19- self .results : List [Dict ] = []
20-
21- def run_single_test (self , n_agents : int , runs : int = 3 , test_model_class = None ) -> Dict :
15+ self .results : list [dict ] = []
16+
17+ def run_single_test (
18+ self , n_agents : int , runs : int = 3 , test_model_class = None
19+ ) -> dict :
2220 """Run performance test for specific agent count"""
2321 print (f"\n 🔬 Testing { n_agents } agents..." )
24-
22+
2523 # Import test models if not provided
2624 if test_model_class is None :
2725 from tests .test_models import PerformanceTestModel
26+
2827 test_model_class = PerformanceTestModel
29-
28+
3029 sequential_times = []
3130 parallel_times = []
32-
31+
3332 for run in range (runs ):
3433 print (f" Run { run + 1 } /{ runs } ..." )
35-
34+
3635 # Test sequential execution
3736 start_time = time .time ()
3837 model_seq = test_model_class (n_agents = n_agents , enable_parallel = False )
3938 creation_time = time .time () - start_time
40-
39+
4140 step_start = time .time ()
4241 model_seq .step_sequential ()
4342 step_time = time .time () - step_start
4443 sequential_times .append (step_time )
45-
44+
4645 # Test parallel execution
4746 start_time = time .time ()
4847 model_par = test_model_class (n_agents = n_agents , enable_parallel = True )
4948 step_start = time .time ()
5049 model_par .step_parallel ()
5150 step_time = time .time () - step_start
5251 parallel_times .append (step_time )
53-
54- print (f" Sequential: { sequential_times [- 1 ]:.2f} s, Parallel: { parallel_times [- 1 ]:.2f} s" )
55-
52+
53+ print (
54+ f" Sequential: { sequential_times [- 1 ]:.2f} s, Parallel: { parallel_times [- 1 ]:.2f} s"
55+ )
56+
5657 # Calculate statistics
5758 avg_seq = statistics .mean (sequential_times )
5859 avg_par = statistics .mean (parallel_times )
59- speedup = avg_seq / avg_par if avg_par > 0 else float (' inf' )
60-
60+ speedup = avg_seq / avg_par if avg_par > 0 else float (" inf" )
61+
6162 result = {
62- ' n_agents' : n_agents ,
63- ' sequential_time' : avg_seq ,
64- ' parallel_time' : avg_par ,
65- ' speedup' : speedup ,
66- ' per_agent_seq' : avg_seq / n_agents ,
67- ' per_agent_par' : avg_par / n_agents
63+ " n_agents" : n_agents ,
64+ " sequential_time" : avg_seq ,
65+ " parallel_time" : avg_par ,
66+ " speedup" : speedup ,
67+ " per_agent_seq" : avg_seq / n_agents ,
68+ " per_agent_par" : avg_par / n_agents ,
6869 }
69-
70- print (f" 📊 Results: Sequential { avg_seq :.2f} s, Parallel { avg_par :.2f} s, Speedup { speedup :.2f} x" )
70+
71+ print (
72+ f" 📊 Results: Sequential { avg_seq :.2f} s, Parallel { avg_par :.2f} s, Speedup { speedup :.2f} x"
73+ )
7174 return result
72-
73- def run_benchmark (self , agent_counts : List [int ] = None , test_model_class = None ) -> List [Dict ]:
75+
76+ def run_benchmark (
77+ self , agent_counts : list [int ] = None , test_model_class = None
78+ ) -> list [dict ]:
7479 """Run comprehensive performance benchmark"""
7580 if agent_counts is None :
7681 agent_counts = [5 , 10 , 15 , 20 , 25 , 30 , 40 , 50 ]
77-
82+
7883 self .results = []
79-
84+
8085 print ("🚀 Mesa-LLM Performance Benchmark" )
8186 print ("=" * 50 )
8287 print ("📋 Testing parallel vs sequential execution" )
8388 print ("⚠️ Using 10ms simulated LLM work per agent" )
8489 print ("" )
85-
90+
8691 for n_agents in agent_counts :
87- result = self .run_single_test (n_agents , runs = 3 , test_model_class = test_model_class )
92+ result = self .run_single_test (
93+ n_agents , runs = 3 , test_model_class = test_model_class
94+ )
8895 self .results .append (result )
89-
96+
9097 return self .results
91-
98+
9299 def print_summary (self ):
93100 """Print comprehensive performance analysis"""
94101 print ("\n 📈 PERFORMANCE BENCHMARK RESULTS" )
95102 print ("=" * 80 )
96-
97- print (f"{ 'Agents' :<8} { 'Sequential' :<12} { 'Parallel' :<12} { 'Speedup' :<10} { 'Efficiency' :<12} " )
103+
104+ print (
105+ f"{ 'Agents' :<8} { 'Sequential' :<12} { 'Parallel' :<12} { 'Speedup' :<10} { 'Efficiency' :<12} "
106+ )
98107 print ("-" * 80 )
99-
108+
100109 for result in self .results :
101- n_agents = result ['n_agents' ]
102- seq_time = result ['sequential_time' ]
103- par_time = result ['parallel_time' ]
104- speedup = result ['speedup' ]
105- efficiency = speedup / n_agents if speedup != float ('inf' ) else 0
106-
107- print (f"{ n_agents :<8} { seq_time :<12.2f} { par_time :<12.2f} "
108- f"{ speedup :<10.2f} x { efficiency :<12.4f} " )
109-
110+ n_agents = result ["n_agents" ]
111+ seq_time = result ["sequential_time" ]
112+ par_time = result ["parallel_time" ]
113+ speedup = result ["speedup" ]
114+ efficiency = speedup / n_agents if speedup != float ("inf" ) else 0
115+
116+ print (
117+ f"{ n_agents :<8} { seq_time :<12.2f} { par_time :<12.2f} "
118+ f"{ speedup :<10.2f} x { efficiency :<12.4f} "
119+ )
120+
110121 print ("\n 🔍 Performance Analysis:" )
111-
122+
112123 # Check scaling characteristics
113124 if len (self .results ) >= 3 :
114125 first_result = self .results [0 ]
115126 last_result = self .results [- 1 ]
116-
117- seq_scaling = ( last_result [' per_agent_seq' ] / first_result [' per_agent_seq' ])
118- par_scaling = ( last_result [' per_agent_par' ] / first_result [' per_agent_par' ])
119-
127+
128+ seq_scaling = last_result [" per_agent_seq" ] / first_result [" per_agent_seq" ]
129+ par_scaling = last_result [" per_agent_par" ] / first_result [" per_agent_par" ]
130+
120131 print (f"Sequential scaling factor: { seq_scaling :.2f} x (1.0 = ideal)" )
121132 print (f"Parallel scaling factor: { par_scaling :.2f} x (1.0 = ideal)" )
122-
133+
123134 # Evaluate sequential scaling
124135 if seq_scaling > 2.0 :
125136 print ("⚠️ SEQUENTIAL: Exponential scaling detected!" )
126137 elif seq_scaling > 1.5 :
127138 print ("⚠️ SEQUENTIAL: Sub-linear scaling" )
128139 else :
129140 print ("✅ SEQUENTIAL: Perfect linear scaling" )
130-
141+
131142 # Evaluate parallel scaling
132143 if par_scaling > 2.0 :
133144 print ("⚠️ PARALLEL: Exponential scaling detected!" )
134145 elif par_scaling > 1.5 :
135146 print ("⚠️ PARALLEL: Sub-linear scaling" )
136147 else :
137148 print ("✅ PARALLEL: Good linear scaling" )
138-
149+
139150 # Evaluate speedup
140- valid_speedups = [r ['speedup' ] for r in self .results if r ['speedup' ] != float ('inf' )]
151+ valid_speedups = [
152+ r ["speedup" ] for r in self .results if r ["speedup" ] != float ("inf" )
153+ ]
141154 if valid_speedups :
142155 avg_speedup = statistics .mean (valid_speedups )
143156 print (f"Average speedup: { avg_speedup :.2f} x" )
144-
157+
145158 if avg_speedup > 5.0 :
146159 print ("🎉 EXCELLENT: Parallel provides outstanding speedup!" )
147160 elif avg_speedup > 3.0 :
@@ -152,37 +165,46 @@ def print_summary(self):
152165 print ("⚠️ MINIMAL: Parallel provides small speedup" )
153166 else :
154167 print ("❌ POOR: Parallel provides no speedup" )
155-
168+
156169 print ("\n 💡 Key Insights:" )
157170 print (" • Each agent simulates 10ms LLM API response time" )
158171 print (" • Parallel execution processes agents concurrently" )
159172 print (" • Speedup demonstrates effectiveness of optimizations" )
160173 print (" • Linear scaling confirms no performance bottlenecks" )
161-
174+
162175 print ("\n 📝 Notes:" )
163176 print (" • This benchmark tests parallel stepping infrastructure" )
164177 print (" • Real-world performance depends on actual API response times" )
165178 print (" • Results demonstrate performance optimizations work correctly" )
166-
179+
167180 def save_results (self , filename : str = "benchmark_results.csv" ):
168181 """Save benchmark results to CSV file"""
169182 if not self .results :
170183 print ("No results to save!" )
171184 return
172-
185+
173186 # Save to results directory
174- results_dir = os .path .join (os .path .dirname (os .path .dirname (__file__ )), "results" )
187+ results_dir = os .path .join (
188+ os .path .dirname (os .path .dirname (__file__ )), "results"
189+ )
175190 filepath = os .path .join (results_dir , filename )
176-
191+
177192 # Ensure results directory exists
178193 os .makedirs (results_dir , exist_ok = True )
179-
180- with open (filepath , 'w' , newline = '' ) as csvfile :
181- fieldnames = ['n_agents' , 'sequential_time' , 'parallel_time' , 'speedup' , 'per_agent_seq' , 'per_agent_par' ]
194+
195+ with open (filepath , "w" , newline = "" ) as csvfile :
196+ fieldnames = [
197+ "n_agents" ,
198+ "sequential_time" ,
199+ "parallel_time" ,
200+ "speedup" ,
201+ "per_agent_seq" ,
202+ "per_agent_par" ,
203+ ]
182204 writer = csv .DictWriter (csvfile , fieldnames = fieldnames )
183-
205+
184206 writer .writeheader ()
185207 for result in self .results :
186208 writer .writerow (result )
187-
209+
188210 print (f"💾 Results saved to { filepath } " )