@@ -159,7 +159,7 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]:
159159 Get the best program based on a metric
160160
161161 Args:
162- metric: Metric to use for ranking (uses average if None)
162+ metric: Metric to use for ranking (uses combined_score or average if None)
163163
164164 Returns:
165165 Best program or None if database is empty
@@ -169,6 +169,7 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]:
169169
170170 # If no specific metric and we have a tracked best program, return it
171171 if metric is None and self .best_program_id and self .best_program_id in self .programs :
172+ logger .debug (f"Using tracked best program: { self .best_program_id } " )
172173 return self .programs [self .best_program_id ]
173174
174175 if metric :
@@ -178,20 +179,40 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]:
178179 key = lambda p : p .metrics [metric ],
179180 reverse = True
180181 )
182+ if sorted_programs :
183+ logger .debug (f"Found best program by metric '{ metric } ': { sorted_programs [0 ].id } " )
184+ elif self .programs and all ("combined_score" in p .metrics for p in self .programs .values ()):
185+ # Sort by combined_score if it exists (preferred method)
186+ sorted_programs = sorted (
187+ self .programs .values (),
188+ key = lambda p : p .metrics ["combined_score" ],
189+ reverse = True
190+ )
191+ if sorted_programs :
192+ logger .debug (f"Found best program by combined_score: { sorted_programs [0 ].id } " )
181193 else :
182- # Sort by average of all metrics
194+ # Sort by average of all metrics as fallback
183195 sorted_programs = sorted (
184196 self .programs .values (),
185197 key = lambda p : sum (p .metrics .values ()) / max (1 , len (p .metrics )),
186198 reverse = True
187199 )
200+ if sorted_programs :
201+ logger .debug (f"Found best program by average metrics: { sorted_programs [0 ].id } " )
188202
189- # Update the best program tracking if we found a better program
190- if sorted_programs and (self .best_program_id is None or
191- sorted_programs [0 ].id != self .best_program_id ):
192- old_id = self .best_program_id
193- self .best_program_id = sorted_programs [0 ].id
194- logger .info (f"Updated best program tracking: { self .best_program_id } " )
203+ # Update the best program tracking if we found a better program
204+ if sorted_programs and (self .best_program_id is None or
205+ sorted_programs [0 ].id != self .best_program_id ):
206+ old_id = self .best_program_id
207+ self .best_program_id = sorted_programs [0 ].id
208+ logger .info (f"Updated best program tracking from { old_id } to { self .best_program_id } " )
209+
210+ # Also log the scores to help understand the update
211+ if old_id and old_id in self .programs and "combined_score" in self .programs [old_id ].metrics \
212+ and "combined_score" in self .programs [self .best_program_id ].metrics :
213+ old_score = self .programs [old_id ].metrics ["combined_score" ]
214+ new_score = self .programs [self .best_program_id ].metrics ["combined_score" ]
215+ logger .info (f"Score change: { old_score :.4f} → { new_score :.4f} ({ new_score - old_score :+.4f} )" )
195216
196217 return sorted_programs [0 ] if sorted_programs else None
197218
@@ -416,7 +437,11 @@ def _is_better(self, program1: Program, program2: Program) -> bool:
416437 if not program1 .metrics and program2 .metrics :
417438 return False
418439
419- # Compare average of metrics
440+ # Check for combined_score first (this is the preferred metric)
441+ if "combined_score" in program1 .metrics and "combined_score" in program2 .metrics :
442+ return program1 .metrics ["combined_score" ] > program2 .metrics ["combined_score" ]
443+
444+ # Fallback to average of all metrics
420445 avg1 = sum (program1 .metrics .values ()) / len (program1 .metrics )
421446 avg2 = sum (program2 .metrics .values ()) / len (program2 .metrics )
422447
@@ -466,18 +491,15 @@ def _update_best_program(self, program: Program) -> None:
466491 if self ._is_better (program , current_best ):
467492 old_id = self .best_program_id
468493 self .best_program_id = program .id
469- logger .info (f"New best program { program .id } replaces { old_id } " )
470494
471- # Log improvement in metrics
472- if program .metrics and current_best .metrics :
473- improvements = []
474- for metric , value in program .metrics .items ():
475- if metric in current_best .metrics :
476- diff = value - current_best .metrics [metric ]
477- improvements .append (f"{ metric } : { diff :+.4f} " )
478-
479- if improvements :
480- logger .info (f"Metric improvements: { ', ' .join (improvements )} " )
495+ # Log the change
496+ if "combined_score" in program .metrics and "combined_score" in current_best .metrics :
497+ old_score = current_best .metrics ["combined_score" ]
498+ new_score = program .metrics ["combined_score" ]
499+ score_diff = new_score - old_score
500+ logger .info (f"New best program { program .id } replaces { old_id } (combined_score: { old_score :.4f} → { new_score :.4f} , +{ score_diff :.4f} )" )
501+ else :
502+ logger .info (f"New best program { program .id } replaces { old_id } " )
481503
482504 def _sample_parent (self ) -> Program :
483505 """
0 commit comments