@@ -636,7 +636,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
636636 if dim == "complexity" :
637637 # Use code length as complexity measure
638638 complexity = len (program .code )
639- bin_idx = min ( int ( complexity / 1000 * self .feature_bins ), self . feature_bins - 1 )
639+ bin_idx = self ._calculate_complexity_bin ( complexity )
640640 coords .append (bin_idx )
641641 elif dim == "diversity" :
642642 # Use average edit distance to other programs
@@ -650,9 +650,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
650650 calculate_edit_distance (program .code , other .code )
651651 for other in sample_programs
652652 ) / len (sample_programs )
653- bin_idx = min (
654- int (avg_distance / 1000 * self .feature_bins ), self .feature_bins - 1
655- )
653+ bin_idx = self ._calculate_diversity_bin (avg_distance )
656654 coords .append (bin_idx )
657655 elif dim == "score" :
658656 # Use average of numeric metrics
@@ -677,6 +675,87 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
677675 )
678676 return coords
679677
def _calculate_complexity_bin(self, complexity: int) -> int:
    """
    Calculate the bin index for a given complexity value using adaptive binning.

    With fewer than two stored programs a fixed range of 0-10000 characters
    is used. Otherwise the range is the minimum and maximum code length over
    ``self.programs``. Values outside the range are clamped into the first
    or last bin.

    Args:
        complexity: The complexity value (code length)

    Returns:
        Bin index in range [0, self.feature_bins - 1]
    """
    if len(self.programs) < 2:
        # Cold start: too few programs to derive a range from, so assume
        # code lengths of 0-10000 characters.
        min_complexity, max_complexity = 0, 10000
    else:
        # Adaptive binning: use the actual range of the existing programs.
        lengths = [len(p.code) for p in self.programs.values()]
        min_complexity, max_complexity = min(lengths), max(lengths)
        if max_complexity == min_complexity:
            # Every program has the same length; widen the range by one so
            # the division below is well defined.
            max_complexity = min_complexity + 1

    # The span is strictly positive on both paths above, so no separate
    # zero-range fallback is needed here.
    span = max_complexity - min_complexity
    normalized = (complexity - min_complexity) / span

    # Clamp to [0, 1] so out-of-range values fall into the edge bins.
    normalized = max(0.0, min(1.0, normalized))

    # normalized == 1.0 would produce feature_bins, one past the last valid
    # index, hence the upper cap.
    bin_idx = int(normalized * self.feature_bins)
    return max(0, min(self.feature_bins - 1, bin_idx))
719+
def _calculate_diversity_bin(self, avg_distance: float) -> int:
    """
    Calculate the bin index for a given diversity value using fixed-range binning.

    The distance is scaled against a fixed upper bound rather than the
    observed range of distances. The bound is 10000 while fewer than two
    programs are stored and 5000 afterwards; the lower bound is always 0.
    Values outside the range are clamped into the first or last bin.

    Args:
        avg_distance: The average edit distance to other programs

    Returns:
        Bin index in range [0, self.feature_bins - 1]
    """
    # The range is not derived from the stored programs; per the original
    # author's note this avoids recomputing edit distances, which is
    # expensive.
    # NOTE(review): the cold-start bound (10000) differs from the
    # steady-state bound (5000), so the same distance lands in a different
    # bin once a second program is stored. Confirm this is intended.
    max_distance = 10000 if len(self.programs) < 2 else 5000

    # Both bounds are positive constants, so the division is always safe.
    # Clamp to [0, 1] so out-of-range values fall into the edge bins.
    normalized = max(0.0, min(1.0, avg_distance / max_distance))

    # normalized == 1.0 would produce feature_bins, one past the last valid
    # index, hence the upper cap.
    bin_idx = int(normalized * self.feature_bins)
    return max(0, min(self.feature_bins - 1, bin_idx))
758+
680759 def _feature_coords_to_key (self , coords : List [int ]) -> str :
681760 """
682761 Convert feature coordinates to a string key
0 commit comments