Commit 9df937a

Merge pull request #287 from bluebread/novelty
Add novelty rejection sampling feature
2 parents c736a30 + 1ea8cc6 commit 9df937a

7 files changed (+285, -5 lines)

examples/function_minimization/config.yaml

Lines changed: 10 additions & 4 deletions
@@ -1,17 +1,20 @@
 # Configuration for function minimization example
-max_iterations: 50
+max_iterations: 10
 checkpoint_interval: 5
 
 # LLM configuration
 llm:
-  primary_model: "gemini-2.5-flash-lite"
+  # primary_model: "gemini-2.5-flash-lite"
+  primary_model: "gpt-5-mini"
   # primary_model: "llama3.1-8b"
   primary_model_weight: 0.8
-  secondary_model: "gemini-2.5-flash"
+  # secondary_model: "gemini-2.5-flash"
   # secondary_model: "llama-4-scout-17b-16e-instruct"
+  secondary_model: "gpt-5-nano"
   secondary_model_weight: 0.2
-  api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
+  # api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
   # api_base: "https://api.cerebras.ai/v1"
+  api_base: "https://api.openai.com/v1"
   temperature: 0.7
   max_tokens: 16000
   timeout: 120
@@ -28,6 +31,9 @@ database:
   elite_selection_ratio: 0.2
   exploitation_ratio: 0.7
 
+  embedding_model: "text-embedding-3-small"
+  similarity_threshold: 0.99
+
 # Evaluator configuration
 evaluator:
   timeout: 60

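For context, the two keys added to the database section are what turn the feature on: embedding_model selects the embedding backend, and similarity_threshold is the cosine-similarity cutoff above which a candidate program is sent to the LLM novelty judge. A minimal sketch of reading them with PyYAML (illustrative only, not openevolve's actual config loader):

import yaml

with open("examples/function_minimization/config.yaml") as f:
    raw = yaml.safe_load(f)

db_cfg = raw.get("database", {})

# Both keys default to "feature off" behaviour when absent: without an
# embedding model no embedding client is created, and a threshold <= 0.0
# disables the novelty check entirely (see _is_novel in openevolve/database.py).
embedding_model = db_cfg.get("embedding_model")              # "text-embedding-3-small"
similarity_threshold = db_cfg.get("similarity_threshold", 0.99)

print(embedding_model, similarity_threshold)
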
openevolve/config.py

Lines changed: 7 additions & 1 deletion
@@ -5,10 +5,13 @@
 import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
 
 import yaml
 
+if TYPE_CHECKING:
+    from openevolve.llm.base import LLMInterface
+
 
 @dataclass
 class LLMModelConfig:
@@ -283,6 +286,9 @@ class DatabaseConfig:
     cleanup_old_artifacts: bool = True
     artifact_retention_days: int = 30
 
+    novelty_llm: Optional["LLMInterface"] = None
+    embedding_model: Optional[str] = None
+    similarity_threshold: float = 0.99
 
 @dataclass
 class EvaluatorConfig:

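A minimal sketch of how the three new DatabaseConfig fields fit together (assuming the remaining DatabaseConfig fields all carry defaults, as the surrounding dataclass suggests): embedding_model and similarity_threshold come from YAML, while novelty_llm is never read from the file; the controller injects the live LLM ensemble just before the database is built, as the next diff shows.

from openevolve.config import DatabaseConfig

db_config = DatabaseConfig(
    embedding_model="text-embedding-3-small",  # turns the embedding client on
    similarity_threshold=0.99,                 # cosine-similarity cutoff for the LLM judge
)

# novelty_llm stays None here; the controller assigns its llm_ensemble to
# this field at runtime (see openevolve/controller.py below).
assert db_config.novelty_llm is None
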
openevolve/controller.py

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ def __init__(
         if self.config.random_seed is not None:
             self.config.database.random_seed = self.config.random_seed
 
+        self.config.database.novelty_llm = self.llm_ensemble
         self.database = ProgramDatabase(self.config.database)
 
         self.evaluator = Evaluator(

openevolve/database.py

Lines changed: 129 additions & 0 deletions
@@ -72,6 +72,9 @@ class Program:
     artifacts_json: Optional[str] = None  # JSON-serialized small artifacts
     artifact_dir: Optional[str] = None  # Path to large artifact files
 
+    # Embedding vector for novelty rejection sampling
+    embedding: Optional[List[float]] = None
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary representation"""
         return asdict(self)
@@ -183,6 +186,13 @@ def __init__(self, config: DatabaseConfig):
         }
 
         logger.info(f"Initialized program database with {len(self.programs)} programs")
+
+        # Novelty judge setup
+        from openevolve.embedding import EmbeddingClient
+        self.novelty_llm = config.novelty_llm
+        self.embedding_client = EmbeddingClient(config.embedding_model) if config.embedding_model else None
+        self.similarity_threshold = config.similarity_threshold
+
 
     def add(
         self, program: Program, iteration: int = None, target_island: Optional[int] = None
@@ -240,6 +250,11 @@ def add(
 
         island_idx = island_idx % len(self.islands)  # Ensure valid island
 
+        # Novelty check before adding
+        if not self._is_novel(program.id, island_idx):
+            logger.debug(f"Program {program.id} failed the novelty check and won't be added to island {island_idx}")
+            return program.id  # Do not add non-novel program
+
         # Add to island-specific feature map (replacing existing if better)
         feature_key = self._feature_coords_to_key(feature_coords)
         island_feature_map = self.island_feature_maps[island_idx]
@@ -931,6 +946,120 @@ def _feature_coords_to_key(self, coords: List[int]) -> str:
         """
         return "-".join(str(c) for c in coords)
 
+    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
+        """
+        Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+        Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/database/dbase.py#L1452
+
+        Compute cosine similarity between two vectors.
+        """
+        if not vec1 or not vec2 or len(vec1) != len(vec2):
+            return 0.0
+
+        arr1 = np.array(vec1, dtype=np.float32)
+        arr2 = np.array(vec2, dtype=np.float32)
+
+        norm_a = np.linalg.norm(arr1)
+        norm_b = np.linalg.norm(arr2)
+
+        if norm_a == 0 or norm_b == 0:
+            return 0.0
+
+        similarity = np.dot(arr1, arr2) / (norm_a * norm_b)
+
+        return float(similarity)
+
+    def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool:
+        """
+        Use LLM to judge if a program is novel compared to a similar existing program
+        """
+        import asyncio
+        from openevolve.novelty_judge import NOVELTY_SYSTEM_MSG, NOVELTY_USER_MSG
+
+        user_msg = NOVELTY_USER_MSG.format(
+            language=program.language,
+            existing_code=similar_program.code,
+            proposed_code=program.code,
+        )
+
+        try:
+            content: str = asyncio.run(
+                self.novelty_llm.generate_with_context(
+                    system_msg=NOVELTY_SYSTEM_MSG,
+                    messages=[{"role": "user", "content": user_msg}],
+                )
+            )
+
+            if content is None or not content.strip():
+                logger.warning("Novelty LLM returned empty response")
+                return True
+
+            content = content.strip()
+
+            # Parse the response
+            NOVEL_i = content.upper().find("NOVEL")
+            NOT_NOVEL_i = content.upper().find("NOT_NOVEL")
+
+            if NOVEL_i == -1 and NOT_NOVEL_i == -1:
+                logger.warning(f"Unexpected novelty LLM response: {content}")
+                return True  # Assume novel if we can't parse
+
+            if NOVEL_i != -1 and NOT_NOVEL_i != -1:
+                # Both found, take the one that appears first
+                is_novel = NOVEL_i < NOT_NOVEL_i
+            elif NOVEL_i != -1:
+                is_novel = True
+            else:
+                is_novel = False
+
+            return is_novel
+
+        except Exception as e:
+            logger.error(f"Error in novelty LLM check: {e}")
+
+        return True
+
+    def _is_novel(self, program_id: str, island_idx: int) -> bool:
+        """
+        Determine if a program is novel based on diversity to existing programs
+
+        Args:
+            program_id: ID of the program to check
+            island_idx: Island index
+
+        Returns:
+            True if novel, False otherwise
+        """
+        if self.embedding_client is None or self.similarity_threshold <= 0.0:
+            # Novelty checking disabled
+            return True
+
+        program = self.programs[program_id]
+        embd = self.embedding_client.get_embedding(program.code)
+        self.programs[program_id].embedding = embd
+
+        max_smlty = float('-inf')
+        max_smlty_pid = None
+
+        for pid in self.islands[island_idx]:
+            other = self.programs[pid]
+
+            if other.embedding is None:
+                logger.warning("Program %s has no embedding, skipping similarity check", other.id)
+                continue
+
+            similarity = self._cosine_similarity(embd, other.embedding)
+
+            if similarity >= max(max_smlty, self.similarity_threshold):
+                max_smlty = similarity
+                max_smlty_pid = pid
+
+        if max_smlty_pid is None:
+            # No similar programs found, consider it novel
+            return True
+
+        return self._llm_judge_novelty(program, self.programs[max_smlty_pid])
+
     def _is_better(self, program1: Program, program2: Program) -> bool:
         """
         Determine if program1 has better FITNESS than program2

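To make the gating rule in _is_novel concrete, here is a self-contained sketch with made-up embeddings (the program IDs and vectors are hypothetical, and no embedding API or LLM call is made): only neighbours whose similarity reaches the threshold can become the match handed to the LLM judge; if none qualifies, the program is accepted as novel outright.

import numpy as np

def cosine_similarity(a, b):
    a, b = np.asarray(a, dtype=np.float32), np.asarray(b, dtype=np.float32)
    na, nb = np.linalg.norm(a), np.linalg.norm(b)
    return 0.0 if na == 0 or nb == 0 else float(np.dot(a, b) / (na * nb))

SIMILARITY_THRESHOLD = 0.99

candidate = [0.60, 0.80, 0.00]            # toy embedding of the proposed program
island = {                                # toy embeddings of programs already on the island
    "prog-a": [0.59, 0.81, 0.01],         # nearly identical -> above threshold
    "prog-b": [0.10, 0.20, 0.97],         # clearly different -> below threshold
}

best_pid, best_sim = None, float("-inf")
for pid, emb in island.items():
    sim = cosine_similarity(candidate, emb)
    # Same rule as _is_novel: only programs at or above the threshold
    # can become the "most similar" match that goes to the LLM judge.
    if sim >= max(best_sim, SIMILARITY_THRESHOLD):
        best_pid, best_sim = pid, sim

if best_pid is None:
    print("novel: no existing program is similar enough")
else:
    print(f"ask the LLM judge to compare against {best_pid} (similarity {best_sim:.4f})")
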
openevolve/embedding.py

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+"""
+Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/llm/embedding.py
+"""
+
+import os
+import openai
+from typing import Union, List
+import logging
+
+logger = logging.getLogger(__name__)
+
+M = 1_000_000
+
+OPENAI_EMBEDDING_MODELS = [
+    "text-embedding-3-small",
+    "text-embedding-3-large",
+]
+
+AZURE_EMBEDDING_MODELS = [
+    "azure-text-embedding-3-small",
+    "azure-text-embedding-3-large",
+]
+
+OPENAI_EMBEDDING_COSTS = {
+    "text-embedding-3-small": 0.02 / M,
+    "text-embedding-3-large": 0.13 / M,
+}
+
+class EmbeddingClient:
+    def __init__(
+        self, model_name: str = "text-embedding-3-small"):
+        """
+        Initialize the EmbeddingClient.
+
+        Args:
+            model_name (str): The OpenAI embedding model name to use.
+        """
+        self.client, self.model = self._get_client_model(model_name)
+
+    def _get_client_model(self, model_name: str) -> tuple[openai.OpenAI, str]:
+        if model_name in OPENAI_EMBEDDING_MODELS:
+            client = openai.OpenAI()
+            model_to_use = model_name
+        elif model_name in AZURE_EMBEDDING_MODELS:
+            # get rid of the azure- prefix
+            model_to_use = model_name.split("azure-")[-1]
+            client = openai.AzureOpenAI(
+                api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+                api_version=os.getenv("AZURE_API_VERSION"),
+                azure_endpoint=os.getenv("AZURE_API_ENDPOINT"),
+            )
+        else:
+            raise ValueError(f"Invalid embedding model: {model_name}")
+
+        return client, model_to_use
+
+    def get_embedding(
+        self, code: Union[str, List[str]]
+    ) -> Union[List[float], List[List[float]]]:
+        """
+        Computes the text embedding for a code string.
+
+        Args:
+            code (str, list[str]): The code as a string or list
+                of strings.
+
+        Returns:
+            list: Embedding vector for the code, or an empty list
+                if an error occurs.
+        """
+        if isinstance(code, str):
+            code = [code]
+            single_code = True
+        else:
+            single_code = False
+        try:
+            response = self.client.embeddings.create(
+                model=self.model, input=code, encoding_format="float"
+            )
+            # Extract embedding from response
+            if single_code:
+                return response.data[0].embedding
+            else:
+                return [d.embedding for d in response.data]
+        except Exception as e:
+            logger.warning(f"Error getting embedding: {e}")
+            if single_code:
+                return []
+            else:
+                return [[]]

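A short usage sketch for the client above (illustrative: it assumes OPENAI_API_KEY is set in the environment and network access to the OpenAI embeddings API):

from openevolve.embedding import EmbeddingClient

# Requires OPENAI_API_KEY for the default OpenAI client; errors during the
# API call are caught and an empty embedding is returned instead.
client = EmbeddingClient("text-embedding-3-small")

single = client.get_embedding("def f(x):\n    return x * x\n")
print(len(single))                  # embedding dimension for one code string

batch = client.get_embedding(["print('a')", "print('b')"])
print(len(batch), len(batch[0]))    # two embeddings, one per input string
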
openevolve/novelty_judge.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+"""
+Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/llm/embedding.py
+
+Prompt templates for novelty judging using LLMs.
+"""
+
+NOVELTY_SYSTEM_MSG = """You are an expert code reviewer tasked with determining if two code snippets are meaningfully different from each other.
+
+Your job is to analyze both programs and determine if the proposed code introduces meaningful changes compared to the existing code. Consider:
+
+1. **Algorithmic differences**: Different approaches, logic, or strategies
+2. **Structural changes**: Different data structures, control flow, or organization
+3. **Functional improvements**: New features, optimizations, or capabilities
+4. **Implementation variations**: Different ways of achieving the same goal that could lead to different performance characteristics
+5. **Hyperparameter changes**: Different hyperparameters that could lead to different performance characteristics
+
+Ignore trivial differences like:
+- Variable name changes
+- Minor formatting or style changes
+- Comments or documentation changes
+- Insignificant refactoring that doesn't change the core logic
+
+Respond with:
+- **NOVEL**: If the codes are meaningfully different
+- **NOT_NOVEL**: If the codes are essentially the same with only trivial differences
+
+After your decision, provide a brief explanation of your reasoning."""
+
+
+NOVELTY_USER_MSG = """Please analyze these two code snippets:
+
+**EXISTING CODE:**
+```{language}
+{existing_code}
+```
+
+**PROPOSED CODE:**
+```{language}
+{proposed_code}
+```
+
+Are these codes meaningfully different? Respond with NOVEL or NOT_NOVEL followed by your explanation."""

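A quick illustration of how these templates are consumed; the reply string below is hypothetical, and in OpenEvolve the formatting and parsing happen inside ProgramDatabase._llm_judge_novelty:

from openevolve.novelty_judge import NOVELTY_SYSTEM_MSG, NOVELTY_USER_MSG

user_msg = NOVELTY_USER_MSG.format(
    language="python",
    existing_code="def area(r):\n    return 3.14 * r * r",
    proposed_code="import math\n\ndef area(r):\n    return math.pi * r ** 2",
)

# A hypothetical judge reply; real replies come from the configured LLM ensemble.
reply = "NOT_NOVEL - only the constant changed, the algorithm is identical."

text = reply.upper()
novel_i, not_novel_i = text.find("NOVEL"), text.find("NOT_NOVEL")
if novel_i == -1 and not_novel_i == -1:
    is_novel = True                      # unparseable -> assume novel
elif novel_i != -1 and not_novel_i != -1:
    is_novel = novel_i < not_novel_i     # whichever verdict appears first wins
else:
    is_novel = novel_i != -1

print(is_novel)  # False for the reply above
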
openevolve/process_parallel.py

Lines changed: 4 additions & 0 deletions
@@ -295,6 +295,10 @@ def __init__(self, config: Config, evaluation_file: str, database: ProgramDataba
     def _serialize_config(self, config: Config) -> dict:
         """Serialize config object to a dictionary that can be pickled"""
         # Manual serialization to handle nested objects properly
+
+        # The asdict() call itself triggers the deepcopy which tries to serialize novelty_llm. Remove it first.
+        config.database.novelty_llm = None
+
         return {
             "llm": {
                 "models": [asdict(m) for m in config.llm.models],

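The new comment pinpoints the constraint: dataclasses.asdict() deep-copies every field value, so an object that cannot be deep-copied or pickled (such as a live LLM client) must be cleared first. A toy demonstration with a stand-in unpicklable field (ToyDatabaseConfig is hypothetical, not an OpenEvolve class):

import threading
from dataclasses import dataclass, field, asdict

@dataclass
class ToyDatabaseConfig:
    similarity_threshold: float = 0.99
    # Stand-in for a live LLM client holding unpicklable state (locks, sockets, ...).
    novelty_llm: object = field(default_factory=threading.Lock)

cfg = ToyDatabaseConfig()

try:
    asdict(cfg)                      # asdict() deep-copies field values...
except TypeError as exc:
    print(f"asdict failed: {exc}")   # ...and deepcopy cannot handle the lock

cfg.novelty_llm = None               # what _serialize_config does before calling asdict()
print(asdict(cfg))                   # serialization succeeds once the field is cleared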