Commit 9df937a

Merge pull request #287 from bluebread/novelty
Add novelty rejection sampling feature
2 parents c736a30 + 1ea8cc6 commit 9df937a

7 files changed (+285, -5 lines)

examples/function_minimization/config.yaml

Lines changed: 10 additions & 4 deletions
@@ -1,17 +1,20 @@
 # Configuration for function minimization example
-max_iterations: 50
+max_iterations: 10
 checkpoint_interval: 5
 
 # LLM configuration
 llm:
-  primary_model: "gemini-2.5-flash-lite"
+  # primary_model: "gemini-2.5-flash-lite"
+  primary_model: "gpt-5-mini"
   # primary_model: "llama3.1-8b"
   primary_model_weight: 0.8
-  secondary_model: "gemini-2.5-flash"
+  # secondary_model: "gemini-2.5-flash"
   # secondary_model: "llama-4-scout-17b-16e-instruct"
+  secondary_model: "gpt-5-nano"
   secondary_model_weight: 0.2
-  api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
+  # api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
   # api_base: "https://api.cerebras.ai/v1"
+  api_base: "https://api.openai.com/v1"
   temperature: 0.7
   max_tokens: 16000
   timeout: 120
@@ -28,6 +31,9 @@ database:
   elite_selection_ratio: 0.2
   exploitation_ratio: 0.7
 
+  embedding_model: "text-embedding-3-small"
+  similarity_threshold: 0.99
+
 # Evaluator configuration
 evaluator:
   timeout: 60

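For context, the two keys added to the database section are what turn the feature on: embedding_model selects the embedding backend, and similarity_threshold is the cosine-similarity cutoff above which a candidate program is sent to the LLM novelty judge. A minimal sketch of reading them with PyYAML (illustrative only, not openevolve's actual config loader):

import yaml

with open("examples/function_minimization/config.yaml") as f:
    raw = yaml.safe_load(f)

db_cfg = raw.get("database", {})

# Both keys default to "feature off" behaviour when absent: without an
# embedding model no embedding client is created, and a threshold <= 0.0
# disables the novelty check entirely (see _is_novel in openevolve/database.py).
embedding_model = db_cfg.get("embedding_model")              # "text-embedding-3-small"
similarity_threshold = db_cfg.get("similarity_threshold", 0.99)

print(embedding_model, similarity_threshold)
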
openevolve/config.py

Lines changed: 7 additions & 1 deletion
@@ -5,10 +5,13 @@
 import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
 
 import yaml
 
+if TYPE_CHECKING:
+    from openevolve.llm.base import LLMInterface
+
 
 @dataclass
 class LLMModelConfig:
@@ -283,6 +286,9 @@ class DatabaseConfig:
     cleanup_old_artifacts: bool = True
     artifact_retention_days: int = 30
 
+    novelty_llm: Optional["LLMInterface"] = None
+    embedding_model: Optional[str] = None
+    similarity_threshold: float = 0.99
 
 @dataclass
 class EvaluatorConfig:

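A minimal sketch of how the three new DatabaseConfig fields fit together (assuming the remaining DatabaseConfig fields all carry defaults, as the surrounding dataclass suggests): embedding_model and similarity_threshold come from YAML, while novelty_llm is never read from the file; the controller injects the live LLM ensemble just before the database is built, as the next diff shows.

from openevolve.config import DatabaseConfig

db_config = DatabaseConfig(
    embedding_model="text-embedding-3-small",  # turns the embedding client on
    similarity_threshold=0.99,                 # cosine-similarity cutoff for the LLM judge
)

# novelty_llm stays None here; the controller assigns its llm_ensemble to
# this field at runtime (see openevolve/controller.py below).
assert db_config.novelty_llm is None
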
openevolve/controller.py

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ def __init__(
         if self.config.random_seed is not None:
             self.config.database.random_seed = self.config.random_seed
 
+        self.config.database.novelty_llm = self.llm_ensemble
         self.database = ProgramDatabase(self.config.database)
 
         self.evaluator = Evaluator(

openevolve/database.py

Lines changed: 129 additions & 0 deletions
@@ -72,6 +72,9 @@ class Program:
     artifacts_json: Optional[str] = None  # JSON-serialized small artifacts
     artifact_dir: Optional[str] = None  # Path to large artifact files
 
+    # Embedding vector for novelty rejection sampling
+    embedding: Optional[List[float]] = None
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary representation"""
         return asdict(self)
@@ -183,6 +186,13 @@ def __init__(self, config: DatabaseConfig):
         }
 
         logger.info(f"Initialized program database with {len(self.programs)} programs")
+
+        # Novelty judge setup
+        from openevolve.embedding import EmbeddingClient
+        self.novelty_llm = config.novelty_llm
+        self.embedding_client = EmbeddingClient(config.embedding_model) if config.embedding_model else None
+        self.similarity_threshold = config.similarity_threshold
+
 
     def add(
         self, program: Program, iteration: int = None, target_island: Optional[int] = None
@@ -240,6 +250,11 @@ def add(
 
         island_idx = island_idx % len(self.islands)  # Ensure valid island
 
+        # Novelty check before adding
+        if not self._is_novel(program.id, island_idx):
+            logger.debug(f"Program {program.id} failed the novelty check and won't be added to island {island_idx}")
+            return program.id  # Do not add non-novel program
+
         # Add to island-specific feature map (replacing existing if better)
         feature_key = self._feature_coords_to_key(feature_coords)
         island_feature_map = self.island_feature_maps[island_idx]
@@ -931,6 +946,120 @@ def _feature_coords_to_key(self, coords: List[int]) -> str:
         """
         return "-".join(str(c) for c in coords)
 
+    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
+        """
+        Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+        Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/database/dbase.py#L1452
+
+        Compute cosine similarity between two vectors.
+        """
+        if not vec1 or not vec2 or len(vec1) != len(vec2):
+            return 0.0
+
+        arr1 = np.array(vec1, dtype=np.float32)
+        arr2 = np.array(vec2, dtype=np.float32)
+
+        norm_a = np.linalg.norm(arr1)
+        norm_b = np.linalg.norm(arr2)
+
+        if norm_a == 0 or norm_b == 0:
+            return 0.0
+
+        similarity = np.dot(arr1, arr2) / (norm_a * norm_b)
+
+        return float(similarity)
+
+    def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool:
+        """
+        Use LLM to judge if a program is novel compared to a similar existing program
+        """
+        import asyncio
+        from openevolve.novelty_judge import NOVELTY_SYSTEM_MSG, NOVELTY_USER_MSG
+
+        user_msg = NOVELTY_USER_MSG.format(
+            language=program.language,
+            existing_code=similar_program.code,
+            proposed_code=program.code,
+        )
+
+        try:
+            content: str = asyncio.run(
+                self.novelty_llm.generate_with_context(
+                    system_msg=NOVELTY_SYSTEM_MSG,
+                    messages=[{"role": "user", "content": user_msg}],
+                )
+            )
+
+            if content is None or not content.strip():
+                logger.warning("Novelty LLM returned empty response")
+                return True
+
+            content = content.strip()
+
+            # Parse the response
+            NOVEL_i = content.upper().find("NOVEL")
+            NOT_NOVEL_i = content.upper().find("NOT_NOVEL")
+
+            if NOVEL_i == -1 and NOT_NOVEL_i == -1:
+                logger.warning(f"Unexpected novelty LLM response: {content}")
+                return True  # Assume novel if we can't parse
+
+            if NOVEL_i != -1 and NOT_NOVEL_i != -1:
+                # Both found, take the one that appears first
+                is_novel = NOVEL_i < NOT_NOVEL_i
+            elif NOVEL_i != -1:
+                is_novel = True
+            else:
+                is_novel = False
+
+            return is_novel
+
+        except Exception as e:
+            logger.error(f"Error in novelty LLM check: {e}")
+
+        return True
+
+    def _is_novel(self, program_id: str, island_idx: int) -> bool:
+        """
+        Determine if a program is novel based on diversity to existing programs
+
+        Args:
+            program_id: ID of the program to check
+            island_idx: Island index
+
+        Returns:
+            True if novel, False otherwise
+        """
+        if self.embedding_client is None or self.similarity_threshold <= 0.0:
+            # Novelty checking disabled
+            return True
+
+        program = self.programs[program_id]
+        embd = self.embedding_client.get_embedding(program.code)
+        self.programs[program_id].embedding = embd
+
+        max_smlty = float('-inf')
+        max_smlty_pid = None
+
+        for pid in self.islands[island_idx]:
+            other = self.programs[pid]
+
+            if other.embedding is None:
+                logger.warning("Program %s has no embedding, skipping similarity check", other.id)
+                continue
+
+            similarity = self._cosine_similarity(embd, other.embedding)
+
+            if similarity >= max(max_smlty, self.similarity_threshold):
+                max_smlty = similarity
+                max_smlty_pid = pid
+
+        if max_smlty_pid is None:
+            # No similar programs found, consider it novel
+            return True
+
+        return self._llm_judge_novelty(program, self.programs[max_smlty_pid])
+
     def _is_better(self, program1: Program, program2: Program) -> bool:
         """
         Determine if program1 has better FITNESS than program2

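To make the gating rule in _is_novel concrete, here is a self-contained sketch with made-up embeddings (the program IDs and vectors are hypothetical, and no embedding API or LLM call is made): only neighbours whose similarity reaches the threshold can become the match handed to the LLM judge; if none qualifies, the program is accepted as novel outright.

import numpy as np

def cosine_similarity(a, b):
    a, b = np.asarray(a, dtype=np.float32), np.asarray(b, dtype=np.float32)
    na, nb = np.linalg.norm(a), np.linalg.norm(b)
    return 0.0 if na == 0 or nb == 0 else float(np.dot(a, b) / (na * nb))

SIMILARITY_THRESHOLD = 0.99

candidate = [0.60, 0.80, 0.00]            # toy embedding of the proposed program
island = {                                # toy embeddings of programs already on the island
    "prog-a": [0.59, 0.81, 0.01],         # nearly identical -> above threshold
    "prog-b": [0.10, 0.20, 0.97],         # clearly different -> below threshold
}

best_pid, best_sim = None, float("-inf")
for pid, emb in island.items():
    sim = cosine_similarity(candidate, emb)
    # Same rule as _is_novel: only programs at or above the threshold
    # can become the "most similar" match that goes to the LLM judge.
    if sim >= max(best_sim, SIMILARITY_THRESHOLD):
        best_pid, best_sim = pid, sim

if best_pid is None:
    print("novel: no existing program is similar enough")
else:
    print(f"ask the LLM judge to compare against {best_pid} (similarity {best_sim:.4f})")
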
openevolve/embedding.py

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+"""
+Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/llm/embedding.py
+"""
+
+import os
+import openai
+from typing import Union, List
+import logging
+
+logger = logging.getLogger(__name__)
+
+M = 1_000_000
+
+OPENAI_EMBEDDING_MODELS = [
+    "text-embedding-3-small",
+    "text-embedding-3-large",
+]
+
+AZURE_EMBEDDING_MODELS = [
+    "azure-text-embedding-3-small",
+    "azure-text-embedding-3-large",
+]
+
+OPENAI_EMBEDDING_COSTS = {
+    "text-embedding-3-small": 0.02 / M,
+    "text-embedding-3-large": 0.13 / M,
+}
+
+class EmbeddingClient:
+    def __init__(
+        self, model_name: str = "text-embedding-3-small"):
+        """
+        Initialize the EmbeddingClient.
+
+        Args:
+            model_name (str): The OpenAI embedding model name to use.
+        """
+        self.client, self.model = self._get_client_model(model_name)
+
+    def _get_client_model(self, model_name: str) -> tuple[openai.OpenAI, str]:
+        if model_name in OPENAI_EMBEDDING_MODELS:
+            client = openai.OpenAI()
+            model_to_use = model_name
+        elif model_name in AZURE_EMBEDDING_MODELS:
+            # get rid of the azure- prefix
+            model_to_use = model_name.split("azure-")[-1]
+            client = openai.AzureOpenAI(
+                api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+                api_version=os.getenv("AZURE_API_VERSION"),
+                azure_endpoint=os.getenv("AZURE_API_ENDPOINT"),
+            )
+        else:
+            raise ValueError(f"Invalid embedding model: {model_name}")
+
+        return client, model_to_use
+
+    def get_embedding(
+        self, code: Union[str, List[str]]
+    ) -> Union[List[float], List[List[float]]]:
+        """
+        Computes the text embedding for a code string.
+
+        Args:
+            code (str, list[str]): The code as a string or list
+                of strings.
+
+        Returns:
+            list: Embedding vector for the code, or an empty list
+                if an error occurs.
+        """
+        if isinstance(code, str):
+            code = [code]
+            single_code = True
+        else:
+            single_code = False
+        try:
+            response = self.client.embeddings.create(
+                model=self.model, input=code, encoding_format="float"
+            )
+            # Extract embedding from response
+            if single_code:
+                return response.data[0].embedding
+            else:
+                return [d.embedding for d in response.data]
+        except Exception as e:
+            logger.warning(f"Error getting embedding: {e}")
+            if single_code:
+                return []
+            else:
+                return [[]]

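A short usage sketch for the client above (illustrative: it assumes OPENAI_API_KEY is set in the environment and network access to the OpenAI embeddings API):

from openevolve.embedding import EmbeddingClient

# Requires OPENAI_API_KEY for the default OpenAI client; errors during the
# API call are caught and an empty embedding is returned instead.
client = EmbeddingClient("text-embedding-3-small")

single = client.get_embedding("def f(x):\n    return x * x\n")
print(len(single))                  # embedding dimension for one code string

batch = client.get_embedding(["print('a')", "print('b')"])
print(len(batch), len(batch[0]))    # two embeddings, one per input string
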
openevolve/novelty_judge.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+"""
+Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/llm/embedding.py
+
+Prompt templates for novelty judging using LLMs.
+"""
+
+NOVELTY_SYSTEM_MSG = """You are an expert code reviewer tasked with determining if two code snippets are meaningfully different from each other.
+
+Your job is to analyze both programs and determine if the proposed code introduces meaningful changes compared to the existing code. Consider:
+
+1. **Algorithmic differences**: Different approaches, logic, or strategies
+2. **Structural changes**: Different data structures, control flow, or organization
+3. **Functional improvements**: New features, optimizations, or capabilities
+4. **Implementation variations**: Different ways of achieving the same goal that could lead to different performance characteristics
+5. **Hyperparameter changes**: Different hyperparameters that could lead to different performance characteristics
+
+Ignore trivial differences like:
+- Variable name changes
+- Minor formatting or style changes
+- Comments or documentation changes
+- Insignificant refactoring that doesn't change the core logic
+
+Respond with:
+- **NOVEL**: If the codes are meaningfully different
+- **NOT_NOVEL**: If the codes are essentially the same with only trivial differences
+
+After your decision, provide a brief explanation of your reasoning."""
+
+
+NOVELTY_USER_MSG = """Please analyze these two code snippets:
+
+**EXISTING CODE:**
+```{language}
+{existing_code}
+```
+
+**PROPOSED CODE:**
+```{language}
+{proposed_code}
+```
+
+Are these codes meaningfully different? Respond with NOVEL or NOT_NOVEL followed by your explanation."""

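A quick illustration of how these templates are consumed; the reply string below is hypothetical, and in OpenEvolve the formatting and parsing happen inside ProgramDatabase._llm_judge_novelty:

from openevolve.novelty_judge import NOVELTY_SYSTEM_MSG, NOVELTY_USER_MSG

user_msg = NOVELTY_USER_MSG.format(
    language="python",
    existing_code="def area(r):\n    return 3.14 * r * r",
    proposed_code="import math\n\ndef area(r):\n    return math.pi * r ** 2",
)

# A hypothetical judge reply; real replies come from the configured LLM ensemble.
reply = "NOT_NOVEL - only the constant changed, the algorithm is identical."

text = reply.upper()
novel_i, not_novel_i = text.find("NOVEL"), text.find("NOT_NOVEL")
if novel_i == -1 and not_novel_i == -1:
    is_novel = True                      # unparseable -> assume novel
elif novel_i != -1 and not_novel_i != -1:
    is_novel = novel_i < not_novel_i     # whichever verdict appears first wins
else:
    is_novel = novel_i != -1

print(is_novel)  # False for the reply above
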
openevolve/process_parallel.py

Lines changed: 4 additions & 0 deletions
@@ -295,6 +295,10 @@ def __init__(self, config: Config, evaluation_file: str, database: ProgramDataba
     def _serialize_config(self, config: Config) -> dict:
         """Serialize config object to a dictionary that can be pickled"""
         # Manual serialization to handle nested objects properly
+
+        # The asdict() call itself triggers the deepcopy which tries to serialize novelty_llm. Remove it first.
+        config.database.novelty_llm = None
+
         return {
             "llm": {
                 "models": [asdict(m) for m in config.llm.models],

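The new comment pinpoints the constraint: dataclasses.asdict() deep-copies every field value, so an object that cannot be deep-copied or pickled (such as a live LLM client) must be cleared first. A toy demonstration with a stand-in unpicklable field (ToyDatabaseConfig is hypothetical, not an OpenEvolve class):

import threading
from dataclasses import dataclass, field, asdict

@dataclass
class ToyDatabaseConfig:
    similarity_threshold: float = 0.99
    # Stand-in for a live LLM client holding unpicklable state (locks, sockets, ...).
    novelty_llm: object = field(default_factory=threading.Lock)

cfg = ToyDatabaseConfig()

try:
    asdict(cfg)                      # asdict() deep-copies field values...
except TypeError as exc:
    print(f"asdict failed: {exc}")   # ...and deepcopy cannot handle the lock

cfg.novelty_llm = None               # what _serialize_config does before calling asdict()
print(asdict(cfg))                   # serialization succeeds once the field is cleared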