 
 PIPELINE_CMD = ["python3", "openevolve-run.py"]
 
+
 @register_model("openevolve")
 class OpenEvolve(LM):
     def __init__(
@@ -42,9 +43,9 @@ def __init__(
         self.config_file = config_file
 
         # folder must match prompt:template_dir in config.yml!
-        self.prompt_path = "scripts/lm_eval/prompts/system_message.txt"
-        self.evaluator_prompt_path = "scripts/lm_eval/prompts/evaluator_system_message.txt"
-        self.best_path = "scripts/lm_eval/openevolve_output/best/best_program.txt"
+        self.prompt_path = "examples/lm_eval/prompts/system_message.txt"
+        self.evaluator_prompt_path = "examples/lm_eval/prompts/evaluator_system_message.txt"
+        self.best_path = "examples/lm_eval/openevolve_output/best/best_program.txt"
         self.base_system_message = "You are an expert task solver, with a lot of commonsense, math, language and coding knowledge.\n\nConsider this task:\n```{prompt}´´´"
 
     def generate(self, prompts: List[str], max_gen_toks: int = None, stop=None, **kwargs):
@@ -133,22 +134,28 @@ def generate_until(self, requests: Iterable[Any], **kw) -> List[str]:
             cleaned.append(g)
         return cleaned
 
+
 if __name__ == "__main__":
     # cli arguments for primary model, secondary model, iterations, config and tasks
     p = argparse.ArgumentParser(
         description="OpenEvolve <-> lm-evaluation-harness adapter.",
     )
-    p.add_argument("--config", default="scripts/lm_eval/config.yml", help="config file")
+    p.add_argument("--config", default="examples/lm_eval/config.yml", help="config file")
     p.add_argument(
         "--init_file",
-        default="scripts/lm_eval/initial_content_stub.txt",
+        default="examples/lm_eval/initial_content_stub.txt",
         help="initial content file",
     )
     p.add_argument(
-        "--evaluator_file", default="scripts/lm_eval/evaluator_stub.py", help="evaluator file"
+        "--evaluator_file", default="examples/lm_eval/evaluator_stub.py", help="evaluator file"
     )
     p.add_argument("--iterations", default=5, type=int, help="number of iterations")
-    p.add_argument("--limit", default=None, type=int, help="limit the number of examples per task that are executed")
+    p.add_argument(
+        "--limit",
+        default=None,
+        type=int,
+        help="limit the number of examples per task that are executed",
+    )
     # p.add_argument("--tasks", default="boolq,gsm8k,mmlu", help="comma-list of tasks to evaluate")
     p.add_argument("--tasks", default="gsm8k", help="list of tasks to evaluate")
     p.add_argument("--output_path", default="results", help="output path for results")
@@ -175,10 +182,12 @@ def generate_until(self, requests: Iterable[Any], **kw) -> List[str]:
     ).mkdir(exist_ok=True)
 
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    results_path = pathlib.Path(os.path.join(
-        args.output_path,
-        f"{timestamp}_iter{args.iterations}.json",
-    ))
+    results_path = pathlib.Path(
+        os.path.join(
+            args.output_path,
+            f"{timestamp}_iter{args.iterations}.json",
+        )
+    )
 
     with results_path.open("w") as f:
         json.dump(results, f, indent=2)
@@ -189,7 +198,7 @@ def generate_until(self, requests: Iterable[Any], **kw) -> List[str]:
         # pick the first value that is a real number
         for key, val in metrics.items():
             if isinstance(val, (int, float)):
-                short[task] = (key, val) # store *both* name & value
+                short[task] = (key, val)  # store *both* name & value
                 break
 
     print(f"Full results written to {results_path}\n")
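
For reference, the summary loop touched in the final hunk can be exercised on its own. The sketch below is a minimal, standalone illustration; the shape of the results dict (task name mapped to a metrics dict) is an assumption for demonstration, not the exact structure returned by lm-evaluation-harness.

# Standalone sketch of the metric-summarization loop (results shape is assumed).
results = {
    "gsm8k": {"alias": "gsm8k", "exact_match,strict-match": 0.42},
}

short = {}
for task, metrics in results.items():
    # pick the first value that is a real number
    for key, val in metrics.items():
        if isinstance(val, (int, float)):
            short[task] = (key, val)  # store *both* name & value
            break

print(short)  # -> {'gsm8k': ('exact_match,strict-match', 0.42)}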