@@ -149,7 +149,6 @@ def __init__(
149
149
)
150
150
self .sympy_conversions [name ] = conversion
151
151
152
- print (self .pset .mapping )
153
152
creator .create ("FitnessMin" , base .Fitness , weights = (- 1.0 ,))
154
153
creator .create ("Individual" , gp .PrimitiveTree , fitness = creator .FitnessMin )
155
154
@@ -235,6 +234,8 @@ def simplify(self, expression: gp.PrimitiveTree) -> sympy.core.Expr:
235
234
236
235
:return: The simplified expression as a sympy Expr object.
237
236
"""
237
+ if isinstance (expression , str ):
238
+ expression = creator .Individual (gp .PrimitiveTree .from_string (expression , self .pset ))
238
239
return sympy .simplify (self ._stringify_for_sympy (expression ))
239
240
240
241
def repair (self , expression : gp .PrimitiveTree ) -> gp .PrimitiveTree :
@@ -278,16 +279,23 @@ def fitness(self, expression: gp.PrimitiveTree) -> float:
278
279
"""
279
280
old_settings = np .seterr (all = "raise" )
280
281
try :
282
+ if isinstance (expression , str ):
283
+ expression = creator .Individual (gp .PrimitiveTree .from_string (expression , self .pset ))
284
+
281
285
# Create model, fit (run) it, give estimates from it
282
286
func = gp .compile (expression , self .pset )
283
- y_estimates = pd .Series ([func (** x ) for _ , x in self .df [self .features ].iterrows ()])
287
+ y_estimates = pd .Series ([func (** x ) for _ , x in self .df [self .features ].iterrows ()], index = self . df . index )
284
288
285
- # Calc errors using an improved normalised mean squared
289
+ # Calculate errors using the range-normalised root mean square error
286
290
sqerrors = (self .df [self .outcome ] - y_estimates ) ** 2
287
- mean_squared = sqerrors .sum () / len (self .df )
288
- nmse = mean_squared / (self .df [self .outcome ].sum () / len (self .df ))
291
+ nrmse = np .sqrt (sqerrors .sum ()/ len (self .df ))/ (self .df [self .outcome ].max () - self .df [self .outcome ].min ())
292
+
293
+
294
+ if pd .isnull (nrmse ) or nrmse .real != nrmse :
295
+ return (float ("inf" ),)
296
+ assert nrmse > 0 , f"NRMSE { nrmse } should be greater than zero"
289
297
290
- return (nmse ,)
298
+ return (nrmse ,)
291
299
292
300
# Fitness value of infinite if error - not return 1
293
301
except (
@@ -321,7 +329,7 @@ def make_offspring(self, population: list, num_offspring: int) -> list:
321
329
offspring .append (child )
322
330
return offspring
323
331
324
- def run_gp (self , ngen : int , pop_size : int = 20 , num_offspring : int = 10 , seeds : list = None ) -> gp .PrimitiveTree :
332
+ def run_gp (self , ngen : int , pop_size : int = 20 , num_offspring : int = 10 , seeds : list = None , repair = True ) -> gp .PrimitiveTree :
325
333
"""
326
334
Execute Genetic Programming to find the best expression using a mu+lambda algorithm.
327
335
@@ -332,7 +340,9 @@ def run_gp(self, ngen: int, pop_size: int = 20, num_offspring: int = 10, seeds:
332
340
333
341
:return: The best candidate expression.
334
342
"""
335
- population = [self .toolbox .repair (ind ) for ind in self .toolbox .population (n = pop_size )]
343
+ population = self .toolbox .population (n = pop_size )
344
+ if repair :
345
+ population = [self .toolbox .repair (ind ) for ind in population ]
336
346
if seeds is not None :
337
347
for seed in seeds :
338
348
ind = creator .Individual (gp .PrimitiveTree .from_string (seed , self .pset ))
@@ -348,7 +358,8 @@ def run_gp(self, ngen: int, pop_size: int = 20, num_offspring: int = 10, seeds:
348
358
for _ in range (1 , ngen + 1 ):
349
359
# Vary the population
350
360
offspring = self .make_offspring (population , num_offspring )
351
- offspring = [self .toolbox .repair (ind ) for ind in offspring ]
361
+ if repair :
362
+ offspring = [self .toolbox .repair (ind ) for ind in offspring ]
352
363
353
364
# Evaluate the individuals with an invalid fitness
354
365
for ind in offspring :
0 commit comments