@@ -149,7 +149,6 @@ def __init__(
149
149
)
150
150
self .sympy_conversions [name ] = conversion
151
151
152
- print (self .pset .mapping )
153
152
creator .create ("FitnessMin" , base .Fitness , weights = (- 1.0 ,))
154
153
creator .create ("Individual" , gp .PrimitiveTree , fitness = creator .FitnessMin )
155
154
@@ -235,6 +234,8 @@ def simplify(self, expression: gp.PrimitiveTree) -> sympy.core.Expr:
235
234
236
235
:return: The simplified expression as a sympy Expr object.
237
236
"""
237
+ if isinstance (expression , str ):
238
+ expression = creator .Individual (gp .PrimitiveTree .from_string (expression , self .pset ))
238
239
return sympy .simplify (self ._stringify_for_sympy (expression ))
239
240
240
241
def repair (self , expression : gp .PrimitiveTree ) -> gp .PrimitiveTree :
@@ -248,7 +249,7 @@ def repair(self, expression: gp.PrimitiveTree) -> gp.PrimitiveTree:
248
249
"""
249
250
eq = f"{ self .outcome } ~ { ' + ' .join (str (x ) for x in self .split (expression ))} "
250
251
try :
251
- # Create model, fit (run) it, give estimates from it]
252
+ # Create model, fit (run) it, give estimates from it
252
253
model = smf .ols (eq , self .df )
253
254
res = model .fit ()
254
255
@@ -278,16 +279,25 @@ def fitness(self, expression: gp.PrimitiveTree) -> float:
278
279
"""
279
280
old_settings = np .seterr (all = "raise" )
280
281
try :
281
- # Create model, fit (run) it, give estimates from it]
282
+ if isinstance (expression , str ):
283
+ expression = creator .Individual (gp .PrimitiveTree .from_string (expression , self .pset ))
284
+
285
+ # Create model, fit (run) it, give estimates from it
282
286
func = gp .compile (expression , self .pset )
283
- y_estimates = pd .Series ([func (** x ) for _ , x in self .df [self .features ].iterrows ()])
287
+ y_estimates = pd .Series (
288
+ [func (** x ) for _ , x in self .df [self .features ].iterrows ()],
289
+ index = self .df .index ,
290
+ )
284
291
285
- # Calc errors using an improved normalised mean squared
292
+ # Calculate errors using the normalised root mean square error (nrmse),
293
+ # which is normalised with respect to the range
286
294
sqerrors = (self .df [self .outcome ] - y_estimates ) ** 2
287
- mean_squared = sqerrors .sum () / len (self .df )
288
- nmse = mean_squared / (self .df [self .outcome ].sum () / len (self .df ))
295
+ nrmse = np .sqrt (sqerrors .sum () / len (self .df )) / (self .df [self .outcome ].max () - self .df [self .outcome ].min ())
296
+
297
+ if pd .isnull (nrmse ) or nrmse .real != nrmse :
298
+ return (float ("inf" ),)
289
299
290
- return (nmse ,)
300
+ return (nrmse ,)
291
301
292
302
# Fitness value is infinite if an error occurs (rather than returning 1)
293
303
except (
@@ -321,18 +331,29 @@ def make_offspring(self, population: list, num_offspring: int) -> list:
321
331
offspring .append (child )
322
332
return offspring
323
333
324
- def run_gp (self , ngen : int , pop_size : int = 20 , num_offspring : int = 10 , seeds : list = None ) -> gp .PrimitiveTree :
334
+ # pylint: disable=too-many-arguments
335
+ def run_gp (
336
+ self ,
337
+ ngen : int ,
338
+ pop_size : int = 20 ,
339
+ num_offspring : int = 10 ,
340
+ seeds : list = None ,
341
+ repair : bool = True ,
342
+ ) -> gp .PrimitiveTree :
325
343
"""
326
344
Execute Genetic Programming to find the best expression using a mu+lambda algorithm.
327
345
328
346
:param ngen: The maximum number of generations.
329
347
:param pop_size: The population size.
330
348
:param num_offspring: The number of new individuals per generation.
331
349
:param seeds: Seed individuals for the initial population.
350
+ :param repair: Whether to run the linear regression repair operator (defaults to True).
332
351
333
352
:return: The best candidate expression.
334
353
"""
335
- population = [self .toolbox .repair (ind ) for ind in self .toolbox .population (n = pop_size )]
354
+ population = self .toolbox .population (n = pop_size )
355
+ if repair :
356
+ population = [self .toolbox .repair (ind ) for ind in population ]
336
357
if seeds is not None :
337
358
for seed in seeds :
338
359
ind = creator .Individual (gp .PrimitiveTree .from_string (seed , self .pset ))
@@ -348,7 +369,8 @@ def run_gp(self, ngen: int, pop_size: int = 20, num_offspring: int = 10, seeds:
348
369
for _ in range (1 , ngen + 1 ):
349
370
# Vary the population
350
371
offspring = self .make_offspring (population , num_offspring )
351
- offspring = [self .toolbox .repair (ind ) for ind in offspring ]
372
+ if repair :
373
+ offspring = [self .toolbox .repair (ind ) for ind in offspring ]
352
374
353
375
# Evaluate the individuals with an invalid fitness
354
376
for ind in offspring :
0 commit comments