Skip to content

Commit ae844dd

Browse files
committed
Improved GP fitness to use NRMSE
1 parent bb2527b commit ae844dd

File tree

1 file changed

+20
-9
lines changed

1 file changed

+20
-9
lines changed

causal_testing/estimation/genetic_programming_regression_fitter.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ def __init__(
149149
)
150150
self.sympy_conversions[name] = conversion
151151

152-
print(self.pset.mapping)
153152
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
154153
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
155154

@@ -235,6 +234,8 @@ def simplify(self, expression: gp.PrimitiveTree) -> sympy.core.Expr:
235234
236235
:return: The simplified expression as a sympy Expr object.
237236
"""
237+
if isinstance(expression, str):
238+
expression = creator.Individual(gp.PrimitiveTree.from_string(expression, self.pset))
238239
return sympy.simplify(self._stringify_for_sympy(expression))
239240

240241
def repair(self, expression: gp.PrimitiveTree) -> gp.PrimitiveTree:
@@ -278,16 +279,23 @@ def fitness(self, expression: gp.PrimitiveTree) -> float:
278279
"""
279280
old_settings = np.seterr(all="raise")
280281
try:
282+
if isinstance(expression, str):
283+
expression = creator.Individual(gp.PrimitiveTree.from_string(expression, self.pset))
284+
281285
# Create model, fit (run) it, and give estimates from it
282286
func = gp.compile(expression, self.pset)
283-
y_estimates = pd.Series([func(**x) for _, x in self.df[self.features].iterrows()])
287+
y_estimates = pd.Series([func(**x) for _, x in self.df[self.features].iterrows()], index=self.df.index)
284288

285-
# Calc errors using an improved normalised mean squared
289+
# Calculate errors using normalised root mean square error (RMSE divided by the range of the outcome)
286290
sqerrors = (self.df[self.outcome] - y_estimates) ** 2
287-
mean_squared = sqerrors.sum() / len(self.df)
288-
nmse = mean_squared / (self.df[self.outcome].sum() / len(self.df))
291+
nrmse = np.sqrt(sqerrors.sum()/len(self.df))/(self.df[self.outcome].max() - self.df[self.outcome].min())
292+
293+
294+
if pd.isnull(nrmse) or nrmse.real != nrmse:
295+
return (float("inf"),)
296+
assert nrmse > 0, f"NRMSE {nrmse} should be greater than zero"
289297

290-
return (nmse,)
298+
return (nrmse,)
291299

292300
# Fitness value is infinite if an error occurs (rather than returning 1)
293301
except (
@@ -321,7 +329,7 @@ def make_offspring(self, population: list, num_offspring: int) -> list:
321329
offspring.append(child)
322330
return offspring
323331

324-
def run_gp(self, ngen: int, pop_size: int = 20, num_offspring: int = 10, seeds: list = None) -> gp.PrimitiveTree:
332+
def run_gp(self, ngen: int, pop_size: int = 20, num_offspring: int = 10, seeds: list = None, repair=True) -> gp.PrimitiveTree:
325333
"""
326334
Execute Genetic Programming to find the best expression using a mu+lambda algorithm.
327335
@@ -332,7 +340,9 @@ def run_gp(self, ngen: int, pop_size: int = 20, num_offspring: int = 10, seeds:
332340
333341
:return: The best candidate expression.
334342
"""
335-
population = [self.toolbox.repair(ind) for ind in self.toolbox.population(n=pop_size)]
343+
population = self.toolbox.population(n=pop_size)
344+
if repair:
345+
population = [self.toolbox.repair(ind) for ind in population]
336346
if seeds is not None:
337347
for seed in seeds:
338348
ind = creator.Individual(gp.PrimitiveTree.from_string(seed, self.pset))
@@ -348,7 +358,8 @@ def run_gp(self, ngen: int, pop_size: int = 20, num_offspring: int = 10, seeds:
348358
for _ in range(1, ngen + 1):
349359
# Vary the population
350360
offspring = self.make_offspring(population, num_offspring)
351-
offspring = [self.toolbox.repair(ind) for ind in offspring]
361+
if repair:
362+
offspring = [self.toolbox.repair(ind) for ind in offspring]
352363

353364
# Evaluate the individuals with an invalid fitness
354365
for ind in offspring:

0 commit comments

Comments
 (0)