|
| 1 | +"""Did you know that Genetic Algorithms can be used to quickly approximate combinatorial optimization problems such as knapsack?""" |
| 2 | + |
| 3 | +import random |
| 4 | +from dataclasses import dataclass |
| 5 | + |
# Seed the module-level RNG so repeated runs draw the same random numbers.
random.seed(42)

# =========================== Problem setup: Knapsack ===========================

# Number of items in the generated knapsack instance.
KNAPSACK_N_ITEMS: int = 42
# (low, high) bounds for the value of each item.
KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100)
# (low, high) bounds for the weight of each item.
KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50)
# Knapsack capacity, expressed as a fraction of the total item weight.
KNAPSACK_CAPACITY_RATIO: float = 0.5
| 14 | + |
@dataclass
class Item:
    """One object that may be packed into the knapsack."""

    # Worth contributed to the solution when the item is packed.
    value: int
    # Amount of knapsack capacity the item consumes when packed.
    weight: int
| 19 | + |
def generate_knapsack_instance(
    n_items: int,
    value_range: tuple[int, int],
    weight_range: tuple[int, int],
    capacity_ratio: float = 0.5,
) -> tuple[list[Item], int]:
    """Generate a random knapsack problem instance.

    Args:
        n_items: Number of items to create.
        value_range: Inclusive ``(low, high)`` bounds passed to
            ``random.randint`` for each item's value.
        weight_range: Inclusive ``(low, high)`` bounds passed to
            ``random.randint`` for each item's weight.
        capacity_ratio: Fraction of the summed item weights that the
            knapsack can hold.

    Returns:
        A ``(items, capacity)`` pair, where ``capacity`` is the total item
        weight scaled by ``capacity_ratio`` and truncated to an ``int``.
    """
    # Arguments are evaluated left to right, so each item draws its value
    # before its weight; keeping that order keeps seeded instances stable.
    items = [
        Item(
            value=random.randint(*value_range),
            weight=random.randint(*weight_range),
        )
        for _ in range(n_items)
    ]
    # We set capacity as a fraction of total weight.
    capacity = int(sum(item.weight for item in items) * capacity_ratio)
    return items, capacity
| 30 | + |
# Build the single problem instance that the rest of the script works on.
items, capacity = generate_knapsack_instance(
    n_items=KNAPSACK_N_ITEMS,
    value_range=KNAPSACK_VALUE_RANGE,
    weight_range=KNAPSACK_WEIGHT_RANGE,
    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
)
| 32 | + |
| 33 | + |
| 34 | + |
# ============================== GA Representation ==============================

# HYPERPARAMETERS (For tuning the GA)

# Number of genomes kept in every generation.
POPULATION_SIZE: int = 120
# Number of generations the GA runs for.
GENERATIONS: int = 200
# Probability that a pair of parents is recombined instead of copied.
CROSSOVER_PROBABILITY: float = 0.9
# Probability that any single gene is flipped during mutation.
MUTATION_PROBABILITY: float = 0.01
# Number of contenders drawn for each tournament selection.
TOURNAMENT_K: int = 3
# Number of top genomes copied unchanged into the next generation.
ELITISM: int = 2

# Value subtracted from the fitness per unit of weight above the capacity.
OVERWEIGHT_PENALTY_FACTOR: int = 10

# One 0/1 flag per item: 1 means the item is included, 0 means it is excluded.
Genome = list[int]
| 49 | + |
def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
    """Score one candidate solution.

    Returns a ``(fitness, total_weight)`` pair. The fitness is the summed
    value of the selected items; when the selection weighs more than
    ``capacity`` it is reduced by ``OVERWEIGHT_PENALTY_FACTOR`` per unit of
    excess weight and floored at zero.
    """
    # Items whose gene is truthy are the ones packed into the knapsack.
    packed = [item for gene, item in zip(genome, items) if gene]
    value_sum = sum(item.value for item in packed)
    weight_sum = sum(item.weight for item in packed)

    if weight_sum <= capacity:
        return value_sum, weight_sum

    # Overweight: subtract a penalty proportional to the excess weight.
    excess = weight_sum - capacity
    penalized = value_sum - excess * OVERWEIGHT_PENALTY_FACTOR
    return max(0, penalized), weight_sum
| 63 | + |
def random_genome(n: int) -> Genome:
    """Build a genome of ``n`` genes, each drawn independently as 0 or 1."""
    genes = []
    for _ in range(n):
        genes.append(random.randint(0, 1))
    return genes
| 67 | + |
def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
    """Pick one parent from the population by tournament selection.

    ``k`` distinct individuals are drawn at random and the one with the
    highest fitness wins; on a tie the contender drawn first wins.
    Note that other selection strategies exist such as roulette wheel,
    rank-based, etc.

    Args:
        population: Candidate genomes.
        fitnesses: Fitness of each genome, aligned by index with
            ``population``.
        k: Tournament size.

    Returns:
        A copy of the winning genome, so the caller can modify it without
        touching the population.

    Raises:
        ValueError: If ``population`` and ``fitnesses`` differ in length, or
            if ``k`` is not between 1 and ``len(population)``.
    """
    if len(population) != len(fitnesses):
        raise ValueError("population and fitnesses must have the same length")
    # Sample positions instead of (genome, fitness) pairs so that no list of
    # pairs has to be built for every single tournament.
    contenders = random.sample(range(len(population)), k)
    winner = max(contenders, key=fitnesses.__getitem__)
    return population[winner][:]
| 75 | + |
def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
    """Recombine two genomes with single-point crossover.

    With probability ``p_crossover`` both parents are cut at one random
    position and their tails are swapped; otherwise, or when the shorter
    parent has fewer than two genes, plain copies of the parents are returned.
    Note that other crossover strategies exist such as two-point crossover,
    uniform crossover, etc.
    """
    shortest = min(len(a), len(b))
    # The random draw is made unconditionally, before the length check.
    skip = random.random() > p_crossover
    if skip or shortest < 2:
        return a[:], b[:]

    # Cut strictly inside the shorter genome so both sides are non-empty.
    cut = random.randint(1, shortest - 1)
    head_a, tail_a = a[:cut], a[cut:]
    head_b, tail_b = b[:cut], b[cut:]
    return head_a + tail_b, head_b + tail_a
| 84 | + |
def mutation(g: Genome, p_mutation: float) -> Genome:
    """Apply bit-flip mutation to a genome.

    Note that other mutation strategies exist such as swap mutation,
    scramble mutation, etc.

    Args:
        g: Genome to mutate; it is not modified.
        p_mutation: Probability that any single gene is flipped.

    Returns:
        A new genome in which each gene was flipped (0 <-> 1) independently
        with probability ``p_mutation``.
    """
    # One random draw per gene decides whether that gene is flipped.
    return [1 - gene if random.random() < p_mutation else gene for gene in g]
| 90 | + |
def run_ga(
    items: list[Item],
    capacity: int,
    pop_size: int = POPULATION_SIZE,
    generations: int = GENERATIONS,
    p_crossover: float = CROSSOVER_PROBABILITY,
    p_mutation: float = MUTATION_PROBABILITY,
    tournament_k: int = TOURNAMENT_K,
    elitism: int = ELITISM,
) -> dict:
    """Run the genetic algorithm on a knapsack instance.

    Args:
        items: Items of the knapsack instance.
        capacity: Capacity of the knapsack.
        pop_size: Number of genomes per generation.
        generations: Number of populations that are evaluated.
        p_crossover: Probability that a pair of parents is recombined.
        p_mutation: Per-gene probability of a bit flip.
        tournament_k: Number of contenders per tournament selection.
        elitism: Number of top genomes copied unchanged into the next
            generation.

    Returns:
        A dict with the keys ``best_genome``, ``best_value``, ``best_weight``,
        ``capacity``, ``best_history`` and ``avg_history``. ``best_value`` is
        the fitness reported by ``evaluate``, so it already includes the
        overweight penalty when ``best_weight`` exceeds ``capacity``. The two
        histories hold one entry per evaluated generation.

    Raises:
        ValueError: If ``pop_size`` or ``generations`` is smaller than 1.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")
    if generations < 1:
        raise ValueError("generations must be at least 1")

    n = len(items)
    population = [random_genome(n) for _ in range(pop_size)]
    best_history = []  # best fitness of each generation
    avg_history = []  # mean fitness of each generation
    best_overall = None
    best_fit_overall = None

    for generation in range(generations):
        fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
        best_fit = max(fitnesses)
        best_history.append(best_fit)
        avg_history.append(sum(fitnesses) / pop_size)

        # Strict improvement only, so the earliest genome reaching the best
        # fitness is the one that is kept.
        if best_overall is None or best_fit > best_fit_overall:
            best_fit_overall = best_fit
            best_overall = population[fitnesses.index(best_fit)][:]

        # A population bred after the last evaluation would never be scored,
        # so skip the breeding step on the final iteration.
        if generation == generations - 1:
            break

        # Elitism: carry copies of the fittest genomes over unchanged.
        ranked = sorted(range(pop_size), key=fitnesses.__getitem__, reverse=True)
        new_pop = [population[i][:] for i in ranked[:elitism]]

        # Fill the rest of the new generation with mutated offspring.
        while len(new_pop) < pop_size:
            parent1 = selection(population, fitnesses, k=tournament_k)
            parent2 = selection(population, fitnesses, k=tournament_k)
            child1, child2 = crossover(parent1, parent2, p_crossover)
            new_pop.append(mutation(child1, p_mutation))
            new_pop.append(mutation(child2, p_mutation))
        # Children are added in pairs, so trim a possible extra one.
        population = new_pop[:pop_size]

    # Final evaluation of the best genome seen in any generation.
    best_value, best_weight = evaluate(best_overall, items, capacity)
    return {
        "best_genome": best_overall,
        "best_value": best_value,
        "best_weight": best_weight,
        "capacity": capacity,
        "best_history": best_history,
        "avg_history": avg_history,
    }
| 147 | + |
# Run the GA on the generated instance and report the best solution found.
result = run_ga(items, capacity)

# Items whose gene is set to 1 in the best genome.
best_items = [item for item, bit in zip(items, result["best_genome"]) if bit == 1]

# Single quotes inside the f-strings: reusing the enclosing double quote is
# only valid syntax from Python 3.12 on.
print(f"Knapsack capacity: {result['capacity']}")
print(f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}")
| 154 | + |
| 155 | +# print("Items included in the best solution:", best_items) |
| 156 | + |
| 157 | +# import matplotlib.pyplot as plt |
| 158 | + |
| 159 | +# # Plot fitness curves |
| 160 | +# plt.figure() |
| 161 | +# plt.plot(result["best_history"], label="Best fitness") |
| 162 | +# plt.plot(result["avg_history"], label="Average fitness") |
| 163 | +# plt.title("GA on Knapsack: Fitness over Generations") |
| 164 | +# plt.xlabel("Generation") |
| 165 | +# plt.ylabel("Fitness") |
| 166 | +# plt.legend() |
| 167 | +# plt.tight_layout() |
| 168 | +# plt.show() |
0 commit comments