Skip to content

Commit bda9d8c

Browse files
Genetic algorithm for Knapsack
1 parent a71618f commit bda9d8c

File tree

1 file changed

+168
-0
lines changed

1 file changed

+168
-0
lines changed

genetic_algorithm/knapsack.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""Did you know that Genetic Algorithms can be used to quickly approximate combinatorial optimization problems such as knapsack?"""
2+
3+
import random
4+
from dataclasses import dataclass
5+
6+
# Seed the module-level RNG so the generated instance and the GA run that
# follows are reproducible from one execution to the next.
random.seed(42)

# =========================== Problem setup: Knapsack ===========================

# Number of items in the generated knapsack problem.
KNAPSACK_N_ITEMS = 42

# (low, high) bounds handed to random.randint when drawing an item's value;
# randint includes both endpoints.
KNAPSACK_VALUE_RANGE = (10, 100)

# (low, high) bounds handed to random.randint when drawing an item's weight.
KNAPSACK_WEIGHT_RANGE = (5, 50)

# Knapsack capacity expressed as a fraction of the total weight of all items.
KNAPSACK_CAPACITY_RATIO = 0.5
14+
15+
@dataclass
class Item:
    """One candidate object for the knapsack, described by its value and weight."""

    # Amount added to a solution's total value when the item is packed.
    value: int
    # Amount added to a solution's total weight when the item is packed.
    weight: int
19+
20+
def generate_knapsack_instance(
    n_items: int,
    value_range: tuple[int, int],
    weight_range: tuple[int, int],
    capacity_ratio: float,
) -> tuple[list[Item], int]:
    """Generate a random knapsack problem instance.

    Args:
        n_items: Number of items to create.
        value_range: ``(low, high)`` bounds for each item's value; both ends
            are inclusive because they are passed to ``random.randint``.
        weight_range: ``(low, high)`` bounds for each item's weight, also
            inclusive.
        capacity_ratio: Fraction of the combined weight of all items that
            becomes the knapsack capacity.

    Returns:
        A ``(items, capacity)`` pair, where ``capacity`` is the total item
        weight multiplied by ``capacity_ratio`` and truncated with ``int``.
    """
    # Keyword arguments are evaluated left to right, so each item draws its
    # value before its weight; this keeps the instance identical for a given
    # seed.
    items = [
        Item(
            value=random.randint(*value_range),
            weight=random.randint(*weight_range),
        )
        for _ in range(n_items)
    ]
    # The capacity is set as a fraction of the total weight.
    capacity = int(sum(item.weight for item in items) * capacity_ratio)
    return items, capacity
30+
31+
# Build the single problem instance used by the rest of the script from the
# KNAPSACK_* settings.
items, capacity = generate_knapsack_instance(
    n_items=KNAPSACK_N_ITEMS,
    value_range=KNAPSACK_VALUE_RANGE,
    weight_range=KNAPSACK_WEIGHT_RANGE,
    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
)
32+
33+
34+
35+
# ============================== GA Representation ==============================
36+
37+
# HYPERPARAMETERS (For tuning the GA)

# Number of genomes kept in the population each generation.
POPULATION_SIZE = 120
# Number of evaluate/breed cycles to run.
GENERATIONS = 200
# Chance that a pair of parents is recombined; otherwise both are copied as-is.
CROSSOVER_PROBABILITY = 0.9
# Chance, applied to each gene independently, that the gene is flipped.
MUTATION_PROBABILITY = 0.01
# Number of contenders sampled for each tournament selection.
TOURNAMENT_K = 3
# Number of top-fitness genomes copied unchanged into the next generation.
ELITISM = 2

# Value deducted per unit of weight above the knapsack capacity.
OVERWEIGHT_PENALTY_FACTOR = 10

# One 0/1 flag per item, aligned by position with the item list:
# 1 means the item is included, 0 means it is excluded.
Genome = list[int]
49+
50+
def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
    """Score one candidate packing.

    Genes are paired with items by position; every truthy gene packs the
    matching item.

    Args:
        genome: Inclusion flags, one per item.
        items: The items of the knapsack instance.
        capacity: Maximum weight allowed without a penalty.

    Returns:
        ``(fitness, total_weight)``. The fitness is the summed value of the
        packed items. When the packing is over capacity, it is reduced by
        ``OVERWEIGHT_PENALTY_FACTOR`` per unit of excess weight and floored
        at 0. The weight is always the unpenalised total.
    """
    packed = [item for gene, item in zip(genome, items) if gene]
    total_value = sum(item.value for item in packed)
    total_weight = sum(item.weight for item in packed)

    overflow = total_weight - capacity
    if overflow <= 0:
        # Within capacity: the raw value is the fitness.
        return total_value, total_weight

    # Overweight: subtract a penalty proportional to the excess weight.
    penalised_value = total_value - overflow * OVERWEIGHT_PENALTY_FACTOR
    return max(0, penalised_value), total_weight
63+
64+
def random_genome(n: int) -> Genome:
    """Return a new genome of ``n`` genes, each drawn independently as 0 or 1."""
    genes = []
    for _ in range(n):
        genes.append(random.randint(0, 1))
    return genes
67+
68+
def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
    """Pick one parent by tournament selection.

    ``k`` distinct (genome, fitness) pairs are sampled from the population and
    a copy of the genome with the highest fitness among them is returned.
    Note that other selection strategies exist such as roulette wheel,
    rank-based, etc.
    """
    scored = list(zip(population, fitnesses))
    entrants = random.sample(scored, k)
    winner, _ = max(entrants, key=lambda pair: pair[1])
    # Return a copy so later changes to the child never touch the parent.
    return winner[:]
75+
76+
def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
    """Recombine two genomes with single-point crossover.

    With probability ``p_crossover`` a cut point is chosen and the parents
    swap everything after it. Otherwise, or when the shorter parent has fewer
    than two genes, copies of the parents are returned unchanged.
    Note that other crossover strategies exist such as two-point crossover,
    uniform crossover, etc.
    """
    shorter = min(len(a), len(b))
    # The random draw happens first and unconditionally, whatever the lengths.
    skip_crossover = random.random() > p_crossover
    if skip_crossover or shorter < 2:
        return a[:], b[:]

    # The cut lies strictly inside the shorter parent, so each child takes at
    # least one gene from each side.
    cut = random.randint(1, shorter - 1)
    head_a, tail_a = a[:cut], a[cut:]
    head_b, tail_b = b[:cut], b[cut:]
    return head_a + tail_b, head_b + tail_a
84+
85+
def mutation(g: Genome, p_mutation: float) -> Genome:
    """Apply bit-flip mutation to a genome.

    Note that other mutation strategies exist such as swap mutation, scramble
    mutation, etc.

    Args:
        g: Genome to mutate; it is not modified.
        p_mutation: Probability, applied to each gene independently, that the
            gene is flipped (0 becomes 1, 1 becomes 0).

    Returns:
        A new genome of the same length.
    """
    # One random draw per gene, in order, so results are reproducible per seed.
    return [1 - gene if random.random() < p_mutation else gene for gene in g]
90+
91+
def run_ga(
    items: list[Item],
    capacity: int,
    pop_size=POPULATION_SIZE,
    generations=GENERATIONS,
    p_crossover=CROSSOVER_PROBABILITY,
    p_mutation=MUTATION_PROBABILITY,
    tournament_k=TOURNAMENT_K,
    elitism=ELITISM,
):
    """Run the genetic algorithm on a knapsack instance.

    Every generation scores the population with ``evaluate``, records the best
    and the average fitness, copies the ``elitism`` fittest genomes unchanged,
    and fills the remaining slots with mutated crossover children of
    tournament-selected parents.

    Args:
        items: Items of the knapsack instance.
        capacity: Weight limit of the knapsack.
        pop_size: Number of genomes in the population.
        generations: Number of generations to run.
        p_crossover: Probability that a pair of parents is recombined.
        p_mutation: Per-gene flip probability.
        tournament_k: Number of contenders per tournament selection.
        elitism: Number of top genomes carried over unchanged.

    Returns:
        A dict with keys ``"best_genome"``, ``"best_value"``, ``"best_weight"``,
        ``"capacity"``, ``"best_history"`` and ``"avg_history"``. The two
        history lists hold one entry per generation.

    Raises:
        ValueError: If ``pop_size`` is less than 1. ``random.sample`` also
            raises ``ValueError`` when ``tournament_k`` exceeds the population
            size.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    n = len(items)
    population = [random_genome(n) for _ in range(pop_size)]
    best_history = []  # best fitness of each generation
    avg_history = []  # average fitness of each generation
    best_overall = None
    best_fit_overall = -1  # sentinel, lower than any fitness of non-negative item values

    for _ in range(generations):
        fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
        best_fit = max(fitnesses)
        best_idx = fitnesses.index(best_fit)
        best_history.append(best_fit)
        avg_history.append(sum(fitnesses) / pop_size)

        # Strict ">" keeps the earliest genome that reached the best fitness.
        if best_fit > best_fit_overall:
            best_fit_overall = best_fit
            best_overall = population[best_idx][:]

        # Elitism: rank indices by fitness (the stable sort keeps ties in
        # population order) and copy the top ``elitism`` genomes unchanged.
        elite_indices = sorted(
            range(pop_size), key=fitnesses.__getitem__, reverse=True
        )[:elitism]
        elites = [population[i][:] for i in elite_indices]

        # New generation: elites first, then children until the quota is met.
        new_pop = elites[:]
        while len(new_pop) < pop_size:
            parent1 = selection(population, fitnesses, k=tournament_k)
            parent2 = selection(population, fitnesses, k=tournament_k)
            child1, child2 = crossover(parent1, parent2, p_crossover)
            child1 = mutation(child1, p_mutation)
            child2 = mutation(child2, p_mutation)
            new_pop.extend([child1, child2])
        # Children arrive in pairs, so trim a possible extra one.
        population = new_pop[:pop_size]

    if best_overall is None:
        # No generation ran (generations <= 0), so nothing has been scored:
        # fall back to the fittest member of the initial random population
        # rather than handing None to evaluate().
        best_overall = max(
            population, key=lambda genome: evaluate(genome, items, capacity)[0]
        )[:]

    # Final evaluation of the best genome found.
    best_value, best_weight = evaluate(best_overall, items, capacity)
    return {
        "best_genome": best_overall,
        "best_value": best_value,
        "best_weight": best_weight,
        "capacity": capacity,
        "best_history": best_history,
        "avg_history": avg_history,
    }
147+
148+
# Run the GA on the generated instance and report the best packing found.
result = run_ga(items, capacity)

# Items whose gene is 1 in the best genome (used by the optional print below).
best_items = [item for item, bit in zip(items, result["best_genome"]) if bit == 1]

# Single quotes inside the replacement fields: reusing the enclosing double
# quote is only valid syntax from Python 3.12 on.
print(f"Knapsack capacity: {result['capacity']}")
print(f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}")
154+
155+
# print("Items included in the best solution:", best_items)
156+
157+
# import matplotlib.pyplot as plt
158+
159+
# # Plot fitness curves
160+
# plt.figure()
161+
# plt.plot(result["best_history"], label="Best fitness")
162+
# plt.plot(result["avg_history"], label="Average fitness")
163+
# plt.title("GA on Knapsack: Fitness over Generations")
164+
# plt.xlabel("Generation")
165+
# plt.ylabel("Fitness")
166+
# plt.legend()
167+
# plt.tight_layout()
168+
# plt.show()

0 commit comments

Comments
 (0)