Skip to content

Commit 37a6606

Browse files
committed
update
1 parent 3627246 commit 37a6606

File tree

3 files changed

+143
-0
lines changed

3 files changed

+143
-0
lines changed

rl3v2/extra_reading.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
=== PART 1 ===
2+
3+
ES (Evolution Strategies)
4+
"Evolution Strategies as a Scalable Alternative to Reinforcement Learning"
5+
https://arxiv.org/abs/1703.03864
6+
7+
Trust Region Evolution Strategies
8+
https://www.microsoft.com/en-us/research/uploads/prod/2018/11/trust-region-evolution-strategies.pdf
9+
10+
The CMA Evolution Strategy: A Tutorial
11+
https://arxiv.org/pdf/1604.00772
12+
13+
Simple random search provides a competitive approach to reinforcement learning (Augmented Random Search)
14+
https://arxiv.org/abs/1803.07055
15+
16+
=== PART 2 ===
17+
18+
DDPG (Deep Deterministic Policy Gradient)
19+
"Continuous control with deep reinforcement learning"
20+
https://arxiv.org/abs/1509.02971
21+
22+
Deterministic Policy Gradient Algorithms
23+
http://proceedings.mlr.press/v32/silver14.pdf

rl3v2/visualize_es.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
4+
# Objective function to maximize (you can change this)
5+
def f(x, y):
    """Return the negated squared distance between (x, y) and the point (1, 0).

    The result is 0 at (1, 0) and negative everywhere else. Only arithmetic
    operators are applied to the arguments.
    """
    # An alternative surface to experiment with: np.sin(x) + np.cos(y)
    squared_distance = (x - 1) ** 2 + y ** 2
    return -squared_distance
8+
9+
# Evolution Strategies optimizer (simple version)
10+
def evolution_strategies(
    f, bounds, pop_size=50, sigma=0.3, alpha=0.03, iterations=100
):
    """Run a basic Evolution Strategies loop on a two-argument function.

    The search centre starts at a point drawn uniformly between ``bounds[0]``
    and ``bounds[1]`` in each of the two coordinates. Every iteration samples
    ``pop_size`` Gaussian perturbations of the centre, scores them with ``f``,
    and shifts the centre along the score-weighted sum of the perturbations,
    so the centre drifts toward higher values of ``f``.

    Args:
        f: Callable invoked as ``f(x, y)`` for each sampled point.
        bounds: Indexable pair ``(low, high)`` used only for the initial draw.
        pop_size: Number of perturbed points sampled per iteration.
        sigma: Scale applied to the standard-normal perturbations.
        alpha: Step size of the centre update.
        iterations: Number of update steps to perform.

    Returns:
        A list with one ``(population, centre)`` tuple per iteration; both
        entries are copies taken before that iteration's update is applied.
    """
    n_params = 2
    lower, upper = bounds[0], bounds[1]
    center = np.random.uniform(lower, upper, size=n_params)

    snapshots = []
    for _ in range(iterations):
        # Draw the perturbations and the candidate points they produce.
        perturbations = np.random.randn(pop_size, n_params)
        candidates = center + sigma * perturbations
        scores = np.array([f(cx, cy) for cx, cy in candidates])

        # Record the state before the centre moves.
        snapshots.append((candidates.copy(), center.copy()))

        # Standardise the scores; the small constant guards against a zero
        # standard deviation when all scores are equal.
        standardized = (scores - scores.mean()) / (scores.std() + 1e-8)
        step_scale = alpha / (pop_size * sigma)
        center += step_scale * np.dot(perturbations.T, standardized)

    return snapshots
31+
32+
# Visualization function
33+
def visualize_es(history, bounds, f, resolution=100):
    """Step through an optimisation history drawn over a contour plot of ``f``.

    Args:
        history: Iterable of ``(population, mean)`` pairs; ``population`` is
            indexed as ``[:, 0]`` / ``[:, 1]`` and ``mean`` as ``[0]`` / ``[1]``.
        bounds: Indexable pair ``(low, high)`` used for both axes.
        f: Callable evaluated once as ``f(X, Y)`` on the meshgrid arrays.
        resolution: Number of grid samples along each axis.

    Each frame is redrawn on the same figure and the function then blocks in
    ``plt.waitforbuttonpress()`` before moving on to the next frame.
    """
    lo, hi = bounds[0], bounds[1]

    # Both axes share the same range, so one set of samples serves for each.
    axis_samples = np.linspace(lo, hi, resolution)
    grid_x, grid_y = np.meshgrid(axis_samples, axis_samples)
    surface = f(grid_x, grid_y)

    plt.figure(figsize=(8, 6))
    for i, (points, center) in enumerate(history):
        # Start from an empty figure, then draw the background surface.
        plt.clf()
        plt.contourf(grid_x, grid_y, surface, levels=50, cmap='viridis')
        plt.colorbar(label="f(x, y)")

        # Sampled points in white, the current centre in red.
        plt.scatter(
            points[:, 0], points[:, 1], c='white', s=20, label='Population'
        )
        plt.scatter(
            center[0], center[1], c='red', s=80, label='Mean', edgecolors='black'
        )

        plt.title(f"Evolution Strategies - Step {i+1}")
        plt.xlim(lo, hi)
        plt.ylim(lo, hi)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.legend()

        # Advance one frame per key press or mouse click.
        plt.waitforbuttonpress()
    plt.show()
55+
56+
# Run the demo only when this file is executed as a script, so that importing
# the module does not start the optimiser or open a plot window.
if __name__ == "__main__":
    bounds = (-5, 5)
    history = evolution_strategies(f, bounds, iterations=80)
    visualize_es(history, bounds, f)

rl3v2/visualize_hill_climbing.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
4+
# Objective function to maximize (you can change this)
5+
def f(x, y):
    """Return the negated squared distance between (x, y) and the point (1, 0).

    The result is 0 at (1, 0) and negative everywhere else. Only arithmetic
    operators are applied to the arguments.
    """
    # An alternative surface to experiment with: np.sin(x) + np.cos(y)
    squared_distance = (x - 1) ** 2 + y ** 2
    return -squared_distance
8+
9+
# Hill climbing optimizer (simple version)
10+
def hill_climb(
    f, bounds, pop_size=1, sigma=0.3, alpha=0.3, iterations=100
):
    """Maximise a two-argument function with stochastic hill climbing.

    The current point starts at a location drawn uniformly between
    ``bounds[0]`` and ``bounds[1]`` in each of the two coordinates. Every
    iteration samples ``pop_size`` Gaussian perturbations of the current
    point and moves to the best of them only if it scores strictly higher
    than the best value seen so far.

    Args:
        f: Callable invoked as ``f(x, y)``.
        bounds: Indexable pair ``(low, high)`` used only for the initial draw.
        pop_size: Number of candidate points sampled per iteration (>= 1).
        sigma: Scale applied to the standard-normal perturbations.
        alpha: Unused; kept so the signature matches existing callers.
        iterations: Number of sampling steps to perform.

    Returns:
        A list with one ``(population, point)`` tuple per iteration; both
        entries are copies taken before that iteration's move is decided.

    Raises:
        ValueError: If ``pop_size`` is smaller than 1.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    dim = 2
    mu = np.random.uniform(bounds[0], bounds[1], size=dim)

    history = []
    # f takes the two coordinates as separate arguments, so unpack the point
    # (passing the array itself raises TypeError).
    best_f = f(mu[0], mu[1])

    for _ in range(iterations):
        # Sample candidate points around the current one.
        noise = np.random.randn(pop_size, dim)
        population = mu + sigma * noise
        fitness = np.array([f(x[0], x[1]) for x in population])

        history.append((population.copy(), mu.copy()))

        # Take the best candidate so the point keeps shape (dim,) for any
        # pop_size, and move only on a strict improvement.
        best_idx = int(np.argmax(fitness))
        if fitness[best_idx] > best_f:
            best_f = fitness[best_idx]
            mu = population[best_idx].copy()

    return history
33+
34+
# Visualization function
35+
def visualize_es(history, bounds, f, resolution=100):
    """Step through an optimisation history drawn over a contour plot of ``f``.

    Args:
        history: Iterable of ``(population, point)`` pairs; ``population`` is
            indexed as ``[:, 0]`` / ``[:, 1]`` and ``point`` as ``[0]`` / ``[1]``.
        bounds: Indexable pair ``(low, high)`` used for both axes.
        f: Callable evaluated once as ``f(X, Y)`` on the meshgrid arrays.
        resolution: Number of grid samples along each axis.

    Each frame is redrawn on the same figure and the function then blocks in
    ``plt.waitforbuttonpress()`` before moving on to the next frame.
    """
    lo, hi = bounds[0], bounds[1]

    # Both axes share the same range, so one set of samples serves for each.
    axis_samples = np.linspace(lo, hi, resolution)
    grid_x, grid_y = np.meshgrid(axis_samples, axis_samples)
    surface = f(grid_x, grid_y)

    plt.figure(figsize=(8, 6))
    for i, (points, center) in enumerate(history):
        # Start from an empty figure, then draw the background surface.
        plt.clf()
        plt.contourf(grid_x, grid_y, surface, levels=50, cmap='viridis')
        plt.colorbar(label="f(x, y)")

        # Sampled points in white, the current point in red.
        plt.scatter(
            points[:, 0], points[:, 1], c='white', s=20, label='Population'
        )
        plt.scatter(
            center[0], center[1], c='red', s=80, label='Mean', edgecolors='black'
        )

        plt.title(f"Hill Climbing - Step {i+1}")
        plt.xlim(lo, hi)
        plt.ylim(lo, hi)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.legend()

        # Advance one frame per key press or mouse click.
        plt.waitforbuttonpress()
    plt.show()
57+
58+
# Run the demo only when this file is executed as a script, so that importing
# the module does not start the optimiser or open a plot window.
if __name__ == "__main__":
    bounds = (-5, 5)
    history = hill_climb(f, bounds, iterations=80)
    visualize_es(history, bounds, f)

0 commit comments

Comments
 (0)