
Commit 9b50fd5

first impl of MARL
1 parent 594c195 commit 9b50fd5

File tree

11 files changed: +1658 / -80 lines


robot_nav/models/CNNTD3/CNNTD3.py

Lines changed: 311 additions & 32 deletions
Large diffs are not rendered by default.

robot_nav/models/CNNTD3/att.py

Lines changed: 614 additions & 0 deletions
Large diffs are not rendered by default.

robot_nav/models/TD3/TD3.py

Lines changed: 1 addition & 1 deletion
@@ -445,7 +445,7 @@ def prepare_state(self, latest_scan, distance, cos, sin, collision, goal, action
         ang_vel = (action[1] + 1) / 2
         state = min_values + [distance, cos, sin] + [lin_vel, ang_vel]
 
-        assert len(state) == self.state_dim
+        assert len(state) == self.state_dim, f"{len(state), self.state_dim}"
 
         terminal = 1 if collision or goal else 0
 
         return state, terminal
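
The only change here is the assertion message: embedding the tuple (len(state), self.state_dim) in the f-string makes a dimension mismatch report both the actual and the expected length. A minimal standalone sketch of the resulting error (the numbers are made up for illustration):

    state_dim = 95        # hypothetical expected dimension
    state = [0.0] * 97    # hypothetical state of the wrong length

    # f"{len(state), state_dim}" formats the tuple, so the failure reads "(97, 95)"
    assert len(state) == state_dim, f"{len(state), state_dim}"
    # AssertionError: (97, 95)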

robot_nav/multi_robot_world.yaml

Lines changed: 58 additions & 0 deletions
world:
  height: 12  # the height of the world
  width: 12   # the width of the world
  step_time: 0.3    # duration of each simulation step [s]
  sample_time: 0.3  # sampling interval for rendering and data extraction [s]
  collision_mode: 'reactive'

robot:
  - number: 3
    kinematics: {name: 'diff'}
    distribution: {name: 'circle', radius: 4, center: [6, 6]}
    shape: {name: 'circle', radius: 0.2}
    vel_min: [0, -1.0]
    vel_max: [1.0, 1.0]
    state: [2, 2, 0, 0]
    goal: [9, 9, 0]
    color: ['royalblue', 'red', 'green', 'orange', 'purple']
    arrive_mode: position
    goal_threshold: 0.3

    sensors:
      - type: 'lidar2d'
        range_min: 0
        range_max: 7
        angle_range: 3.14
        number: 90
        noise: True
        std: 0.08
        angle_std: 0.1
        offset: [0, 0, 0]
        alpha: 0.3

plot:
  show_trajectory: False
  show_sensor: False

obstacle:
  - number: 4
    kinematics: {name: 'static'}
    distribution: {name: 'random', range_low: [0, 0, -3.14], range_high: [12, 12, 3.14]}
    behavior: {name: 'rvo', wander: True, range_low: [0, 0, -3.14], range_high: [12, 12, 3.14], vxmax: 0.2, vymax: 0.2, factor: 1.0}
    vel_max: [0.2, 0.2]
    vel_min: [-0.2, -0.2]
    shape:
      - {name: 'circle', radius: 1.0, random_shape: True}
      - {name: 'polygon', random_shape: true, avg_radius_range: [0.5, 1.0], irregularity_range: [0, 0.4], spikeyness_range: [0, 0.4], num_vertices_range: [4, 6]}
  - shape: {name: 'circle', radius: 0.8}
    state: [5, 10, 1]
    kinematics: {name: 'static'}
  - shape: {name: 'rectangle', length: 1.0, width: 1.2}  # length, width
    state: [8, 5, 1]
    kinematics: {name: 'static'}
  - shape: {name: 'rectangle', length: 0.5, width: 2.1}  # length, width
    state: [1, 8, 1.3]
    kinematics: {name: 'static'}
  - shape: {name: 'linestring', vertices: [[0, 0], [12, 0], [12, 12], [0, 12], [0, 0]]}  # boundary vertices
    kinematics: {name: 'static'}
    state: [0, 0, 0]
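
This world file is consumed by multi_train.py (below) through the repository's SIM_ENV wrapper. A minimal sketch of loading it and issuing one zero-velocity warm-up step, mirroring the calls made in that script (the constructor arguments and the shape of the initial connection tensor are taken from multi_train.py, not from a documented API):

    import torch
    from sim import SIM_ENV

    # Load the 3-robot world defined above.
    sim = SIM_ENV(world_file="multi_robot_world.yaml", disable_plotting=False)

    # One zero-velocity command per robot plus the initial connection tensor,
    # as in the warm-up step of multi_train.py.
    con = torch.tensor([[[0, 0], [0, 0], [0, 0]]])
    latest_scan, distance, cos, sin, collision, goal, a, reward, positions = sim.step(
        [[0, 0] for _ in range(sim.num_robots)], con
    )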

robot_nav/multi_robot_world2.yaml

Lines changed: 27 additions & 0 deletions
world:
  height: 12  # the height of the world
  width: 12   # the width of the world
  step_time: 0.3    # duration of each simulation step [s]
  sample_time: 0.3  # sampling interval for rendering and data extraction [s]
  collision_mode: 'reactive'

robot:
  - number: 5
    kinematics: {name: 'diff'}
    distribution: {name: 'circle', radius: 4, center: [6, 6]}
    shape: {name: 'circle', radius: 0.2}
    vel_min: [0, -1.0]
    vel_max: [1.0, 1.0]
    state: [2, 2, 0, 0]
    goal: [9, 9, 0]
    color: ['royalblue', 'red', 'green', 'orange', 'purple', 'yellow', 'cyan', 'magenta', 'lime', 'pink', 'brown']
    arrive_mode: position
    goal_threshold: 0.3

plot:
  show_trajectory: False

#obstacle:
#  - shape: {name: 'linestring', vertices: [[0, 0], [12, 0], [12, 12], [0, 12], [0, 0]]}  # boundary vertices
#    kinematics: {name: 'static'}
#    state: [0, 0, 0]

robot_nav/multi_train.py

Lines changed: 145 additions & 0 deletions
from robot_nav.models.TD3.TD3 import TD3
from robot_nav.models.DDPG.DDPG import DDPG
from robot_nav.models.SAC.SAC import SAC
from robot_nav.models.HCM.hardcoded_model import HCM
from robot_nav.models.PPO.PPO import PPO
from robot_nav.models.CNNTD3.CNNTD3 import CNNTD3

import torch
import numpy as np
from sim import SIM_ENV
from utils import get_buffer


def main(args=None):
    """Main training function"""
    action_dim = 2  # number of actions produced by the model
    max_action = 1  # maximum absolute value of output actions
    state_dim = 97  # number of input values in the neural network (vector length of state input)
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    )  # use cuda if it is available, cpu otherwise
    nr_eval_episodes = 10  # how many episodes to use for evaluation
    max_epochs = 60  # max number of epochs
    epoch = 0  # starting epoch number
    episodes_per_epoch = 70  # how many episodes to run in a single epoch
    episode = 0  # starting episode number
    train_every_n = 2  # train and update network parameters every n episodes
    training_iterations = 80  # how many batches to use for a single training cycle
    batch_size = 12  # batch size for each training iteration
    max_steps = 300  # maximum number of steps in a single episode
    steps = 0  # starting step number
    load_saved_buffer = False  # whether to load experiences from assets/data.yml
    pretrain = False  # whether to use the loaded experiences to pre-train the model (load_saved_buffer must be True)
    pretraining_iterations = (
        10  # number of training iterations to run during pre-training
    )
    save_every = 5  # save the model every n training cycles

    sim = SIM_ENV(
        world_file="multi_robot_world.yaml", disable_plotting=False
    )  # instantiate environment

    model = CNNTD3(
        state_dim=state_dim,
        action_dim=action_dim,
        max_action=max_action,
        num_robots=sim.num_robots,
        device=device,
        save_every=save_every,
        load_model=False,
        model_name="CNNTD3",
    )  # instantiate a model

    replay_buffer = get_buffer(
        model,
        sim,
        load_saved_buffer,
        pretrain,
        pretraining_iterations,
        training_iterations,
        batch_size,
    )
    con = torch.tensor([[[0, 0], [0, 0], [0, 0]]])

    latest_scan, distance, cos, sin, collision, goal, a, reward, positions = sim.step(
        [[0, 0] for _ in range(sim.num_robots)], con
    )  # get the initial step state

    while epoch < max_epochs:  # train until max_epochs is reached
        state, terminal = model.prepare_state(
            latest_scan, distance, cos, sin, collision, goal, a, positions
        )  # get a state representation from the data returned by the environment

        action, connection = model.get_action(
            np.array(state), False
        )  # get an action from the model

        a_in = [
            [(a[0] + 1) / 4, a[1]] for a in action
        ]  # scale linear velocity to the [0, 0.5] m/s range

        latest_scan, distance, cos, sin, collision, goal, a, reward, positions = sim.step(
            a_in, connection
        )  # get data from the environment
        next_state, terminal = model.prepare_state(
            latest_scan, distance, cos, sin, collision, goal, a, positions
        )  # get a next state representation
        replay_buffer.add(
            state, action, reward, terminal, next_state
        )  # add experience to the replay buffer

        if (
            any(terminal) or steps == max_steps
        ):  # reset environment if a terminal state is reached or max_steps were taken
            latest_scan, distance, cos, sin, collision, goal, a, reward, positions = (
                sim.reset()
            )
            episode += 1
            if episode % train_every_n == 0:
                model.train(
                    replay_buffer=replay_buffer,
                    iterations=training_iterations,
                    batch_size=batch_size,
                )  # train the model and update its parameters

            steps = 0
        else:
            steps += 1

        if (
            episode + 1
        ) % episodes_per_epoch == 0:  # if the epoch is concluded, run evaluation
            episode = 0
            epoch += 1
            evaluate(model, epoch, sim, eval_episodes=nr_eval_episodes)


def evaluate(model, epoch, sim, eval_episodes=10):
    print("..............................................")
    print(f"Epoch {epoch}. Evaluating scenarios")
    avg_reward = 0.0
    col = 0
    goals = 0
    for _ in range(eval_episodes):
        count = 0
        latest_scan, distance, cos, sin, collision, goal, a, reward, positions = (
            sim.reset()
        )
        done = False
        while not done and count < 501:
            state, terminal = model.prepare_state(
                latest_scan, distance, cos, sin, collision, goal, a, positions
            )
            action, connection = model.get_action(np.array(state), False)
            a_in = [[(a[0] + 1) / 4, a[1]] for a in action]
            latest_scan, distance, cos, sin, collision, goal, a, reward, positions = (
                sim.step(a_in, connection)
            )
            avg_reward += sum(reward) / len(reward)
            count += 1
            if collision:
                col += 1
            if goal:
                goals += 1
            done = collision or goal
    avg_reward /= eval_episodes
    avg_col = col / eval_episodes
    avg_goal = goals / eval_episodes
    print(f"Average Reward: {avg_reward}")
    print(f"Average Collision rate: {avg_col}")
    print(f"Average Goal rate: {avg_goal}")
    print("..............................................")
    model.writer.add_scalar("eval/avg_reward", avg_reward, epoch)
    model.writer.add_scalar("eval/avg_col", avg_col, epoch)
    model.writer.add_scalar("eval/avg_goal", avg_goal, epoch)


if __name__ == "__main__":
    main()
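
The a_in rescaling in the loop above is where the model's [-1, 1] outputs are mapped to the commands sent to the simulator: the first component becomes a linear velocity in [0, 0.5] m/s, the second is passed through as the angular command. A small numeric check of that mapping (sample values chosen for illustration):

    # Network outputs lie in [-1, 1] for both action components.
    sample_actions = [[-1.0, 0.3], [0.0, -0.5], [1.0, 1.0]]

    # (a[0] + 1) / 4 maps [-1, 1] -> [0, 0.5] m/s; a[1] is left unchanged.
    a_in = [[(a[0] + 1) / 4, a[1]] for a in sample_actions]
    print(a_in)  # [[0.0, 0.3], [0.25, -0.5], [0.5, 1.0]]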

robot_nav/multi_train2.py

Lines changed: 154 additions & 0 deletions
from robot_nav.models.TD3.TD3 import TD3
from robot_nav.models.DDPG.DDPG import DDPG
from robot_nav.models.SAC.SAC import SAC
from robot_nav.models.HCM.hardcoded_model import HCM
from robot_nav.models.PPO.PPO import PPO
from robot_nav.models.CNNTD3.att import CNNTD3

import torch
import numpy as np
from sim2 import SIM_ENV
from utils import get_buffer


def outside_of_bounds(poses):
    outside = False
    for pose in poses:
        norm_x = pose[0] - 6
        norm_y = pose[1] - 6
        if abs(norm_x) > 10.5 or abs(norm_y) > 10.5:
            outside = True
            break
    return outside


def main(args=None):
    """Main training function"""
    action_dim = 2  # number of actions produced by the model
    max_action = 1  # maximum absolute value of output actions
    state_dim = 9  # number of input values in the neural network (vector length of state input)
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    )  # use cuda if it is available, cpu otherwise
    nr_eval_episodes = 10  # how many episodes to use for evaluation
    max_epochs = 100  # max number of epochs
    epoch = 0  # starting epoch number
    episodes_per_epoch = 70  # how many episodes to run in a single epoch
    episode = 0  # starting episode number
    train_every_n = 10  # train and update network parameters every n episodes
    training_iterations = 80  # how many batches to use for a single training cycle
    batch_size = 16  # batch size for each training iteration
    max_steps = 300  # maximum number of steps in a single episode
    steps = 0  # starting step number
    load_saved_buffer = False  # whether to load experiences from assets/data.yml
    pretrain = False  # whether to use the loaded experiences to pre-train the model (load_saved_buffer must be True)
    pretraining_iterations = (
        10  # number of training iterations to run during pre-training
    )
    save_every = 5  # save the model every n training cycles

    sim = SIM_ENV(
        world_file="multi_robot_world2.yaml", disable_plotting=False
    )  # instantiate environment

    model = CNNTD3(
        state_dim=state_dim,
        action_dim=action_dim,
        max_action=max_action,
        num_robots=sim.num_robots,
        device=device,
        save_every=save_every,
        load_model=True,
        model_name="CNNTD3",
    )  # instantiate a model

    replay_buffer = get_buffer(
        model,
        sim,
        load_saved_buffer,
        pretrain,
        pretraining_iterations,
        training_iterations,
        batch_size,
    )
    con = torch.tensor(
        [[0.0 for _ in range(sim.num_robots - 1)] for _ in range(sim.num_robots)]
    )

    poses, distance, cos, sin, collision, goal, a, reward, positions, goal_positions = sim.step(
        [[0, 0] for _ in range(sim.num_robots)], con
    )  # get the initial step state

    while epoch < max_epochs:  # train until max_epochs is reached
        state, terminal = model.prepare_state(
            poses, distance, cos, sin, collision, goal, a, positions, goal_positions
        )  # get a state representation from the data returned by the environment

        action, connection, combined_weights = model.get_action(
            np.array(state), True
        )  # get an action from the model

        a_in = [
            [(a[0] + 1) / 4, a[1]] for a in action
        ]  # scale linear velocity to the [0, 0.5] m/s range

        poses, distance, cos, sin, collision, goal, a, reward, positions, goal_positions = sim.step(
            a_in, connection, combined_weights
        )  # get data from the environment
        next_state, terminal = model.prepare_state(
            poses, distance, cos, sin, collision, goal, a, positions, goal_positions
        )  # get a next state representation
        replay_buffer.add(
            state, action, reward, terminal, next_state
        )  # add experience to the replay buffer
        outside = outside_of_bounds(poses)
        if (
            any(terminal) or steps == max_steps or outside
        ):  # reset environment if a terminal state is reached, max_steps were taken, or a robot left the arena
            poses, distance, cos, sin, collision, goal, a, reward, positions, goal_positions = (
                sim.reset()
            )
            episode += 1
            if episode % train_every_n == 0:
                model.train(
                    replay_buffer=replay_buffer,
                    iterations=training_iterations,
                    batch_size=batch_size,
                )  # train the model and update its parameters

            steps = 0
        else:
            steps += 1

        if (
            episode + 1
        ) % episodes_per_epoch == 0:  # if the epoch is concluded, run evaluation
            episode = 0
            epoch += 1
            # evaluate(model, epoch, sim, eval_episodes=nr_eval_episodes)


def evaluate(model, epoch, sim, eval_episodes=10):
    print("..............................................")
    print(f"Epoch {epoch}. Evaluating scenarios")
    avg_reward = 0.0
    col = 0
    goals = 0
    for _ in range(eval_episodes):
        count = 0
        poses, distance, cos, sin, collision, goal, a, reward, positions, goal_positions = (
            sim.reset()
        )
        done = False
        while not done and count < 501:
            state, terminal = model.prepare_state(
                poses, distance, cos, sin, collision, goal, a, positions, goal_positions
            )
            action, connection, combined_weights = model.get_action(np.array(state), False)
            a_in = [[(a[0] + 1) / 4, a[1]] for a in action]
            poses, distance, cos, sin, collision, goal, a, reward, positions, goal_positions = (
                sim.step(a_in, connection, combined_weights)
            )
            avg_reward += sum(reward) / len(reward)
            count += 1
            if collision:
                col += 1
            if goal:
                goals += 1
            done = collision or goal
    avg_reward /= eval_episodes
    avg_col = col / eval_episodes
    avg_goal = goals / eval_episodes
    print(f"Average Reward: {avg_reward}")
    print(f"Average Collision rate: {avg_col}")
    print(f"Average Goal rate: {avg_goal}")
    print("..............................................")
    model.writer.add_scalar("eval/avg_reward", avg_reward, epoch)
    model.writer.add_scalar("eval/avg_col", avg_col, epoch)
    model.writer.add_scalar("eval/avg_goal", avg_goal, epoch)


if __name__ == "__main__":
    main()
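
Unlike multi_train.py, this script seeds the attention-based model with a flat connection matrix: one row per robot with one entry per other robot, all initialised to zero. A quick shape check for the 5-robot world used here (the interpretation as robot-to-robot links is an assumption; the shape follows directly from the comprehension in the script):

    import torch

    num_robots = 5  # as configured in multi_robot_world2.yaml

    # Same comprehension as in multi_train2.py.
    con = torch.tensor([[0.0 for _ in range(num_robots - 1)] for _ in range(num_robots)])
    print(con.shape)  # torch.Size([5, 4])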
