Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions pufferlib/config/ocean/predprey.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Configuration for the puffer_predprey Ocean environment: environment
# parameters, training hyperparameters and the hyperparameter-sweep search space.
[base]
package = ocean
env_name = puffer_predprey
rnn_name = Recurrent


[vec]
num_envs = 8

# Environment parameters. vision, num_agents and the reward_* / *_scale /
# timestep_reward entries use the same names as the keyword arguments unpacked
# by predprey/binding.c (my_init). width and height are also unpacked there but
# are not set in this file.
# NOTE(review): presumably width/height get defaults elsewhere -- confirm.
[env]
num_envs = 64
vision = 3
num_agents = 4
report_interval = 1
reward_death_scale = 0.14320154190448353
reward_eat = 0.6301939255961027
reward_collect = 1
timestep_reward = -0.001012632066429986
hp_reward_scale = 0.07674633247055918
held_food_reward_scale = 0.2643837513970884
reward_fireplace_lit = 0.7
reward_store_chest = 0.5

# Training hyperparameters.
# NOTE(review): the high-precision values look like the output of a previous
# sweep -- confirm before hand-editing.
# NOTE(review): minibatch_size (65536) is larger than max_minibatch_size
# (32768) -- confirm this combination is intended.
[train]
total_timesteps = 100_000_000
checkpoint_interval = 100
adam_beta1 = 0.9925640021442416
adam_beta2 = 0.9
adam_eps = 6.225983651908837e-10
bptt_horizon = 64
clip_coef = 1
ent_coef = 0.000999567018772538
gae_lambda = 0.9948939854010467
gamma = 0.9997469057538332
learning_rate = 0.002574057351505564
max_grad_norm = 1.8944338753964156
max_minibatch_size = 32768
minibatch_size = 65536
prio_alpha = 0.9328200510590207
prio_beta0 = 0.9225942853355249
vf_clip_coef = 0.1
vf_coef = 0.6935921910790133
vtrace_c_clip = 2.819851610841173
vtrace_rho_clip = 5


# Sweep settings: the sweep maximizes the "score" metric.
[sweep]
method = Protein
metric = score
goal = maximize
downsample = 5
use_gpu = True
prune_pareto = True

# Per-parameter search spaces. Each [sweep.<section>.<key>] section names the
# config key to vary and gives its distribution and bounds.
[sweep.env.reward_fireplace_lit]
distribution = uniform
min = -1.0
max = 1.0
mean = 0.5
scale = auto

[sweep.env.reward_store_chest]
distribution = uniform
min = -1.0
max = 1.0
mean = 0.5
scale = auto

[sweep.train.total_timesteps]
distribution = log_normal
min = 3e7
max = 1e10
mean = 2e8
scale = time
1 change: 1 addition & 0 deletions pufferlib/ocean/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def make_multiagent(buf=None, **kwargs):
'spaces': make_spaces,
'multiagent': make_multiagent,
'slimevolley': 'SlimeVolley',
'predprey': 'PredPrey',
}

def env_creator(name='squared', *args, **kwargs):
Expand Down
30 changes: 30 additions & 0 deletions pufferlib/ocean/predprey/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include "predprey.h"

#define Env PredPrey
#include "../env_binding.h"

/*
 * Fill in a PredPrey environment from the Python keyword arguments and then
 * run its C-side initialisation.
 *
 * Every configurable field is read with unpack() under a kwargs key that is
 * spelled exactly like the struct member, so a single stringizing helper
 * keeps the key and the field in lock-step. `args` is not used.
 *
 * Always returns 0.
 */
#define LOAD_KWARG(field) env->field = unpack(kwargs, #field)
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
    // Map size, population and observation radius.
    LOAD_KWARG(width);
    LOAD_KWARG(height);
    LOAD_KWARG(num_agents);
    LOAD_KWARG(vision);

    // Reward shaping coefficients.
    LOAD_KWARG(reward_death_scale);
    LOAD_KWARG(reward_eat);
    LOAD_KWARG(reward_collect);
    LOAD_KWARG(timestep_reward);
    LOAD_KWARG(hp_reward_scale);
    LOAD_KWARG(held_food_reward_scale);
    LOAD_KWARG(reward_fireplace_lit);
    LOAD_KWARG(reward_store_chest);

    // Runs only after every field above has been assigned.
    init_cenv(env);
    return 0;
}
#undef LOAD_KWARG

/*
 * Copy the aggregated log fields into the Python dict handed in by the
 * binding layer. Each entry is stored under a key spelled exactly like the
 * Log member it comes from.
 *
 * Always returns 0.
 */
#define EXPORT_STAT(field) assign_to_dict(dict, #field, log->field)
static int my_log(PyObject* dict, Log* log) {
    EXPORT_STAT(perf);
    EXPORT_STAT(score);
    EXPORT_STAT(episode_return);
    EXPORT_STAT(steals);
    EXPORT_STAT(collects);
    return 0;
}
#undef EXPORT_STAT
42 changes: 42 additions & 0 deletions pufferlib/ocean/predprey/grid.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef GRID_H
#define GRID_H

#define GRID_HEIGHT 32
#define GRID_WIDTH 32

/*
 * Default 32x32 map layout.
 *
 * The outermost three rows and three columns on every side hold 0x02; the
 * 26x26 interior holds 0x00.
 * NOTE(review): presumably 0x02 is the wall code and 0x00 the empty-cell code,
 * and the 3-cell border matches the default vision radius -- confirm against
 * predprey.h.
 *
 * The table is assembled from two row templates. The helper macros are local
 * to this header and are undefined again after the table.
 */
#define GRID_B3_  0x02, 0x02, 0x02
#define GRID_B8_  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02
#define GRID_Z13_ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
#define GRID_ROW_SOLID_ {GRID_B8_, GRID_B8_, GRID_B8_, GRID_B8_}
#define GRID_ROW_OPEN_  {GRID_B3_, GRID_Z13_, GRID_Z13_, GRID_B3_}

static const unsigned char grid[GRID_HEIGHT][GRID_WIDTH] = {
    GRID_ROW_SOLID_,  // row  0
    GRID_ROW_SOLID_,  // row  1
    GRID_ROW_SOLID_,  // row  2
    GRID_ROW_OPEN_,   // row  3
    GRID_ROW_OPEN_,   // row  4
    GRID_ROW_OPEN_,   // row  5
    GRID_ROW_OPEN_,   // row  6
    GRID_ROW_OPEN_,   // row  7
    GRID_ROW_OPEN_,   // row  8
    GRID_ROW_OPEN_,   // row  9
    GRID_ROW_OPEN_,   // row 10
    GRID_ROW_OPEN_,   // row 11
    GRID_ROW_OPEN_,   // row 12
    GRID_ROW_OPEN_,   // row 13
    GRID_ROW_OPEN_,   // row 14
    GRID_ROW_OPEN_,   // row 15
    GRID_ROW_OPEN_,   // row 16
    GRID_ROW_OPEN_,   // row 17
    GRID_ROW_OPEN_,   // row 18
    GRID_ROW_OPEN_,   // row 19
    GRID_ROW_OPEN_,   // row 20
    GRID_ROW_OPEN_,   // row 21
    GRID_ROW_OPEN_,   // row 22
    GRID_ROW_OPEN_,   // row 23
    GRID_ROW_OPEN_,   // row 24
    GRID_ROW_OPEN_,   // row 25
    GRID_ROW_OPEN_,   // row 26
    GRID_ROW_OPEN_,   // row 27
    GRID_ROW_OPEN_,   // row 28
    GRID_ROW_SOLID_,  // row 29
    GRID_ROW_SOLID_,  // row 30
    GRID_ROW_SOLID_   // row 31
};

#undef GRID_ROW_OPEN_
#undef GRID_ROW_SOLID_
#undef GRID_Z13_
#undef GRID_B8_
#undef GRID_B3_

#endif // GRID_H
175 changes: 175 additions & 0 deletions pufferlib/ocean/predprey/predprey.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#include <raylib.h>
#include <unistd.h>
#include <stdlib.h>
// #include "puffernet.h"
#include "predprey.h"

int main() {
srand(43);
PredPrey env = {
.num_agents = 4,
.width = 32,
.height = 32,
.vision = 3,
.reward_death_scale = 1.0f,
.reward_eat = 0.0f,
.reward_collect = 0.0f,
.timestep_reward = 0.0f,
.hp_reward_scale = 0.0f,
.held_food_reward_scale = 0.0f,
};
allocate_cenv(&env);
c_reset(&env);
c_render(&env);

// Weights* weights = load_weights("resources/cpr/cpr_weights.bin", 139270);
// int logit_sizes[] = {5};
// LinearLSTM* net = make_linearlstm(weights, env.num_agents, 49, logit_sizes, 1);
while (!WindowShouldClose()) {

for (int i = 0; i < env.num_agents; i++) {
env.actions[i] = rand() % 7;
}

// User can take control of the first puffer
if (IsKeyDown(KEY_LEFT_SHIFT)) {
sleep(1);
env.actions[0] = NO_MOVE;
if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W))
env.actions[0] = UP;
if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S))
env.actions[0] = DOWN;
if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A))
env.actions[0] = LEFT;
if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D))
env.actions[0] = RIGHT;
if (IsKeyDown(KEY_C))
env.actions[0] = INTERACT;
if (IsKeyDown(KEY_E))
env.actions[0] = EAT;

printf("Getting user input %d\n", env.actions[0]);
} else {
// for (int i = 0; i < env.num_agents*49; i++) {
// net->obs[i] = env.observations[i];
// }
// forward_linearlstm(net, net->obs, env.actions);
}

c_step(&env);
c_render(&env);

}
free_CEnv(&env);
close_renderer(env.client);

return 0;
}
/////////////////////
// Reset stress test
/////////////////////

// int main() {
// srand(43);
// PredPrey env = {
// .num_agents = 4,
// .width = 32,
// .height = 32,
// .vision = 3,
// .reward_death_scale = 1.0f,
// .reward_eat = 0.0f,
// .reward_collect = 0.0f,
// .timestep_reward = 0.0f,
// .reward_steal = 0.0f,
// .hp_reward_scale = 0.0f,
// .held_food_reward_scale = 0.0f,
// .food_base_spawn_rate = 1e-1,
// };
// for (int i = 0; i < 1000; i++) {
// allocate_cenv(&env);
// c_reset(&env);
// c_render(&env);

// // Weights* weights = load_weights("resources/cpr/cpr_weights.bin", 139270);
// // int logit_sizes[] = {5};
// // LinearLSTM* net = make_linearlstm(weights, env.num_agents, 49, logit_sizes, 1);
// for (int i = 0; i < 10; i++) {

// for (int i = 0; i < env.num_agents; i++) {
// env.actions[i] = rand() % 7;
// }

// // User can take control of the first puffer
// if (IsKeyDown(KEY_LEFT_SHIFT)) {
// sleep(1);
// env.actions[0] = NO_MOVE;
// if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W))
// env.actions[0] = UP;
// if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S))
// env.actions[0] = DOWN;
// if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A))
// env.actions[0] = LEFT;
// if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D))
// env.actions[0] = RIGHT;
// if (IsKeyDown(KEY_C))
// env.actions[0] = INTERACT;
// if (IsKeyDown(KEY_E))
// env.actions[0] = EAT;

// printf("Getting user input %d\n", env.actions[0]);
// } else {
// // for (int i = 0; i < env.num_agents*49; i++) {
// // net->obs[i] = env.observations[i];
// // }
// // forward_linearlstm(net, net->obs, env.actions);
// }

// c_step(&env);
// c_render(&env);

// }
// close_renderer(env.client);
// free_CEnv(&env);
// }
// return 0;
// }

////////////////
// For profile
////////////////
// #include <raylib.h>
// #include <unistd.h>
// #include <stdlib.h>
// #include "predprey.h"

// int main() {
// PredPrey env = {
// .num_agents = 4,
// .width = 32,
// .height = 32,
// .vision = 3,
// .reward_food = 0.0f,
// .food_base_spawn_rate = 1e-1,
// };
// allocate_cenv(&env);
// c_reset(&env);

// long i = 0;
// while (true) {

// for (int i = 0; i < env.num_agents; i++) {
// env.actions[i] = rand() % 7;
// }

// c_step(&env);

// i++;
// if (i > 10000000) {
// printf("breaking");
// break;
// }
// }
// free_CEnv(&env);

// return 0;
// }
Loading
Loading