Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions pufferlib/config/ocean/lock_key.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[base]
package = ocean
env_name = puffer_lock_key
policy_name = Policy
rnn_name = Recurrent

[env]
num_envs = 4096
num_keys = 1
size = 8
log_interval = 128
obs_dist = 2

[train]
total_timesteps = 20_000_000
gamma = 0.95
learning_rate = 0.05
minibatch_size = 32768
1 change: 1 addition & 0 deletions pufferlib/ocean/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def make_multiagent(buf=None, **kwargs):
'spaces': make_spaces,
'multiagent': make_multiagent,
'slimevolley': 'SlimeVolley',
'lock_key': 'LockKey',
}

def env_creator(name='squared', *args, **kwargs):
Expand Down
32 changes: 32 additions & 0 deletions pufferlib/ocean/lock_key/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include "lock_key.h"

#define Env LockKey
#include "../env_binding.h"

/*
 * Configure one environment from the Python keyword arguments and allocate
 * its full-state grid.
 *
 * Reads "size", "num_keys" and "obs_dist" from kwargs through unpack()
 * (declared in env_binding.h) and stores them on env.
 *
 * c_reset() places the agent, one lock and num_keys keys on distinct cells by
 * rejection sampling, so the grid must hold at least num_keys + 2 cells;
 * otherwise that sampling never terminates (and size == 0 would be a modulo
 * by zero). Such configurations are rejected here instead.
 *
 * Returns 0 on success, -1 on an unusable configuration or allocation
 * failure. env->state is NULL whenever -1 is returned.
 */
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
    /* Largest side whose square still fits in a 32-bit int. */
    enum { MAX_SIDE = 46340 };

    env->state = NULL;
    env->size = unpack(kwargs, "size");
    env->num_keys = unpack(kwargs, "num_keys");
    env->obs_dist = unpack(kwargs, "obs_dist");

    if (env->size < 2 || env->size > MAX_SIDE) return -1;

    int tiles = env->size * env->size;
    if (env->num_keys < 0 || env->num_keys > tiles - 2) return -1;

    env->state = (unsigned char*)calloc((size_t)tiles, sizeof(unsigned char));
    if (!env->state) return -1;

    return 0;
}

/*
 * Release the full-state grid owned by env and clear the pointer so a second
 * call is harmless. Always returns 0.
 *
 * NOTE(review): whether env_binding.h ever invokes my_close is not visible
 * from this file -- confirm the grid is actually released on vec_close.
 */
static int my_close(Env* env) {
    /* free(NULL) is defined to do nothing, so no guard is required. */
    free(env->state);
    env->state = NULL;
    return 0;
}

/*
 * Export the aggregated statistics in log to the Python dictionary dict.
 *
 * The keys are written in the order score, perf, episode_return,
 * episode_length. Always returns 0.
 */
static int my_log(PyObject* dict, Log* log) {
    const struct {
        char* key;
        float value;
    } entries[] = {
        {"score", log->score},
        {"perf", log->perf},
        {"episode_return", log->episode_return},
        {"episode_length", log->episode_length},
    };
    const int entry_count = (int)(sizeof(entries) / sizeof(entries[0]));

    for (int i = 0; i < entry_count; i++) {
        assign_to_dict(dict, entries[i].key, entries[i].value);
    }
    return 0;
}
51 changes: 51 additions & 0 deletions pufferlib/ocean/lock_key/lock_key.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#include <time.h>
#include "lock_key.h"

int main() {
srand((unsigned int)time(NULL));

LockKey env = {.size = 8, .num_keys = 3, .obs_dist = 2};

int tiles = env.size * env.size;

env.state = (unsigned char*)calloc(tiles, sizeof(unsigned char));
env.observations = (unsigned char*)calloc(tiles, sizeof(unsigned char));
env.actions = (int*)calloc(1, sizeof(int));
env.rewards = (float*)calloc(1, sizeof(float));
env.terminals = (unsigned char*)calloc(1, sizeof(unsigned char));
env.truncations = (unsigned char*)calloc(1, sizeof(unsigned char)); // optional

c_reset(&env);
c_render(&env);

while (!WindowShouldClose()) {
if (IsKeyDown(KEY_LEFT_SHIFT)) {
if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) {
env.actions[0] = 0;
} else if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) {
env.actions[0] = 1;
} else if (IsKeyDown(KEY_W) || IsKeyDown(KEY_UP)) {
env.actions[0] = 2;
} else if (IsKeyDown(KEY_S) || IsKeyDown(KEY_DOWN)) {
env.actions[0] = 3;
} else {
env.actions[0] = -1; // no-op
}
} else {
env.actions[0] = rand() % 5; // 4 == no-op, still fine
}

c_step(&env);
c_render(&env);
}

free(env.state);
free(env.observations);
free(env.actions);
free(env.rewards);
free(env.terminals);
if (env.truncations) free(env.truncations);

c_close(&env);
return 0;
}
185 changes: 185 additions & 0 deletions pufferlib/ocean/lock_key/lock_key.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#ifndef LOCK_KEY_H
#define LOCK_KEY_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "raylib.h"

/*
 * Render palette (RGBA). c_render draws the agent in cyan, the lock in red
 * and keys in green; visible floor uses the background colour and cells
 * outside the agent's view are black.
 *
 * Plain brace initialisers are used on purpose: initialising an object with
 * static storage duration from a compound literal is a compiler extension,
 * not a constant expression in ISO C.
 */
static const Color PUFF_RED = {187, 0, 0, 255};
static const Color PUFF_CYAN = {0, 187, 187, 255};
static const Color PUFF_GREEN = {0, 187, 0, 255};
static const Color PUFF_BACKGROUND = {65, 30, 40, 255};
static const Color PUFF_BLACK = {0, 0, 0, 255};

// Running totals accumulated by add_log() each time an episode ends.
// NOTE(review): the binding layer (env_binding.h, not shown here) may rely on
// this exact field layout -- confirm before reordering or changing types.
typedef struct {
// Incremented by 1 for every episode whose final reward was positive.
float perf;
// Sum of the final-step rewards of the logged episodes.
float score;
// Also accumulates only the final-step reward (see add_log).
float episode_return;
// Sum of env->tick at the moment each episode ended.
float episode_length;
// Number of episodes folded into the totals above.
float n;
} Log;

// One LockKey grid-world instance.
// Cell values used in state/observations: 0 = empty, 1 = agent, 2 = lock,
// 3 = key.
typedef struct {
// Episode statistics accumulated by add_log().
Log log;

// observations: partial view for agent; size*size bytes holding a copy of
// state for cells within obs_dist of the agent and 0 everywhere else
unsigned char* observations;
// state: full system state; size*size bytes, row-major (index = y*size + x)
unsigned char* state;

// actions[0]: 0 = x-1, 1 = x+1, 2 = y-1, 3 = y+1; any other value leaves the
// agent in place
int* actions;
// rewards[0]: reward produced by the most recent c_step
float* rewards;
// terminals[0]: set to 1 by c_step on the step that ends an episode
unsigned char* terminals;
// truncations[0]: cleared by c_step when the pointer is non-NULL; may be NULL
unsigned char* truncations;

// Side length of the square grid.
int size;
// Number of keys placed at reset; all must be collected to open the lock.
int num_keys;
// Steps taken in the current episode.
int tick;
// Agent column.
int x;
// Agent row.
int y;
// Keys picked up so far in the current episode.
int num_keys_collected;
// View radius: a cell is visible when max(|dx|, |dy|) <= obs_dist.
int obs_dist;
} LockKey;

/* Row-major index of grid cell (x, y) inside the state/observation arrays. */
static inline int lk_pos(LockKey* env, int x, int y) {
    const int row_start = y * env->size;
    return row_start + x;
}

/*
 * Returns 1 when cell (x, y) lies inside the agent's square field of view,
 * i.e. both its column and row offsets from the agent are at most obs_dist;
 * returns 0 otherwise.
 */
static inline int lk_visible(LockKey* env, int x, int y) {
    const int col_gap = (x < env->x) ? env->x - x : x - env->x;
    const int row_gap = (y < env->y) ? env->y - y : y - env->y;
    /* max(col_gap, row_gap) <= r  <=>  both gaps are <= r */
    return col_gap <= env->obs_dist && row_gap <= env->obs_dist;
}

/*
 * Rebuild the agent's partial observation from the full state: every cell is
 * first blanked to 0, then cells within view of the agent receive a copy of
 * the corresponding state value.
 */
static inline void lk_update_observations(LockKey* env) {
    const int side = env->size;
    memset(env->observations, 0, (side * side) * sizeof(unsigned char));

    for (int row = 0; row < side; row++) {
        const unsigned char* src_row = env->state + lk_pos(env, 0, row);
        unsigned char* dst_row = env->observations + lk_pos(env, 0, row);

        for (int col = 0; col < side; col++) {
            if (lk_visible(env, col, row)) {
                dst_row[col] = src_row[col];
            }
        }
    }
}

/*
 * Fold the episode that just ended into env->log.
 *
 * Must be called after rewards[0] holds the final-step reward and before the
 * environment is reset (tick is read here and cleared by c_reset).
 *
 * Declared static inline like every other function in this header so that
 * including the header from more than one translation unit cannot produce
 * duplicate-symbol link errors.
 *
 * NOTE(review): score and episode_return both accumulate only the final-step
 * reward, not the sum of rewards over the episode -- confirm this is intended.
 */
static inline void add_log(LockKey* env) {
    const float final_reward = env->rewards[0];

    env->log.perf += (final_reward > 0) ? 1 : 0;
    env->log.score += final_reward;
    env->log.episode_return += final_reward;
    env->log.episode_length += env->tick;
    env->log.n++;
}

/*
 * Start a new episode: clear the grid, put the agent on the centre cell,
 * drop one lock and num_keys keys on random free cells, zero the per-episode
 * counters and refresh the observation buffer.
 *
 * Precondition: the grid must contain at least num_keys + 2 cells. Placement
 * uses rejection sampling with rand(), which never terminates if no free
 * cell remains.
 */
static inline void c_reset(LockKey* env) {
    const int cell_count = env->size * env->size;
    memset(env->state, 0, cell_count * sizeof(unsigned char));

    env->tick = 0;
    env->num_keys_collected = 0;

    /* Agent always starts in the middle of the board. */
    env->x = env->size / 2;
    env->y = env->size / 2;
    const int agent_cell = lk_pos(env, env->x, env->y);
    env->state[agent_cell] = 1;

    /* Lock: any cell except the agent's. */
    int lock_cell = rand() % cell_count;
    while (lock_cell == agent_cell) {
        lock_cell = rand() % cell_count;
    }
    env->state[lock_cell] = 2;

    /* Keys: each one goes on a cell that is still empty. */
    for (int placed = 0; placed < env->num_keys; placed++) {
        int key_cell = rand() % cell_count;
        while (env->state[key_cell] != 0) {
            key_cell = rand() % cell_count;
        }
        env->state[key_cell] = 3;
    }

    lk_update_observations(env);
}

/* Finish the current episode with final_reward, log it and start a new one. */
static inline void lk_finish_episode(LockKey* env, float final_reward) {
    env->rewards[0] = final_reward;
    env->terminals[0] = 1;
    add_log(env);
    c_reset(env);
}

/*
 * Advance the environment by one action taken from actions[0].
 *
 * Rewards: -0.1 per step, +1.0 on top of that when a key is picked up,
 * +3.0 for reaching the lock with every key collected, and -3.0 for leaving
 * the grid or exceeding the step budget of 3*size + num_keys^2. The last two
 * outcomes set terminals[0], log the episode and reset the environment, so
 * the observation left in the buffer already belongs to the next episode.
 */
static inline void c_step(LockKey* env) {
    env->tick += 1;
    env->rewards[0] = -0.1f;
    env->terminals[0] = 0;
    if (env->truncations) env->truncations[0] = 0;

    /* Lift the agent off its current cell; a lock underneath stays in place. */
    unsigned char* departed = &env->state[lk_pos(env, env->x, env->y)];
    if (*departed != 2) {
        *departed = 0;
    }

    switch (env->actions[0]) {
        case 0: env->x -= 1; break;
        case 1: env->x += 1; break;
        case 2: env->y -= 1; break;
        case 3: env->y += 1; break;
        default: break; /* anything else: stay put */
    }

    const int step_budget = 3 * env->size + env->num_keys * env->num_keys;
    const int out_of_time = env->tick > step_budget;
    const int off_grid = env->x < 0 || env->x >= env->size
        || env->y < 0 || env->y >= env->size;
    if (out_of_time || off_grid) {
        lk_finish_episode(env, -3.0f);
        return;
    }

    const int here = lk_pos(env, env->x, env->y);
    const unsigned char cell = env->state[here];

    if (cell == 3) {
        env->rewards[0] += 1.0f;
        env->num_keys_collected += 1;
    } else if (cell == 2 && env->num_keys_collected == env->num_keys) {
        lk_finish_episode(env, 3.0f);
        return;
    }

    /* Standing on a still-closed lock leaves the lock marker untouched. */
    if (cell != 2) {
        env->state[here] = 1;
    }

    lk_update_observations(env);
}

/*
 * Draw the agent's current observation with raylib, opening the window on
 * first use (64 pixels per cell, 5 FPS). Pressing ESC exits the process.
 *
 * Cells in view get the background colour, cells out of view are black, and
 * any non-zero observed value is drawn on top in its own colour.
 */
static inline void c_render(LockKey* env) {
    const int cell_px = 64;

    if (!IsWindowReady()) {
        InitWindow(cell_px * env->size, cell_px * env->size, "LockKey");
        SetTargetFPS(5);
    }

    if (IsKeyDown(KEY_ESCAPE)) exit(0);

    BeginDrawing();

    for (int row = 0; row < env->size; row++) {
        for (int col = 0; col < env->size; col++) {
            const int left = col * cell_px;
            const int top = row * cell_px;

            DrawRectangle(left, top, cell_px, cell_px,
                lk_visible(env, col, row) ? PUFF_BACKGROUND : PUFF_BLACK);

            Color tint;
            switch (env->observations[lk_pos(env, col, row)]) {
                case 0: continue; /* nothing to draw on this cell */
                case 1: tint = PUFF_CYAN; break;
                case 2: tint = PUFF_RED; break;
                case 3: tint = PUFF_GREEN; break;
                default: tint = PUFF_BACKGROUND; break;
            }
            DrawRectangle(left, top, cell_px, cell_px, tint);
        }
    }

    EndDrawing();
}

/*
 * Close the render window if one is open. env is not used and none of its
 * buffers are released here.
 */
static inline void c_close(LockKey* env) {
    (void)env;
    if (!IsWindowReady()) {
        return;
    }
    CloseWindow();
}

#endif
46 changes: 46 additions & 0 deletions pufferlib/ocean/lock_key/lock_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import gymnasium
import numpy as np
import pufferlib
from pufferlib.ocean.lock_key import binding

class LockKey(pufferlib.PufferEnv):
    """Vectorised wrapper around the C LockKey grid-world.

    Each of the ``num_envs`` environments is a ``size`` x ``size`` grid in
    which the agent must collect ``num_keys`` keys and then reach the lock.
    Observations are the flattened grid as ``uint8`` values in ``[0, 3]``;
    the action space has five discrete actions.
    """

    def __init__(self, num_envs=1, render_mode=None, log_interval=128, size=8, num_keys=3, buf=None, seed=0, obs_dist=2):
        """Create the observation/action spaces and the C environments.

        Args:
            num_envs: number of environments stepped together.
            render_mode: stored on the instance; not interpreted here.
            log_interval: a log entry is emitted every ``log_interval`` steps.
            size: side length of the square grid.
            num_keys: keys that must be collected before the lock opens.
            buf: buffer object forwarded to ``pufferlib.PufferEnv``.
            seed: seed forwarded to ``binding.vec_init``.
            obs_dist: view radius of the agent, in cells.
        """
        self.single_observation_space = gymnasium.spaces.Box(
            low=0, high=3, shape=(size * size,), dtype=np.uint8
        )

        self.single_action_space = gymnasium.spaces.Discrete(5)

        self.render_mode = render_mode
        self.num_agents = num_envs
        self.log_interval = log_interval
        super().__init__(buf)

        # Initialised here as well as in reset() so that calling step()
        # before reset() does not fail with AttributeError.
        self.tick = 0

        self.c_envs = binding.vec_init(
            self.observations, self.actions, self.rewards,
            self.terminals, self.truncations, num_envs, seed,
            size=size,
            num_keys=num_keys,
            obs_dist=obs_dist,
        )

    def reset(self, seed=0):
        """Reset every environment; returns ``(observations, [])``."""
        binding.vec_reset(self.c_envs, seed)
        self.tick = 0
        return self.observations, []

    def step(self, actions):
        """Apply ``actions`` to all environments and advance them one step.

        Returns ``(observations, rewards, terminals, truncations, info)``.
        ``info`` holds one log entry from ``binding.vec_log`` on every
        ``log_interval``-th call and is empty otherwise.
        """
        self.tick += 1
        self.actions[:] = actions
        binding.vec_step(self.c_envs)

        info = []
        if self.tick % self.log_interval == 0:
            info.append(binding.vec_log(self.c_envs))

        return (self.observations, self.rewards,
                self.terminals, self.truncations, info)

    def render(self):
        """Render the first environment (index 0)."""
        binding.vec_render(self.c_envs, 0)

    def close(self):
        """Close the C environments."""
        binding.vec_close(self.c_envs)
Binary file not shown.