Commit 3a8914e

initial commit
1 parent 53d9f32 commit 3a8914e

7 files changed: +865 -0 lines changed
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
[base]
package = ocean
env_name = puffer_slimevolley
policy_name = Policy

[env]
; 1 for single-agent (vs bot), 2 for two-agent (self-play)
num_agents = 2

[train]
learning_rate = 0.015
total_timesteps = 10_000_000
num_envs = 128
num_workers = 8
batch_size = 1024
minibatch_size = 128
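
The [env] keys presumably map onto keyword arguments of the SlimeVolley constructor (num_agents matches the argument in slimevolley.py below) and the [train] keys onto the trainer. A minimal sketch of reading the sections with Python's configparser; the file name is an assumption, since the commit does not show this file's path:

# Hypothetical sketch; "slimevolley.ini" is an assumed file name.
import configparser

cfg = configparser.ConfigParser()
cfg.read("slimevolley.ini")

env_kwargs = {k: int(v) for k, v in cfg["env"].items()}   # {'num_agents': 2}
train_cfg = dict(cfg["train"])                             # string values, e.g. '10_000_000'
print(env_kwargs, train_cfg)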

pufferlib/ocean/environment.py

Lines changed: 1 addition & 0 deletions
@@ -158,6 +158,7 @@ def make_multiagent(buf=None, **kwargs):
     'whisker_racer': 'WhiskerRacer',
     'spaces': make_spaces,
     'multiagent': make_multiagent,
+    'slimevolley': 'SlimeVolley',
 }

 def env_creator(name='squared', *args, **kwargs):
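
With this entry registered, the environment should be constructible by name. A hedged sketch, assuming env_creator resolves the string entry to the SlimeVolley class the same way it does for the other Ocean environments:

# Hypothetical usage; assumes env_creator returns the env class for 'slimevolley'.
from pufferlib.ocean.environment import env_creator

SlimeVolley = env_creator('slimevolley')
env = SlimeVolley(num_envs=1, num_agents=2)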
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
#include "slimevolley.h"

#define Env SlimeVolley
#include "../env_binding.h"

// Read environment options from the Python kwargs and initialize the C env
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
    env->num_agents = unpack(kwargs, "num_agents");
    init(env);
    return 0;
}

// Copy per-episode log fields into the Python info dict
static int my_log(PyObject* dict, Log* log) {
    assign_to_dict(dict, "perf", log->perf);
    assign_to_dict(dict, "score", log->score);
    assign_to_dict(dict, "episode_return", log->episode_return);
    assign_to_dict(dict, "episode_length", log->episode_length);
    return 0;
}
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
import gymnasium
import numpy as np

from pufferlib.ocean.slimevolley import binding
import pufferlib
from pufferlib.ocean.torch import Policy
import torch


class SlimeVolley(pufferlib.PufferEnv):
    def __init__(self, num_envs=1, render_mode=None, log_interval=128, buf=None, seed=0,
            num_agents=1):
        assert num_agents in {1, 2}, "num_agents must be 1 or 2"
        num_obs = 12
        self.single_observation_space = gymnasium.spaces.Box(low=0, high=1,
            shape=(num_obs,), dtype=np.float32)
        self.single_action_space = gymnasium.spaces.MultiDiscrete([2, 2, 2])

        self.render_mode = render_mode
        self.num_agents = num_envs * num_agents
        self.log_interval = log_interval

        super().__init__(buf)
        # One C env per vectorized copy; each gets a num_agents-wide slice
        # of the shared observation/action/reward/terminal/truncation buffers
        c_envs = []
        for i in range(num_envs):
            c_env = binding.env_init(
                self.observations[i*num_agents:(i+1)*num_agents],
                self.actions[i*num_agents:(i+1)*num_agents],
                self.rewards[i*num_agents:(i+1)*num_agents],
                self.terminals[i*num_agents:(i+1)*num_agents],
                self.truncations[i*num_agents:(i+1)*num_agents],
                seed,
                num_agents=num_agents,
            )
            c_envs.append(c_env)

        self.c_envs = binding.vectorize(*c_envs)

    def reset(self, seed=0):
        binding.vec_reset(self.c_envs, seed)
        self.tick = 0
        return self.observations, []

    def step(self, actions):
        self.tick += 1
        self.actions[:] = actions
        binding.vec_step(self.c_envs)

        # Aggregate logs every log_interval steps
        info = []
        if self.tick % self.log_interval == 0:
            log = binding.vec_log(self.c_envs)
            if log:
                info.append(log)

        return (self.observations, self.rewards,
            self.terminals, self.truncations, info)

    def render(self):
        binding.vec_render(self.c_envs, 0)

    def close(self):
        binding.vec_close(self.c_envs)


if __name__ == "__main__":
    # Roll out a trained policy in single-agent mode (vs the built-in bot)
    env = SlimeVolley(num_envs=1, num_agents=1)
    observations, _ = env.reset()
    env.render()
    policy = Policy(env)
    policy.load_state_dict(torch.load("checkpoint.pt", map_location="cpu"))
    with torch.no_grad():
        while True:
            actions = policy(torch.from_numpy(observations))
            actions = [float(torch.argmax(a)) for a in actions[0]]
            o, r, t, _, i = env.step([actions])
            env.render()
            if t[0]:
                break
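
A quick random-action smoke test (not part of this commit) can exercise the vectorized env without a trained checkpoint; the import path below mirrors the other Ocean envs and is an assumption:

# Hypothetical smoke test; the import path is assumed, not shown in this commit.
import numpy as np
from pufferlib.ocean.slimevolley.slimevolley import SlimeVolley

env = SlimeVolley(num_envs=4, num_agents=2)  # 8 agents total
obs, _ = env.reset()
for _ in range(256):
    actions = np.stack([env.single_action_space.sample()
                        for _ in range(env.num_agents)])
    obs, rewards, terminals, truncations, info = env.step(actions)
env.close()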
Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
/* Pure C demo file for SlimeVolley. Build it with:
 *   bash scripts/build_ocean.sh target local (debug)
 *   bash scripts/build_ocean.sh target fast
 * We suggest building and debugging your env in pure C first. You
 * get faster builds and better error messages.
 */
#include "slimevolley.h"
#include <stdio.h>
#include <stdlib.h>


// Simple linear controller on agent/ball x-positions; always jumps
void abranti_simple_policy(float* obs, float* action) {
    float x_agent = obs[0];
    float x_ball = obs[4];
    float vx_ball = obs[6];
    float backward = (-23.757145f * x_agent + 23.206863f * x_ball + 0.7943352f * vx_ball) + 1.4617119f;
    float forward = -64.6463748f * backward + 22.4668393f;
    action[0] = forward;
    action[1] = backward;
    action[2] = 1.0f; // always jump
}

// Uniform random actions in [-1, 1]
void random_policy(float* obs, float* action) {
    action[0] = 2.0f * rand() / (float)RAND_MAX - 1.0f;
    action[1] = 2.0f * rand() / (float)RAND_MAX - 1.0f;
    action[2] = 2.0f * rand() / (float)RAND_MAX - 1.0f;
}

int main() {
    int num_obs = 12;
    int num_actions = 3;
    SlimeVolley env = {.num_agents = 1};
    init(&env);
    env.observations = (float*)calloc(env.num_agents*num_obs, sizeof(float));
    env.actions = (float*)calloc(num_actions*env.num_agents, sizeof(float));
    env.rewards = (float*)calloc(env.num_agents, sizeof(float));
    env.terminals = (unsigned char*)calloc(env.num_agents, sizeof(unsigned char));
    // Always call reset and render first
    c_reset(&env);
    c_render(&env);

    fprintf(stderr, "num agents: %d\n", env.num_agents);

    while (!WindowShouldClose()) {
        for (int i=0; i<env.num_agents; i++) {
            if (i == 0) {
                random_policy(&env.observations[12*i], &env.actions[3*i]);
            } else {
                abranti_simple_policy(&env.observations[12*i], &env.actions[3*i]);
            }
        }
        c_step(&env);
        c_render(&env);
        // Only num_agents entries are allocated, so check just those
        int done = 0;
        for (int i=0; i<env.num_agents; i++) {
            done |= env.terminals[i];
        }
        if (done) {
            for (int i=0; i<env.num_agents; i++) {
                fprintf(stderr, "Episode ended. Reward[%d]: %f\n", i, env.rewards[i]);
            }
            break;
        }
    }

    // Try to clean up after yourself
    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    c_close(&env);
}
