65 commits
5e371e5
Init template for Overcooked environment implementation
mmbajo Aug 11, 2025
c93efef
Refactor Overcooked environment for single-agent gameplay
mmbajo Aug 12, 2025
55d0987
Update .gitignore to include dsym files
mmbajo Sep 10, 2025
4cd598c
Refactor Overcooked environment dimensions and grid layout
mmbajo Sep 10, 2025
319e514
Fix position validation in Overcooked environment
mmbajo Sep 10, 2025
8b16850
Add interaction handling in Overcooked environment
mmbajo Sep 10, 2025
72641d6
Add item management and agent color rendering in Overcooked environment
mmbajo Sep 11, 2025
3eefe9f
Add new assets for Overcooked environment
mmbajo Sep 11, 2025
e2d3138
Enhance Overcooked environment with detailed texture support
mmbajo Sep 11, 2025
c5a93b8
Update chef sprite textures in Overcooked environment
mmbajo Sep 11, 2025
9fa3cfd
Edit the rendering logic for ingredient box. We only use Onions for now.
mmbajo Sep 11, 2025
816b92b
Add cooking mechanics to Overcooked environment
mmbajo Sep 11, 2025
5f694b7
Enhance Overcooked gameplay with plated soup mechanics
mmbajo Sep 11, 2025
55a892c
Add Overcooked configuration file
mmbajo Sep 12, 2025
190fd9d
Update Overcooked configuration to adjust agent settings
mmbajo Sep 12, 2025
b17af77
Enhance Overcooked environment for multi-agent gameplay
mmbajo Sep 12, 2025
1f4443b
Refactor observation handling in Overcooked environment
mmbajo Sep 12, 2025
cd66213
Refactor observation structure in Overcooked environment
mmbajo Sep 12, 2025
c796718
Add dish evaluation and reward system in Overcooked environment
mmbajo Sep 12, 2025
8faebff
Update Overcooked environment configuration for gameplay balance
mmbajo Sep 12, 2025
3c7f9cb
Add dish serving evaluation and enhance rendering in Overcooked envir…
mmbajo Sep 12, 2025
7e9ca00
Add Overcooked game type to environment configuration
mmbajo Sep 13, 2025
40f01d9
Implement neural network support in Overcooked environment
mmbajo Sep 13, 2025
3849caf
Add user-defined statistics tracking in Overcooked environment
mmbajo Sep 13, 2025
18176f6
Refactor dish evaluation logic in Overcooked environment
mmbajo Sep 13, 2025
d2f7ff7
Refactor dish evaluation logic in Overcooked environment
mmbajo Sep 13, 2025
548fb50
Enhance observation structure in Overcooked environment
mmbajo Sep 14, 2025
cfc3638
Refactor observation computation in Overcooked environment
mmbajo Sep 16, 2025
5bdc016
Initialize episode counter and update performance metrics in Overcook…
mmbajo Sep 16, 2025
58f9ac7
Update observation vector size in Overcooked environment
mmbajo Sep 16, 2025
1457f65
Attempt fix for proper logging of user stats
mmbajo Sep 16, 2025
6acfb5d
Reset log fields in Overcooked environment to ensure accurate trackin…
mmbajo Sep 18, 2025
1ae9cde
Add sweep configuration for training parameters in Overcooked environ…
mmbajo Sep 18, 2025
82857d8
Enhance item handling in Overcooked environment
mmbajo Sep 19, 2025
015f1b9
Fix item drop condition in Overcooked environment
mmbajo Sep 19, 2025
675776f
Include rewards in obs for better credit assignment in Overcooked env.
mmbajo Sep 19, 2025
0e28c2d
Enhance observation vector in Overcooked environment
mmbajo Sep 20, 2025
d2b6eca
Refactor distance calculations and observation handling in Overcooked…
mmbajo Sep 20, 2025
6bee4b3
Update coordinate types and frame rate in Overcooked environment
mmbajo Sep 20, 2025
55a22b8
Add wall texture and update grid representation in Overcooked environ…
mmbajo Sep 20, 2025
6e06f2f
Refactor proximity feature calculations in Overcooked environment
mmbajo Sep 20, 2025
f534a35
Refactor item type definitions and enhance observation panel in Overc…
mmbajo Sep 20, 2025
4a0413b
Update observation size and enhance position calculation in Overcooke…
mmbajo Sep 20, 2025
c785ee9
Refactor absolute position calculation in compute_observations function
mmbajo Sep 20, 2025
13a4861
Refactor and clean up Overcooked.h file
mmbajo Sep 22, 2025
c63f6de
Update reward system and function signatures in Overcooked environment
mmbajo Sep 22, 2025
4df28fd
Merge branch '3.0' into roze-overcooked-dev
mmbajo Sep 22, 2025
0107e7b
Update training parameters in Overcooked configuration
mmbajo Sep 22, 2025
60d47cb
This config gets over 0.5 explained variance!
mmbajo Sep 22, 2025
324689e
Update Overcooked environment for single agent gameplay
mmbajo Sep 26, 2025
30f44fb
Test 1 agent config to verify learning - still cant learn fully
mmbajo Sep 26, 2025
7cb4dbc
Remove teammate mirroring since its redundant - we put everything int…
mmbajo Oct 7, 2025
24b4fdb
Add TODO comments for ingredient handling in Overcooked environment
mmbajo Oct 9, 2025
896912a
Add TODO comment to generalize reward handling in evaluate_dish_serve…
mmbajo Oct 9, 2025
3b08355
Remove debug observation printing and unused debug flag from Overcook…
mmbajo Oct 9, 2025
e15e469
Add README for Overcooked environment -> mainly describes reward and …
mmbajo Oct 9, 2025
3435db1
Update training parameters
mmbajo Oct 9, 2025
6add1dc
Update readme
mmbajo Oct 9, 2025
d2fe6c0
Bugfix: Update ingredient limits and observation logic in Overcooked …
mmbajo Oct 10, 2025
94045ff
Refactor reward system in Overcooked environment
mmbajo Oct 10, 2025
3dd554e
Refactor cooking state management in Overcooked environment
mmbajo Oct 10, 2025
48a7ce7
Fix dish serving logic in Overcooked environment
mmbajo Oct 10, 2025
70c717b
Fix wall detection logic in Overcooked environment
mmbajo Oct 11, 2025
e8a112a
Merge remote-tracking branch 'upstream/3.0' into roze-overcooked-dev
mmbajo Dec 6, 2025
038121a
Add reward for ingredient picked in Overcooked environment
mmbajo Dec 6, 2025
3 changes: 3 additions & 0 deletions .gitignore
@@ -162,3 +162,6 @@ pufferlib/ocean/impulse_wars/*-release/
pufferlib/ocean/impulse_wars/debug-*/
pufferlib/ocean/impulse_wars/release-*/
pufferlib/ocean/impulse_wars/benchmark/

# dsym files
*.dSYM/
39 changes: 39 additions & 0 deletions pufferlib/config/ocean/overcooked.ini
@@ -0,0 +1,39 @@
[base]
package = ocean
env_name = puffer_overcooked
policy_name = Policy
rnn_name = Recurrent

[env]
num_envs = 1024
num_agents = 1

[train]
total_timesteps = 10_000_000_000
learning_rate = 0.10
minibatch_size = 32768
gamma = 0.99
ent_coef = 0.15
gae_lambda = 0.97
clip_coef = 0.35
anneal_lr = False

[sweep]
method = Protein
metric = score
goal = maximize
downsample = 10

[sweep.train.gamma]
distribution = logit_normal
min = 0.9
mean = 0.99
max = 0.999
scale = auto

[sweep.train.ent_coef]
distribution = log_normal
min = 0.001
mean = 0.02
max = 0.1
scale = auto
1 change: 1 addition & 0 deletions pufferlib/ocean/environment.py
@@ -156,6 +156,7 @@ def make_multiagent(buf=None, **kwargs):
'checkers': 'Checkers',
'asteroids': 'Asteroids',
'whisker_racer': 'WhiskerRacer',
'overcooked': 'Overcooked',
'onestateworld': 'World',
'onlyfish': 'OnlyFish',
'chain_mdp': 'Chain',
73 changes: 73 additions & 0 deletions pufferlib/ocean/overcooked/README.md
@@ -0,0 +1,73 @@
# Overcooked Environment

A multi-agent cooking coordination environment where agents cooperate to prepare and serve onion soup. This README describes the reward system, observation space, and action space.

## Observation Space

**39-dimensional vector per agent** — *see [compute_observations](overcooked.h#L281)*

### Player Features (34 dims)
- **Orientation** (4): One-hot encoding of facing direction — [overcooked.h:305](overcooked.h#L305)
- **Held Object** (4): One-hot encoding (onion, soup, dish, empty) — [overcooked.h:309-319](overcooked.h#L309-L319)
- **Proximity Features** (12): Normalized (dx, dy) to nearest — [overcooked.h:322-352](overcooked.h#L322-L352):
- Onion source (ingredient box)
- Dish source (plate box)
- Plated soup on counter
- Serving area
- Empty counter
- Pot (stove)
- **Nearest Soup Ingredients** (2): Onion/tomato counts in nearest plated soup or held soup (normalized) — [overcooked.h:356-380](overcooked.h#L356-L380)
- **Pot Soup Ingredients** (2): Onion/tomato counts in nearest pot (normalized) — [overcooked.h:382-405](overcooked.h#L382-L405)
- **Pot Existence** (1): Binary flag for reachable pot — [overcooked.h:408](overcooked.h#L408)
- **Pot State** (4): Binary flags (empty, full, cooking, ready) — [overcooked.h:410-418](overcooked.h#L410-L418)
- **Cooking Time** (1): Remaining cook time (normalized) — [overcooked.h:420-426](overcooked.h#L420-L426)
- **Wall Detection** (4): Binary flags for walls/obstacles (up, down, left, right) — [overcooked.h:428-438](overcooked.h#L428-L438)

### Spatial Features (4 dims)
- **Teammate Relative Position** (2): Normalized (dx, dy) to other agent — [overcooked.h:440-451](overcooked.h#L440-L451)
- **Absolute Position** (2): Normalized (x, y) coordinates — [overcooked.h:453-455](overcooked.h#L453-L455)

### Context (1 dim)
- **Reward** (1): Current step reward — [overcooked.h:458](overcooked.h#L458)
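
For orientation, here is a minimal C sketch of how one agent's 39-float slice could be indexed, assuming the feature order listed above matches the write order in `compute_observations`. The offset names are illustrative and do not appear in `overcooked.h`.

```c
// Illustrative offsets into one agent's 39-float observation slice.
// Names are assumptions based on the layout documented above; the real
// layout is produced sequentially by compute_observations in overcooked.h.
enum {
    OBS_ORIENTATION   = 0,   // 4: one-hot facing direction
    OBS_HELD_OBJECT   = 4,   // 4: one-hot (onion, soup, dish, empty)
    OBS_PROXIMITY     = 8,   // 12: (dx, dy) to the six points of interest above
    OBS_SOUP_CONTENTS = 20,  // 2: onion/tomato counts in nearest plated/held soup
    OBS_POT_CONTENTS  = 22,  // 2: onion/tomato counts in nearest pot
    OBS_POT_EXISTS    = 24,  // 1: reachable pot flag
    OBS_POT_STATE     = 25,  // 4: empty, full, cooking, ready
    OBS_COOK_TIME     = 29,  // 1: remaining cook time, normalized
    OBS_WALLS         = 30,  // 4: up, down, left, right
    OBS_TEAMMATE_DXY  = 34,  // 2: normalized (dx, dy) to the other agent
    OBS_ABS_POS       = 36,  // 2: normalized (x, y)
    OBS_REWARD        = 38,  // 1: current step reward
    OBS_SIZE          = 39
};

// Example: read one agent's slice out of the flat buffer allocated in overcooked.c.
static inline const float* agent_obs(const float* observations, int agent) {
    return observations + agent * OBS_SIZE;
}
```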

## Action Space

**6 discrete actions** — *see [c_step](overcooked.h#L804)*
- 0: No-op — [ACTION_NOOP](overcooked.h#L38)
- 1: Move up — [ACTION_UP](overcooked.h#L39)
- 2: Move down — [ACTION_DOWN](overcooked.h#L40)
- 3: Move left — [ACTION_LEFT](overcooked.h#L41)
- 4: Move right — [ACTION_RIGHT](overcooked.h#L42)
- 5: Interact (pick up/place items, use equipment) — [ACTION_INTERACT](overcooked.h#L43)
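
A scripted controller can write these indices directly into the actions buffer, as the manual-control branch in `overcooked.c` does. The helper below is a minimal illustration using the `ACTION_*` constants linked above; the sign convention for `dy` (positive = down, as in grid/screen coordinates) is an assumption.

```c
// Minimal helper for scripted play: pick a movement action from a desired step.
// ACTION_* are the constants defined in overcooked.h; coordinate convention is assumed.
int action_from_step(int dx, int dy) {
    if (dy < 0) return ACTION_UP;
    if (dy > 0) return ACTION_DOWN;
    if (dx < 0) return ACTION_LEFT;
    if (dx > 0) return ACTION_RIGHT;
    return ACTION_NOOP;
}
```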

## Reward System

*See [evaluate_dish_served](overcooked.h#L720) and [handle_interaction](overcooked.h#L467)*

### Main Rewards
- **Correct dish served** (3 onions): +20.0 (shared), +5.0 (server bonus) — [overcooked.h:732-735](overcooked.h#L732-L735)
- **Wrong dish served** (incorrect recipe): +0.1 (shared) — [overcooked.h:741-745](overcooked.h#L741-L745)
- **Step penalty**: Configurable (default: 0.0) — [overcooked.h:807](overcooked.h#L807)

### Intermediate Rewards
- **Add onion to pot**: +0.1 — [overcooked.h:494](overcooked.h#L494)
- **Start cooking** (3 onions in pot): +0.1 — [overcooked.h:507](overcooked.h#L507)
- **Plate cooked soup**: +0.1 — [overcooked.h:520](overcooked.h#L520)
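
The split between the shared team reward and the server bonus can be pictured with the sketch below. It is a simplified illustration of what `evaluate_dish_served` does, not the actual code: the reward field names match `rewards_config` as unpacked in `binding.c`, while the onion-count check and accumulation into `env->rewards` are assumptions based on the recipe rules above.

```c
// Simplified sketch of dish-serving rewards (illustrative, not evaluate_dish_served).
void reward_dish_sketch(Overcooked* env, int serving_agent, int onions_in_soup) {
    if (onions_in_soup == MAX_INGREDIENTS) {  // correct recipe: exactly 3 onions
        for (int i = 0; i < env->num_agents; i++) {
            env->rewards[i] += env->rewards_config.dish_served_whole_team;   // +20.0 shared
        }
        env->rewards[serving_agent] += env->rewards_config.dish_served_agent; // +5.0 bonus
    } else {
        for (int i = 0; i < env->num_agents; i++) {
            env->rewards[i] += env->rewards_config.wrong_dish_served;         // +0.1 shared
        }
    }
}
```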

## Recipe

The correct recipe requires **exactly 3 onions** in the soup. Agents must:
1. Pick up onions from ingredient boxes
2. Add 3 onions to a pot
3. Start cooking (interact with pot when empty-handed)
4. Wait for soup to cook (20 steps)
5. Pick up a plate from plate box
6. Plate the cooked soup (interact with pot while holding plate)
7. Deliver plated soup to serving area

## Game Constants

- **Cooking time**: 20 steps — [COOKING_TIME](overcooked.h#L32)
- **Max ingredients per pot**: 3 — [MAX_INGREDIENTS](overcooked.h#L33)
- **Grid size**: 5×5 (default) — [CRAMPED_ROOM](overcooked.h#L186)
- **Max episode steps**: 400 (default) — [overcooked.py:12](overcooked.py#L12)
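
As a rough mental model of these constants, the pot cycles through the four states reported by the Pot State observation flags. The sketch below is an assumption about the control flow, not the code in `overcooked.h`; only `COOKING_TIME` (20) and `MAX_INGREDIENTS` (3) are taken from the constants above.

```c
// Assumed pot states matching the four Pot State observation flags.
typedef enum { POT_EMPTY, POT_FULL, POT_COOKING, POT_READY } PotState;

// Rough transition rules (illustrative):
//   POT_EMPTY   -> POT_FULL    after the MAX_INGREDIENTS-th (3rd) onion is added
//   POT_FULL    -> POT_COOKING when an empty-handed agent interacts with the pot
//   POT_COOKING -> POT_READY   after COOKING_TIME (20) environment steps
//   POT_READY   -> POT_EMPTY   when an agent holding a dish plates the soup
static inline int pot_is_done(int cook_timer) {
    return cook_timer >= COOKING_TIME;  // steps elapsed since cooking started
}
```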
41 changes: 41 additions & 0 deletions pufferlib/ocean/overcooked/binding.c
@@ -0,0 +1,41 @@
#include "overcooked.h"

#define Env Overcooked
#include "../env_binding.h"

static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
env->width = unpack(kwargs, "width");
env->height = unpack(kwargs, "height");
env->num_agents = unpack(kwargs, "num_agents");
env->max_steps = unpack(kwargs, "max_steps");
env->grid_size = unpack(kwargs, "grid_size");
env->observation_size = unpack(kwargs, "observation_size");
env->rewards_config.dish_served_whole_team = unpack(kwargs, "reward_dish_served_whole_team");
env->rewards_config.dish_served_agent = unpack(kwargs, "reward_dish_served_agent");
env->rewards_config.pot_started = unpack(kwargs, "reward_pot_started");
env->rewards_config.ingredient_added = unpack(kwargs, "reward_ingredient_added");
env->rewards_config.ingredient_picked = unpack(kwargs, "reward_ingredient_picked");
env->rewards_config.soup_plated = unpack(kwargs, "reward_soup_plated");
env->rewards_config.wrong_dish_served = unpack(kwargs, "reward_wrong_dish_served");
env->rewards_config.step_penalty = unpack(kwargs, "reward_step_penalty");
init(env);
return 0;
}

static int my_log(PyObject* dict, Log* log) {
assign_to_dict(dict, "perf", log->perf);
assign_to_dict(dict, "score", log->score);
assign_to_dict(dict, "episode_return", log->episode_return);
assign_to_dict(dict, "episode_length", log->episode_length);
assign_to_dict(dict, "dishes_served", log->dishes_served);
assign_to_dict(dict, "cooperation_score", log->cooperation_score);
// User-defined stats
assign_to_dict(dict, "correct_dishes", log->correct_dishes);
assign_to_dict(dict, "wrong_dishes", log->wrong_dishes);
assign_to_dict(dict, "ingredients_picked", log->ingredients_picked);
assign_to_dict(dict, "pots_started", log->pots_started);
assign_to_dict(dict, "items_dropped", log->items_dropped);
assign_to_dict(dict, "agent_collisions", log->agent_collisions);
assign_to_dict(dict, "cooking_time_efficiency", log->cooking_time_efficiency);
return 0;
}
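
my_log above reads from a Log struct defined in overcooked.h, which is outside this excerpt. A plausible shape is sketched here: the field names come from the assign_to_dict calls, while the all-float types and the field order are assumptions.

// Assumed shape of the Log struct consumed by my_log (sketch, not the real definition).
typedef struct {
    float perf;
    float score;
    float episode_return;
    float episode_length;
    float dishes_served;
    float cooperation_score;
    // User-defined stats
    float correct_dishes;
    float wrong_dishes;
    float ingredients_picked;
    float pots_started;
    float items_dropped;
    float agent_collisions;
    float cooking_time_efficiency;
} Log;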
147 changes: 147 additions & 0 deletions pufferlib/ocean/overcooked/overcooked.c
@@ -0,0 +1,147 @@
/* Pure C demo file for Overcooked with neural network support.
* Build it with:
* bash scripts/build_ocean.sh overcooked local (debug)
* bash scripts/build_ocean.sh overcooked fast
*/

#include <time.h>
#include "overcooked.h"
#include "puffernet.h"

void demo() {
// Training debug -> 1 agent for now
int num_agents = 1;

Weights* weights = load_weights("resources/overcooked/puffer_overcooked_weights.bin", 575004);
int logit_sizes[] = {6}; // 6 actions: noop, up, down, left, right, interact
LinearLSTM* net = make_linearlstm(weights, num_agents, 39, logit_sizes, 1);

Overcooked env = {
.width = 5,
.height = 5,
.num_agents = num_agents,
.max_steps = 400,
.grid_size = 100,
.rewards_config = {
.dish_served_whole_team = 20.0f,
.dish_served_agent = 5.0f,
.pot_started = 0.1f,
.ingredient_added = 0.1f,
.soup_plated = 0.1f,
.wrong_dish_served = 0.1f,
.step_penalty = 0.0f
},
.observation_size = 39
};

env.observations = (float*)calloc(env.observation_size * num_agents, sizeof(float));
env.actions = (int*)calloc(num_agents, sizeof(int));
env.rewards = (float*)calloc(num_agents, sizeof(float));
env.terminals = (unsigned char*)calloc(num_agents, sizeof(unsigned char));

init(&env);
c_reset(&env);
c_render(&env);

while (!WindowShouldClose()) {
// Manual control for single agent with Shift key
if (IsKeyDown(KEY_LEFT_SHIFT)) {
// Agent controls (WASD + Space)
env.actions[0] = ACTION_NOOP;
if (IsKeyDown(KEY_W)) env.actions[0] = ACTION_UP;
if (IsKeyDown(KEY_S)) env.actions[0] = ACTION_DOWN;
if (IsKeyDown(KEY_A)) env.actions[0] = ACTION_LEFT;
if (IsKeyDown(KEY_D)) env.actions[0] = ACTION_RIGHT;
if (IsKeyPressed(KEY_SPACE)) env.actions[0] = ACTION_INTERACT;
} else {
forward_linearlstm(net, env.observations, env.actions);
}

c_step(&env);
c_render(&env);

int should_reset = 0;
for (int i = 0; i < num_agents; i++) {
if (env.terminals[i]) {
should_reset = 1;
break;
}
}
if (should_reset) {
c_reset(&env);
}
}

free_linearlstm(net);
free(weights);
free(env.observations);
free(env.actions);
free(env.rewards);
free(env.terminals);
c_close(&env);
}

void test_performance(float test_time) {
int num_agents = 1;

Overcooked env = {
.width = 5,
.height = 5,
.num_agents = num_agents,
.max_steps = 400,
.grid_size = 100,
.rewards_config = {
.dish_served_whole_team = 20.0f,
.dish_served_agent = 5.0f,
.pot_started = 0.1f,
.ingredient_added = 0.1f,
.soup_plated = 0.1f,
.wrong_dish_served = 0.1f,
.step_penalty = 0.0f
},
.observation_size = 39
};

env.observations = (float*)calloc(env.observation_size * num_agents, sizeof(float));
env.actions = (int*)calloc(num_agents, sizeof(int));
env.rewards = (float*)calloc(num_agents, sizeof(float));
env.terminals = (unsigned char*)calloc(num_agents, sizeof(unsigned char));

init(&env);
c_reset(&env);

int start = time(NULL);
int steps = 0;
while (time(NULL) - start < test_time) {
// Random actions for performance testing
for (int i = 0; i < num_agents; i++) {
env.actions[i] = rand() % 6;
}
c_step(&env);
steps++;

// Reset if any agent's episode ends
for (int i = 0; i < num_agents; i++) {
if (env.terminals[i]) {
c_reset(&env);
break;
}
}
}

int end = time(NULL);
float sps = (float)(num_agents * steps) / (end - start);
printf("SPS: %f\n", sps);

free(env.observations);
free(env.actions);
free(env.rewards);
free(env.terminals);
c_close(&env);
}

int main() {
demo();
// test_performance(30);
return 0;
}