Changes from 24 of 34 commits

Commits
2dcb107
Fixed puffer render mode for log replay setting, some fixes for rende…
mpragnay Apr 6, 2026
d978227
Add default manifest.json when building map binaries
mpragnay Apr 6, 2026
a838e0c
Added driving behaviours eval, memory efficient implementation, keepi…
mpragnay Apr 7, 2026
a29e714
Add driving behaviours eval config file
mpragnay Apr 7, 2026
632fcc5
Driving behaviours eval in subprocess
mpragnay Apr 7, 2026
3fba62f
Fix subprocess args to use the correct map_dir with correct num_maps,…
mpragnay Apr 7, 2026
c91c669
Merge branch '3.0' into pragnay/driving_behaviours_eval
mpragnay Apr 7, 2026
80dff96
Fixing renders on a per class basis
mpragnay Apr 7, 2026
efebf1d
Pass render cfg correctly, not working still need to look at all the …
mpragnay Apr 7, 2026
73f6eaf
Add class binaries, remove class bins from gitignore
mpragnay Apr 7, 2026
5fe75e5
Enable render_eval for all driving behaviour scenarios
eugenevinitsky Apr 7, 2026
2f124e2
Merge remote-tracking branch 'origin/3.0' into pragnay/driving_behavi…
eugenevinitsky Apr 7, 2026
6df0a20
Fix: set expert agent velocities from log data in move_expert
eugenevinitsky Apr 8, 2026
8078486
Fix: set expert agent velocities from log data in move_expert
eugenevinitsky Apr 8, 2026
fa3974c
Fix: sanitize wandb_prefix in render output path to avoid subdirectories
eugenevinitsky Apr 8, 2026
567ca12
Add GPU heartbeat to prevent job reclamation on NYU cluster
eugenevinitsky Apr 8, 2026
ce6d027
Fix heartbeat: properly background within bash -c, cleanup on exit
eugenevinitsky Apr 8, 2026
4c3b43f
Revert heartbeat from submit_cluster — needs more debugging
eugenevinitsky Apr 8, 2026
ae64373
10x velocity reward coefficient: 2.5e-3 -> 2.5e-2
eugenevinitsky Apr 8, 2026
f1228b0
Add script to render behavior eval videos locally
eugenevinitsky Apr 8, 2026
1fac4f9
Merge remote-tracking branch 'origin/ev/fix-expert-velocity' into pra…
eugenevinitsky Apr 8, 2026
7f484f9
Update behavior eval reward values: softer collision/offroad, wider g…
eugenevinitsky Apr 10, 2026
8994942
Revert drive.c map path to 3.0 default
eugenevinitsky Apr 10, 2026
2e2879c
Merge remote-tracking branch 'origin/3.0' into pragnay/driving_behavi…
eugenevinitsky Apr 10, 2026
30a025a
revert a reward change
eugenevinitsky Apr 10, 2026
6ca63cb
put the driving behaviors eval init back to where it should be
eugenevinitsky Apr 10, 2026
970dcf5
Revert [render] section to 3.0 defaults — local dev preferences shoul…
eugenevinitsky Apr 10, 2026
c807f6a
Add trailing newlines to manifest.json files for pre-commit
eugenevinitsky Apr 10, 2026
98ec822
Merge branch '3.0' into pragnay/driving_behaviours_eval
mpragnay Apr 13, 2026
b1da1c1
Post merge fixes and alignment with new renders, invoking human repla…
mpragnay Apr 13, 2026
96bf636
Fixing human replay eval invoking during driving behaviours eval
mpragnay Apr 13, 2026
11d8095
Make episode_len configurable
mpragnay Apr 13, 2026
2e0ef69
Merge branch '3.0' into pragnay/driving_behaviours_eval
mpragnay Apr 14, 2026
3e77d2a
Fix output unpack errors for driving_behaviours_eval
mpragnay Apr 14, 2026
9 changes: 8 additions & 1 deletion .gitignore
@@ -169,7 +169,7 @@ pufferlib/resources/drive/binaries/
pufferlib/resources/drive/binaries/training/
pufferlib/resources/drive/binaries/validation/

-# But keep map_000.bin for the training test
+# Keep map_000.bin for the training test
!pufferlib/resources/drive/binaries/map_000.bin
!pufferlib/resources/drive/binaries/training/map_000.bin
pufferlib/resources/drive/sanity/sanity_binaries/
@@ -178,6 +178,13 @@ pufferlib/resources/drive/sanity/sanity_binaries/
!pufferlib/resources/drive/binaries/carla/**
!pufferlib/resources/drive/binaries/carla_2D/**

+# Keep driving behaviour eval scenario binaries
+!pufferlib/resources/drive/binaries/dense_traffic/**
+!pufferlib/resources/drive/binaries/lane_change/**
+!pufferlib/resources/drive/binaries/obstacles/**
+!pufferlib/resources/drive/binaries/vru_interaction/**
+!pufferlib/resources/drive/binaries/lead_vehicle_interaction/**

# Compiled drive binary in root
/drive
/visualize
23 changes: 16 additions & 7 deletions pufferlib/config/ocean/drive.ini
@@ -228,6 +228,10 @@ human_replay_eval = False
human_replay_control_mode = "control_sdc_only"
; Number of scenarios for human replay evaluation equals the number of agents
human_replay_num_agents = 16
+; Evaluating different driving behaviours learned by the policy
+driving_behaviours_eval = True
eugenevinitsky marked this conversation as resolved.
+driving_behaviours_eval_config = "pufferlib/config/ocean/driving_behaviours_eval.ini"
+driving_behaviours_eval_interval = 250

[safe_eval]
; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics
@@ -273,15 +277,15 @@ acc = 1.0
[render]
; Mode to render a bunch of maps with a given policy
; Path to dataset used for rendering
map_dir = "resources/drive/binaries/training"
map_dir = "resources/drive/binaries/interactive_data_validation"
; Directory to output rendered videos
output_dir = "resources/drive/render_videos"
; Evaluation will run on the first num_maps maps in the map_dir directory
num_maps = 100
output_dir = "resources/drive/human_replay_videos"
; Evaluation will run on the first num_maps maps in the map_dir directory. Use "auto" to render all maps in map_dir.
num_maps = auto
; "both", "topdown", "agent"; Other args are passed from train confs
view_mode = "both"
view_mode = "topdown"
; Policy bin file used for rendering videos
policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin"
policy_path = "resources/drive/best_policy_with_reward_conditioning.bin"
; Allows more than cpu cores workers for rendering
overwork = True
; If True, show exactly what the agent sees in agent observation
@@ -291,9 +295,14 @@ show_grid = True
; Draws lines from ego agent observed ORUs and road elements to show detection range
show_lasers = True
; Display human xy logs in the background
-show_human_logs = False
+show_human_logs = True
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True
+; If True, render in log-replay mode: only the SDC is policy-controlled,
+; all other agents follow their ground-truth expert trajectories
+human_replay_render = True
+; Control mode used when human_replay_render = True
+human_replay_control_mode = "control_sdc_only"

[sweep.train.learning_rate]
distribution = log_normal
53 changes: 53 additions & 0 deletions pufferlib/config/ocean/driving_behaviours_eval.ini
@@ -0,0 +1,53 @@
; Configuration for driving behaviour evaluation maps and rewards.
; Currently evaluates 5 broad driving behaviours: lead vehicle interaction, lane change, dense traffic, obstacles, vulnerable road user interactions (VRUs).
; Currently uses safe reward conditioning values for evaluation

[eval_lead_vehicle_interaction]
map_dir = "resources/drive/binaries/lead_vehicle_interaction"
human_replay_eval = True
render_eval = True

[eval_lane_change]
map_dir = "resources/drive/binaries/lane_change"
human_replay_eval = True
render_eval = True

[eval_dense_traffic]
map_dir = "resources/drive/binaries/dense_traffic"
human_replay_eval = True
render_eval = True

[eval_obstacles]
map_dir = "resources/drive/binaries/obstacles"
human_replay_eval = True
render_eval = True

[eval_vru_interaction]
map_dir = "resources/drive/binaries/vru_interaction"
human_replay_eval = True
render_eval = True

[eval_driving_rewards]
; Reward conditioning values (min=max to fix the value).
; Names match the env reward_bound_* keys.
; High penalties for unsafe behavior
collision = -0.5
offroad = -0.5
overspeed = -1.0
traffic_light = -1.0
reverse = -0.0075
comfort = -0.1

; Standard driving rewards
goal_radius = 8.0
lane_align = 0.0025
lane_center = -0.00075
velocity = 0.005
center_bias = 0.0
vel_align = 1.0
timestep = -0.00005

; Neutral scaling factors
throttle = 1.0
steer = 1.0
acc = 1.0
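
The min=max convention above works because the env draws each conditioned coefficient uniformly from [min, max] (see generate_reward_coefs in the drive.h diff below); a sketch of the degenerate case, assuming random_uniform has the standard form (its real definition is in drive.h, not shown here):

    #include <stdlib.h>

    // Assumed form of the sampler: a uniform draw from [min_val, max_val]
    // collapses to a constant when the bounds coincide, which is how min=max
    // in this config pins each reward coefficient.
    float random_uniform(float min_val, float max_val) {
        return min_val + (max_val - min_val) * ((float)rand() / (float)RAND_MAX);
    }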
123 changes: 123 additions & 0 deletions pufferlib/ocean/benchmark/evaluator.py
@@ -964,3 +964,126 @@ def log_stats(self, global_step=None):
        if global_step is not None:
            payload["train_step"] = global_step
        self.logger.wandb.log(payload)


class DrivingBehavioursEvaluator:
    """Evaluates a policy on the 5 driving behaviour classes using live in-process weights."""

    # Sections in driving_behaviours_eval.ini that describe scenario classes
    EVAL_SECTIONS_PREFIX = "eval_"
    REWARD_SECTION = "eval_driving_rewards"

    def __init__(self, env_name: str, behaviours_config: Dict, device="cuda", logger=None):
        self.env_name = env_name
        self.behaviours_config = behaviours_config
        if isinstance(device, int):
            device = f"cuda:{device}"
        self.device = device
        self.logger = logger
        self.reward_config = behaviours_config.get(self.REWARD_SECTION, {})
        self.classes = [
            (name, cfg)
            for name, cfg in behaviours_config.items()
            if name.startswith(self.EVAL_SECTIONS_PREFIX) and name != self.REWARD_SECTION
        ]

    def _build_class_env_config(self, class_cfg: Dict) -> Dict:
        """Build env config for one scenario class with fixed reward conditioning."""
        import re
        import sys
        from pufferlib.pufferl import load_config

        original_argv = sys.argv
        sys.argv = ["pufferl"]
Author: What is this supposed to do?
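        # Likely rationale (an assumption; the PR does not say): load_config
        # appears to parse CLI flags from sys.argv, so argv is temporarily
        # replaced with a bare program name to keep the calling process's
        # flags from leaking into this eval config; the finally clause below
        # restores the original argv.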

        try:
            eval_config = load_config(self.env_name)
        finally:
            sys.argv = original_argv

        eval_config["vec"] = dict(backend="PufferEnv", num_envs=1)
        eval_config["train"]["device"] = self.device
        eval_config["env"]["control_mode"] = "control_sdc_only"
        eval_config["env"]["init_mode"] = "create_all_valid"
        eval_config["env"]["episode_length"] = 91
        eval_config["env"]["resample_frequency"] = 0

        map_dir = class_cfg.get("map_dir", "")
        if isinstance(map_dir, str):
            map_dir = map_dir.strip('"')
        eval_config["env"]["map_dir"] = map_dir
        # Set num_maps to the number of available bins so we cover all scenarios
        available_maps = len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) if os.path.isdir(map_dir) else 1
        eval_config["env"]["num_maps"] = available_maps

        # Discover valid reward bound names
        valid_bounds = set()
        for key in eval_config["env"]:
            m = re.match(r"reward_bound_(.+)_min$", key)
            if m:
                valid_bounds.add(m.group(1))

        # Fix reward conditioning to eval_driving_rewards values
        for key, val in self.reward_config.items():
            if key not in valid_bounds:
                continue
            eval_config["env"][f"reward_bound_{key}_min"] = float(val)
            eval_config["env"][f"reward_bound_{key}_max"] = float(val)

        return eval_config

    def evaluate_class(self, class_cfg: Dict, policy) -> Dict:
        """Run human-replay rollouts on all maps in the class map_dir and return averaged metrics."""
        from collections import defaultdict
        from pufferlib.pufferl import load_env

        print("Evaluating class")

        eval_config = self._build_class_env_config(class_cfg)
        num_maps = eval_config["env"]["num_maps"]
        print(f"Built eval config for class with map_dir: {eval_config['env']['map_dir']}")

        vecenv = load_env(self.env_name, eval_config)
        print("Loaded vecenv")
        policy.eval()
        print("Set policy to eval mode")
        rollout_evaluator = HumanReplayEvaluator(eval_config)
        all_stats = defaultdict(list)
        print(f"Starting rollouts for class with {num_maps} maps")
        try:
            for _ in range(num_maps):
                result = rollout_evaluator.rollout(eval_config, vecenv, policy) or {}
                for k, v in result.items():
                    try:
                        all_stats[k].append(float(v))
                    except (TypeError, ValueError):
                        pass
                # Reset for next map
                vecenv.reset()
        finally:
            vecenv.close()
        import gc

        gc.collect()
        import torch

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return {k: float(np.mean(v)) for k, v in all_stats.items() if v}

    def log_stats(self, all_results: Dict[str, Dict], global_step=None):
        """Log per-class metrics to wandb under driving_behaviours/<class>/<metric>."""
        if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb):
            return
        payload = {}
        for class_name, metrics in all_results.items():
            short = class_name[len(self.EVAL_SECTIONS_PREFIX) :]
            for k, v in metrics.items():
                try:
                    payload[f"driving_behaviours/{short}/{k}"] = float(v)
                except (TypeError, ValueError):
                    pass
        if global_step is not None:
            payload["train_step"] = global_step
        if payload:
            self.logger.wandb.log(payload)
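
How the trainer invokes this class is not shown in the diff; a hedged sketch of plausible wiring (the trainer-side names here are assumptions, not part of this PR):

    # Hypothetical wiring: run the behaviour eval every
    # driving_behaviours_eval_interval epochs and log per-class metrics.
    def maybe_run_behaviour_eval(evaluator, policy, epoch, interval, global_step):
        if interval and epoch % interval == 0:
            results = {name: evaluator.evaluate_class(cfg, policy)
                       for name, cfg in evaluator.classes}
            evaluator.log_stats(results, global_step=global_step)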
6 changes: 4 additions & 2 deletions pufferlib/ocean/drive/drive.h
@@ -595,7 +595,7 @@ static void generate_reward_coefs(Drive *env, Agent *agent) {
    agent->reward_coefs[REWARD_COEF_REVERSE] = random_uniform(env->reward_bounds[REWARD_COEF_REVERSE].min_val,
                                                              env->reward_bounds[REWARD_COEF_REVERSE].max_val);
    // Fixed values (Must fall within the bounds defined above)
-   agent->reward_coefs[REWARD_COEF_VELOCITY] = 2.5e-3f;
+   agent->reward_coefs[REWARD_COEF_VELOCITY] = 2.5e-2f;
    agent->reward_coefs[REWARD_COEF_TIMESTEP] = -2.5e-5f;
    // Dynamic conditioning (Mixed Uniform)
    agent->reward_coefs[REWARD_COEF_THROTTLE] = mixed_uniform(1.25f);
@@ -2152,7 +2152,9 @@ void set_active_agents(Drive *env) {
            static_agent_indices[env->static_agent_count] = i;
            env->static_agent_count++;
            env->agents[i].active_agent = 0;
-           if (env->agents[i].mark_as_expert == 1 || env->active_agent_count == env->num_agents) {
+
+           if (env->control_mode == CONTROL_SDC_ONLY || env->agents[i].mark_as_expert == 1 ||
+               env->active_agent_count == env->num_agents) {
                expert_static_agent_indices[env->expert_static_agent_count] = i;
                env->expert_static_agent_count++;
                env->agents[i].mark_as_expert = 1;
11 changes: 11 additions & 0 deletions pufferlib/ocean/drive/drive.py
@@ -1023,6 +1023,17 @@ def process_all_maps(
        if not success:
            print(f"  {name}: {error}")

+   # Write manifest.json mapping each bin to its source JSON
+   manifest = {}
+   for i, map_path, binary_path, *_ in tasks:
+       _, _, success, _ = results[i]
+       if success:
+           manifest[f"map_{i:03d}.bin"] = map_path.name
+   manifest_path = binary_dir / "manifest.json"
+   with open(manifest_path, "w") as f:
+       json.dump(manifest, f, indent=2)
+   print(f"Wrote manifest to {manifest_path} ({len(manifest)} entries)")
Comment on lines +1074 to +1083
Author: @mpragnay really nice touch
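
The manifest gives each compiled bin a link back to its source scenario; a hedged consumer sketch (the directory path here is an assumption, not from the PR):

    import json
    from pathlib import Path

    # Hypothetical lookup: which source JSON produced map_007.bin?
    binary_dir = Path("pufferlib/resources/drive/binaries/training")  # assumed layout
    manifest = json.loads((binary_dir / "manifest.json").read_text())
    print(manifest.get("map_007.bin"))  # source scenario filename, or None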



def test_performance(timeout=10, atn_cache=1024, num_agents=1024):
import time
41 changes: 23 additions & 18 deletions pufferlib/ocean/drive/visualize.c
@@ -193,11 +193,12 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett

int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers,
             int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown,
-            const char *output_agent, int num_maps, int zoom_in) {
+            const char *output_agent, int num_maps, int zoom_in, const char *ini_file) {

    // Parse configuration from INI file
    env_init_config conf = {0};
-   const char *ini_file = "pufferlib/config/ocean/drive.ini";
+   if (ini_file == NULL)
+       ini_file = "pufferlib/config/ocean/drive.ini";
    if (ini_parse(ini_file, handler, &conf) < 0) {
        fprintf(stderr, "Error: Could not load %s. Cannot determine environment configuration.\n", ini_file);
        return -1;
@@ -350,25 +351,29 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
    char filename_topdown[256];
    char filename_agent[256];

-   if (output_topdown != NULL && output_agent != NULL) {
-       strcpy(filename_topdown, output_topdown);
-       strcpy(filename_agent, output_agent);
-   } else {
-       char policy_base[256];
-       strcpy(policy_base, policy_name);
-       *strrchr(policy_base, '.') = '\0';
+   char policy_base[256];
+   strcpy(policy_base, policy_name);
+   *strrchr(policy_base, '.') = '\0';

-       char map[256];
-       strcpy(map, basename((char *)map_name));
-       *strrchr(map, '.') = '\0';
+   char map[256];
+   strcpy(map, basename((char *)map_name));
+   *strrchr(map, '.') = '\0';

-       char video_dir[256];
-       sprintf(video_dir, "%s/video", policy_base);
-       char mkdir_cmd[512];
-       snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir);
-       system(mkdir_cmd);
+   char video_dir[256];
+   sprintf(video_dir, "%s/video", policy_base);
+   char mkdir_cmd[512];
+   snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir);
+   system(mkdir_cmd);
Comment on lines +354 to +366
Copilot AI, Apr 7, 2026:
policy_base and map are derived via *strrchr(..., '.') = '\0' without checking that strrchr returned non-NULL. If policy_name or map_name lacks an extension, this will dereference NULL and crash. Also, the strcpy/sprintf calls here can overflow the fixed 256-byte buffers with long paths. Please add NULL checks and use bounded copies/formatting (e.g., snprintf) to avoid crashes/overflows.
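
A minimal sketch of the guard the reviewer is asking for (a hypothetical helper, not part of this PR): bound every copy and strip the extension only when a dot actually exists.

    #include <stdio.h>
    #include <string.h>

    // Hypothetical helper: bounded copy of `src` into `dst`, then strip a
    // trailing extension only if one is present (no NULL dereference).
    static void copy_base_without_ext(char *dst, size_t dst_size, const char *src) {
        snprintf(dst, dst_size, "%s", src);  // bounded, always NUL-terminated
        char *dot = strrchr(dst, '.');
        if (dot != NULL)
            *dot = '\0';
    }

Used as copy_base_without_ext(policy_base, sizeof(policy_base), policy_name), and likewise on basename((char *)map_name), this tolerates extension-less and over-long paths instead of crashing.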
Comment on lines +362 to +366
Copilot AI, Apr 7, 2026:
system("mkdir -p ...") is run using policy_base derived from the user-supplied --policy-name path. This introduces command-injection risk (e.g., quotes/metacharacters in the path) and is now executed even when --output-* paths are provided. Prefer creating the directory via mkdir(2)/filesystem APIs (and only when you actually need the auto-generated output paths).
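
One way to follow that suggestion (sketched here under POSIX assumptions, not taken from the PR) is to create each path component with mkdir(2) and treat EEXIST as success, so no shell is ever involved:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <sys/types.h>

    // Hypothetical replacement for system("mkdir -p ..."): create each
    // component of `path` with mkdir(2); already-existing directories are fine.
    static int mkdir_p(const char *path, mode_t mode) {
        char buf[512];
        if (snprintf(buf, sizeof(buf), "%s", path) >= (int)sizeof(buf))
            return -1;  // path too long for this sketch's buffer
        for (char *p = buf + 1; *p != '\0'; p++) {
            if (*p == '/') {
                *p = '\0';
                if (mkdir(buf, mode) != 0 && errno != EEXIST)
                    return -1;
                *p = '/';
            }
        }
        if (mkdir(buf, mode) != 0 && errno != EEXIST)
            return -1;
        return 0;
    }

Calling mkdir_p(video_dir, 0755) would replace the snprintf/system pair above and sidestep shell quoting entirely; it could also be guarded to run only when the auto-generated output paths are actually needed.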

+   if (output_topdown != NULL) {
+       strcpy(filename_topdown, output_topdown);
+   } else {
+       sprintf(filename_topdown, "%s/video/%s_topdown.mp4", policy_base, map);
+   }
+
+   if (output_agent != NULL) {
+       strcpy(filename_agent, output_agent);
+   } else {
+       sprintf(filename_agent, "%s/video/%s_agent.mp4", policy_base, map);
+   }

@@ -555,6 +560,6 @@ int main(int argc, char *argv[]) {
    }

    eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown,
-            output_agent, num_maps, zoom_in);
+            output_agent, num_maps, zoom_in, ini_file);
    return 0;
}