Changes from 24 of 34 commits

Commits
2dcb107
Fixed puffer render mode for log replay setting, some fixes for rende…
mpragnay Apr 6, 2026
d978227
Add default manifest.json when building map binaries
mpragnay Apr 6, 2026
a838e0c
Added driving behaviours eval, memory efficient implementation, keepi…
mpragnay Apr 7, 2026
a29e714
Add driving behaviours eval config file
mpragnay Apr 7, 2026
632fcc5
Driving behaviours eval in subprocess
mpragnay Apr 7, 2026
3fba62f
Fix subprocess args to use the correct map_dir with correct num_maps,…
mpragnay Apr 7, 2026
c91c669
Merge branch '3.0' into pragnay/driving_behaviours_eval
mpragnay Apr 7, 2026
80dff96
Fixing renders on a per class basis
mpragnay Apr 7, 2026
efebf1d
Pass render cfg correctly, not working still need to look at all the …
mpragnay Apr 7, 2026
73f6eaf
Add class binaries, remove class bins from gitignore
mpragnay Apr 7, 2026
5fe75e5
Enable render_eval for all driving behaviour scenarios
eugenevinitsky Apr 7, 2026
2f124e2
Merge remote-tracking branch 'origin/3.0' into pragnay/driving_behavi…
eugenevinitsky Apr 7, 2026
6df0a20
Fix: set expert agent velocities from log data in move_expert
eugenevinitsky Apr 8, 2026
8078486
Fix: set expert agent velocities from log data in move_expert
eugenevinitsky Apr 8, 2026
fa3974c
Fix: sanitize wandb_prefix in render output path to avoid subdirectories
eugenevinitsky Apr 8, 2026
567ca12
Add GPU heartbeat to prevent job reclamation on NYU cluster
eugenevinitsky Apr 8, 2026
ce6d027
Fix heartbeat: properly background within bash -c, cleanup on exit
eugenevinitsky Apr 8, 2026
4c3b43f
Revert heartbeat from submit_cluster — needs more debugging
eugenevinitsky Apr 8, 2026
ae64373
10x velocity reward coefficient: 2.5e-3 -> 2.5e-2
eugenevinitsky Apr 8, 2026
f1228b0
Add script to render behavior eval videos locally
eugenevinitsky Apr 8, 2026
1fac4f9
Merge remote-tracking branch 'origin/ev/fix-expert-velocity' into pra…
eugenevinitsky Apr 8, 2026
7f484f9
Update behavior eval reward values: softer collision/offroad, wider g…
eugenevinitsky Apr 10, 2026
8994942
Revert drive.c map path to 3.0 default
eugenevinitsky Apr 10, 2026
2e2879c
Merge remote-tracking branch 'origin/3.0' into pragnay/driving_behavi…
eugenevinitsky Apr 10, 2026
30a025a
revert a reward change
eugenevinitsky Apr 10, 2026
6ca63cb
put the driving behaviors eval init back to where it should be
eugenevinitsky Apr 10, 2026
970dcf5
Revert [render] section to 3.0 defaults — local dev preferences shoul…
eugenevinitsky Apr 10, 2026
c807f6a
Add trailing newlines to manifest.json files for pre-commit
eugenevinitsky Apr 10, 2026
98ec822
Merge branch '3.0' into pragnay/driving_behaviours_eval
mpragnay Apr 13, 2026
b1da1c1
Post merge fixes and alignment with new renders, invoking human repla…
mpragnay Apr 13, 2026
96bf636
Fixing human replay eval invoking during driving behaviours eval
mpragnay Apr 13, 2026
11d8095
Make episode_len configurable
mpragnay Apr 13, 2026
2e0ef69
Merge branch '3.0' into pragnay/driving_behaviours_eval
mpragnay Apr 14, 2026
3e77d2a
Fix output unpack errors for driving_behaviours_eval
mpragnay Apr 14, 2026
9 changes: 8 additions & 1 deletion .gitignore
@@ -169,7 +169,7 @@ pufferlib/resources/drive/binaries/
pufferlib/resources/drive/binaries/training/
pufferlib/resources/drive/binaries/validation/

-# But keep map_000.bin for the training test
+# Keep map_000.bin for the training test
!pufferlib/resources/drive/binaries/map_000.bin
!pufferlib/resources/drive/binaries/training/map_000.bin
pufferlib/resources/drive/sanity/sanity_binaries/
@@ -178,6 +178,13 @@ pufferlib/resources/drive/sanity/sanity_binaries/
!pufferlib/resources/drive/binaries/carla/**
!pufferlib/resources/drive/binaries/carla_2D/**

+# Keep driving behaviour eval scenario binaries
+!pufferlib/resources/drive/binaries/dense_traffic/**
+!pufferlib/resources/drive/binaries/lane_change/**
+!pufferlib/resources/drive/binaries/obstacles/**
+!pufferlib/resources/drive/binaries/vru_interaction/**
+!pufferlib/resources/drive/binaries/lead_vehicle_interaction/**

# Compiled drive binary in root
/drive
/visualize
23 changes: 16 additions & 7 deletions pufferlib/config/ocean/drive.ini
@@ -228,6 +228,10 @@ human_replay_eval = False
human_replay_control_mode = "control_sdc_only"
; Number of scenarios for human replay evaluation equals the number of agents
human_replay_num_agents = 16
+; Evaluating different driving behaviours learned by the policy
+driving_behaviours_eval = True
eugenevinitsky marked this conversation as resolved.
+driving_behaviours_eval_config = "pufferlib/config/ocean/driving_behaviours_eval.ini"
+driving_behaviours_eval_interval = 250

[safe_eval]
; If True, periodically run policy with safe/law-abiding reward conditioning and log metrics
@@ -273,15 +277,15 @@ acc = 1.0
[render]
; Mode to render a bunch of maps with a given policy
; Path to dataset used for rendering
map_dir = "resources/drive/binaries/training"
map_dir = "resources/drive/binaries/interactive_data_validation"
; Directory to output rendered videos
output_dir = "resources/drive/render_videos"
; Evaluation will run on the first num_maps maps in the map_dir directory
num_maps = 100
output_dir = "resources/drive/human_replay_videos"
; Evaluation will run on the first num_maps maps in the map_dir directory. Use "auto" to render all maps in map_dir.
num_maps = auto
; "both", "topdown", "agent"; Other args are passed from train confs
view_mode = "both"
view_mode = "topdown"
; Policy bin file used for rendering videos
policy_path = "resources/drive/puffer_drive_weights_resampling_300.bin"
policy_path = "resources/drive/best_policy_with_reward_conditioning.bin"
; Allows more than cpu cores workers for rendering
overwork = True
; If True, show exactly what the agent sees in agent observation
@@ -291,9 +295,14 @@ show_grid = True
; Draws lines from ego agent observed ORUs and road elements to show detection range
show_lasers = True
; Display human xy logs in the background
-show_human_logs = False
+show_human_logs = True
; If True, zoom in on a part of the map. Otherwise, show full map
zoom_in = True
+; If True, render in log-replay mode: only the SDC is policy-controlled,
+; all other agents follow their ground-truth expert trajectories
+human_replay_render = True
+; Control mode used when human_replay_render = True
+human_replay_control_mode = "control_sdc_only"

[sweep.train.learning_rate]
distribution = log_normal
53 changes: 53 additions & 0 deletions pufferlib/config/ocean/driving_behaviours_eval.ini
@@ -0,0 +1,53 @@
; Configuration for driving behaviour evaluation maps and rewards.
; Currently evaluates 5 broad driving behaviours: lead vehicle interaction, lane change, dense traffic, obstacles, vulnerable road user interactions (VRUs).
; Currently uses safe reward conditioning values for evaluation

[eval_lead_vehicle_interaction]
map_dir = "resources/drive/binaries/lead_vehicle_interaction"
human_replay_eval = True
render_eval = True

[eval_lane_change]
map_dir = "resources/drive/binaries/lane_change"
human_replay_eval = True
render_eval = True

[eval_dense_traffic]
map_dir = "resources/drive/binaries/dense_traffic"
human_replay_eval = True
render_eval = True

[eval_obstacles]
map_dir = "resources/drive/binaries/obstacles"
human_replay_eval = True
render_eval = True

[eval_vru_interaction]
map_dir = "resources/drive/binaries/vru_interaction"
human_replay_eval = True
render_eval = True

[eval_driving_rewards]
; Reward conditioning values (min=max to fix the value).
; Names match the env reward_bound_* keys.
; High penalties for unsafe behavior
collision = -0.5
offroad = -0.5
overspeed = -1.0
traffic_light = -1.0
reverse = -0.0075
comfort = -0.1

; Standard driving rewards
goal_radius = 8.0
lane_align = 0.0025
lane_center = -0.00075
velocity = 0.005
center_bias = 0.0
vel_align = 1.0
timestep = -0.00005

; Neutral scaling factors
throttle = 1.0
steer = 1.0
acc = 1.0
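
The min=max convention above works because the env draws each conditioned coefficient uniformly from [min, max] (see generate_reward_coefs in the drive.h diff below); a sketch of the degenerate case, assuming random_uniform has the standard form (its real definition is in drive.h, not shown here):

    #include <stdlib.h>

    // Assumed form of the sampler: a uniform draw from [min_val, max_val]
    // collapses to a constant when the bounds coincide, which is how min=max
    // in this config pins each reward coefficient.
    float random_uniform(float min_val, float max_val) {
        return min_val + (max_val - min_val) * ((float)rand() / (float)RAND_MAX);
    }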
123 changes: 123 additions & 0 deletions pufferlib/ocean/benchmark/evaluator.py
@@ -964,3 +964,126 @@ def log_stats(self, global_step=None):
        if global_step is not None:
            payload["train_step"] = global_step
        self.logger.wandb.log(payload)


class DrivingBehavioursEvaluator:
    """Evaluates a policy on the 5 driving behaviour classes using live in-process weights."""

    # Sections in driving_behaviours_eval.ini that describe scenario classes
    EVAL_SECTIONS_PREFIX = "eval_"
    REWARD_SECTION = "eval_driving_rewards"

    def __init__(self, env_name: str, behaviours_config: Dict, device="cuda", logger=None):
        self.env_name = env_name
        self.behaviours_config = behaviours_config
        if isinstance(device, int):
            device = f"cuda:{device}"
        self.device = device
        self.logger = logger
        self.reward_config = behaviours_config.get(self.REWARD_SECTION, {})
        self.classes = [
            (name, cfg)
            for name, cfg in behaviours_config.items()
            if name.startswith(self.EVAL_SECTIONS_PREFIX) and name != self.REWARD_SECTION
        ]

    def _build_class_env_config(self, class_cfg: Dict) -> Dict:
        """Build env config for one scenario class with fixed reward conditioning."""
        import re
        import sys
        from pufferlib.pufferl import load_config

        original_argv = sys.argv
        sys.argv = ["pufferl"]
Author: What is this supposed to do?
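        # Likely rationale (an assumption; the PR does not say): load_config
        # appears to parse CLI flags from sys.argv, so argv is temporarily
        # replaced with a bare program name to keep the calling process's
        # flags from leaking into this eval config; the finally clause below
        # restores the original argv.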

        try:
            eval_config = load_config(self.env_name)
        finally:
            sys.argv = original_argv

        eval_config["vec"] = dict(backend="PufferEnv", num_envs=1)
        eval_config["train"]["device"] = self.device
        eval_config["env"]["control_mode"] = "control_sdc_only"
        eval_config["env"]["init_mode"] = "create_all_valid"
        eval_config["env"]["episode_length"] = 91
        eval_config["env"]["resample_frequency"] = 0

        map_dir = class_cfg.get("map_dir", "")
        if isinstance(map_dir, str):
            map_dir = map_dir.strip('"')
        eval_config["env"]["map_dir"] = map_dir
        # Set num_maps to the number of available bins so we cover all scenarios
        available_maps = len([f for f in os.listdir(map_dir) if f.endswith(".bin")]) if os.path.isdir(map_dir) else 1
        eval_config["env"]["num_maps"] = available_maps

        # Discover valid reward bound names
        valid_bounds = set()
        for key in eval_config["env"]:
            m = re.match(r"reward_bound_(.+)_min$", key)
            if m:
                valid_bounds.add(m.group(1))

        # Fix reward conditioning to eval_driving_rewards values
        for key, val in self.reward_config.items():
            if key not in valid_bounds:
                continue
            eval_config["env"][f"reward_bound_{key}_min"] = float(val)
            eval_config["env"][f"reward_bound_{key}_max"] = float(val)

        return eval_config

    def evaluate_class(self, class_cfg: Dict, policy) -> Dict:
        """Run human-replay rollouts on all maps in the class map_dir and return averaged metrics."""
        from collections import defaultdict
        from pufferlib.pufferl import load_env

        print("Evaluating class")

        eval_config = self._build_class_env_config(class_cfg)
        num_maps = eval_config["env"]["num_maps"]
        print(f"Built eval config for class with map_dir: {eval_config['env']['map_dir']}")

        vecenv = load_env(self.env_name, eval_config)
        print("Loaded vecenv")
        policy.eval()
        print("Set policy to eval mode")
        rollout_evaluator = HumanReplayEvaluator(eval_config)
        all_stats = defaultdict(list)
        print(f"Starting rollouts for class with {num_maps} maps")
        try:
            for _ in range(num_maps):
                result = rollout_evaluator.rollout(eval_config, vecenv, policy) or {}
                for k, v in result.items():
                    try:
                        all_stats[k].append(float(v))
                    except (TypeError, ValueError):
                        pass
                # Reset for next map
                vecenv.reset()
        finally:
            vecenv.close()
        import gc

        gc.collect()
        import torch

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return {k: float(np.mean(v)) for k, v in all_stats.items() if v}

    def log_stats(self, all_results: Dict[str, Dict], global_step=None):
        """Log per-class metrics to wandb under driving_behaviours/<class>/<metric>."""
        if not (self.logger and hasattr(self.logger, "wandb") and self.logger.wandb):
            return
        payload = {}
        for class_name, metrics in all_results.items():
            short = class_name[len(self.EVAL_SECTIONS_PREFIX) :]
            for k, v in metrics.items():
                try:
                    payload[f"driving_behaviours/{short}/{k}"] = float(v)
                except (TypeError, ValueError):
                    pass
        if global_step is not None:
            payload["train_step"] = global_step
        if payload:
            self.logger.wandb.log(payload)
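
How the trainer invokes this class is not shown in the diff; a hedged sketch of plausible wiring (the trainer-side names here are assumptions, not part of this PR):

    # Hypothetical wiring: run the behaviour eval every
    # driving_behaviours_eval_interval epochs and log per-class metrics.
    def maybe_run_behaviour_eval(evaluator, policy, epoch, interval, global_step):
        if interval and epoch % interval == 0:
            results = {name: evaluator.evaluate_class(cfg, policy)
                       for name, cfg in evaluator.classes}
            evaluator.log_stats(results, global_step=global_step)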
6 changes: 4 additions & 2 deletions pufferlib/ocean/drive/drive.h
@@ -595,7 +595,7 @@ static void generate_reward_coefs(Drive *env, Agent *agent) {
    agent->reward_coefs[REWARD_COEF_REVERSE] = random_uniform(env->reward_bounds[REWARD_COEF_REVERSE].min_val,
                                                              env->reward_bounds[REWARD_COEF_REVERSE].max_val);
    // Fixed values (Must fall within the bounds defined above)
-   agent->reward_coefs[REWARD_COEF_VELOCITY] = 2.5e-3f;
+   agent->reward_coefs[REWARD_COEF_VELOCITY] = 2.5e-2f;
    agent->reward_coefs[REWARD_COEF_TIMESTEP] = -2.5e-5f;
    // Dynamic conditioning (Mixed Uniform)
    agent->reward_coefs[REWARD_COEF_THROTTLE] = mixed_uniform(1.25f);
@@ -2152,7 +2152,9 @@ void set_active_agents(Drive *env) {
            static_agent_indices[env->static_agent_count] = i;
            env->static_agent_count++;
            env->agents[i].active_agent = 0;
-           if (env->agents[i].mark_as_expert == 1 || env->active_agent_count == env->num_agents) {
+
+           if (env->control_mode == CONTROL_SDC_ONLY || env->agents[i].mark_as_expert == 1 ||
+               env->active_agent_count == env->num_agents) {
                expert_static_agent_indices[env->expert_static_agent_count] = i;
                env->expert_static_agent_count++;
                env->agents[i].mark_as_expert = 1;
11 changes: 11 additions & 0 deletions pufferlib/ocean/drive/drive.py
@@ -1023,6 +1023,17 @@ def process_all_maps(
        if not success:
            print(f"  {name}: {error}")

+   # Write manifest.json mapping each bin to its source JSON
+   manifest = {}
+   for i, map_path, binary_path, *_ in tasks:
+       _, _, success, _ = results[i]
+       if success:
+           manifest[f"map_{i:03d}.bin"] = map_path.name
+   manifest_path = binary_dir / "manifest.json"
+   with open(manifest_path, "w") as f:
+       json.dump(manifest, f, indent=2)
+   print(f"Wrote manifest to {manifest_path} ({len(manifest)} entries)")
Comment on lines +1074 to +1083
Author: @mpragnay really nice touch
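
The manifest gives each compiled bin a link back to its source scenario; a hedged consumer sketch (the directory path here is an assumption, not from the PR):

    import json
    from pathlib import Path

    # Hypothetical lookup: which source JSON produced map_007.bin?
    binary_dir = Path("pufferlib/resources/drive/binaries/training")  # assumed layout
    manifest = json.loads((binary_dir / "manifest.json").read_text())
    print(manifest.get("map_007.bin"))  # source scenario filename, or None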



def test_performance(timeout=10, atn_cache=1024, num_agents=1024):
import time
41 changes: 23 additions & 18 deletions pufferlib/ocean/drive/visualize.c
@@ -193,11 +193,12 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett

int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers,
             int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown,
-            const char *output_agent, int num_maps, int zoom_in) {
+            const char *output_agent, int num_maps, int zoom_in, const char *ini_file) {

    // Parse configuration from INI file
    env_init_config conf = {0};
-   const char *ini_file = "pufferlib/config/ocean/drive.ini";
+   if (ini_file == NULL)
+       ini_file = "pufferlib/config/ocean/drive.ini";
    if (ini_parse(ini_file, handler, &conf) < 0) {
        fprintf(stderr, "Error: Could not load %s. Cannot determine environment configuration.\n", ini_file);
        return -1;
@@ -350,25 +351,29 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
    char filename_topdown[256];
    char filename_agent[256];

-   if (output_topdown != NULL && output_agent != NULL) {
-       strcpy(filename_topdown, output_topdown);
-       strcpy(filename_agent, output_agent);
-   } else {
-       char policy_base[256];
-       strcpy(policy_base, policy_name);
-       *strrchr(policy_base, '.') = '\0';
+   char policy_base[256];
+   strcpy(policy_base, policy_name);
+   *strrchr(policy_base, '.') = '\0';

-       char map[256];
-       strcpy(map, basename((char *)map_name));
-       *strrchr(map, '.') = '\0';
+   char map[256];
+   strcpy(map, basename((char *)map_name));
+   *strrchr(map, '.') = '\0';

-       char video_dir[256];
-       sprintf(video_dir, "%s/video", policy_base);
-       char mkdir_cmd[512];
-       snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir);
-       system(mkdir_cmd);
+   char video_dir[256];
+   sprintf(video_dir, "%s/video", policy_base);
+   char mkdir_cmd[512];
+   snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir);
+   system(mkdir_cmd);
Comment on lines +354 to +366
Copilot AI, Apr 7, 2026:
policy_base and map are derived via *strrchr(..., '.') = '\0' without checking that strrchr returned non-NULL. If policy_name or map_name lacks an extension, this will dereference NULL and crash. Also, the strcpy/sprintf calls here can overflow the fixed 256-byte buffers with long paths. Please add NULL checks and use bounded copies/formatting (e.g., snprintf) to avoid crashes/overflows.
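
A minimal sketch of the guard the reviewer is asking for (a hypothetical helper, not part of this PR): bound every copy and strip the extension only when a dot actually exists.

    #include <stdio.h>
    #include <string.h>

    // Hypothetical helper: bounded copy of `src` into `dst`, then strip a
    // trailing extension only if one is present (no NULL dereference).
    static void copy_base_without_ext(char *dst, size_t dst_size, const char *src) {
        snprintf(dst, dst_size, "%s", src);  // bounded, always NUL-terminated
        char *dot = strrchr(dst, '.');
        if (dot != NULL)
            *dot = '\0';
    }

Used as copy_base_without_ext(policy_base, sizeof(policy_base), policy_name), and likewise on basename((char *)map_name), this tolerates extension-less and over-long paths instead of crashing.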
Comment on lines +362 to +366
Copilot AI, Apr 7, 2026:
system("mkdir -p ...") is run using policy_base derived from the user-supplied --policy-name path. This introduces command-injection risk (e.g., quotes/metacharacters in the path) and is now executed even when --output-* paths are provided. Prefer creating the directory via mkdir(2)/filesystem APIs (and only when you actually need the auto-generated output paths).
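
One way to follow that suggestion (sketched here under POSIX assumptions, not taken from the PR) is to create each path component with mkdir(2) and treat EEXIST as success, so no shell is ever involved:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <sys/types.h>

    // Hypothetical replacement for system("mkdir -p ..."): create each
    // component of `path` with mkdir(2); already-existing directories are fine.
    static int mkdir_p(const char *path, mode_t mode) {
        char buf[512];
        if (snprintf(buf, sizeof(buf), "%s", path) >= (int)sizeof(buf))
            return -1;  // path too long for this sketch's buffer
        for (char *p = buf + 1; *p != '\0'; p++) {
            if (*p == '/') {
                *p = '\0';
                if (mkdir(buf, mode) != 0 && errno != EEXIST)
                    return -1;
                *p = '/';
            }
        }
        if (mkdir(buf, mode) != 0 && errno != EEXIST)
            return -1;
        return 0;
    }

Calling mkdir_p(video_dir, 0755) would replace the snprintf/system pair above and sidestep shell quoting entirely; it could also be guarded to run only when the auto-generated output paths are actually needed.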

+   if (output_topdown != NULL) {
+       strcpy(filename_topdown, output_topdown);
+   } else {
+       sprintf(filename_topdown, "%s/video/%s_topdown.mp4", policy_base, map);
+   }
+
+   if (output_agent != NULL) {
+       strcpy(filename_agent, output_agent);
+   } else {
+       sprintf(filename_agent, "%s/video/%s_agent.mp4", policy_base, map);
+   }

@@ -555,6 +560,6 @@ int main(int argc, char *argv[]) {
    }

    eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown,
-            output_agent, num_maps, zoom_in);
+            output_agent, num_maps, zoom_in, ini_file);
    return 0;
}