diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini
index 0990fdb13..3b7984f75 100644
--- a/pufferlib/config/default.ini
+++ b/pufferlib/config/default.ini
@@ -29,18 +29,18 @@ optimizer = muon
 anneal_lr = True
 precision = float32
 total_timesteps = 10_000_000
-learning_rate = 0.015
-gamma = 0.995
+learning_rate = 0.003
+gamma = 0.9650881439471051
 gae_lambda = 0.90
 update_epochs = 1
-clip_coef = 0.2
+clip_coef = 0.24983563859666713
 vf_coef = 2.0
 vf_clip_coef = 0.2
 max_grad_norm = 1.5
-ent_coef = 0.001
-adam_beta1 = 0.95
-adam_beta2 = 0.999
-adam_eps = 1e-12
+ent_coef = 0.013899446116244659
+adam_beta1 = 0.9739575783018397
+adam_beta2 = 0.9983789918385146
+adam_eps = 2.112852785895044e-07
 data_dir = experiments
 checkpoint_interval = 200
@@ -74,23 +74,23 @@ downsample = 10
 #scale = auto
 
 # TODO: Elim from base
-[sweep.train.total_timesteps]
-distribution = log_normal
-min = 5e7
-max = 1e10
-mean = 1e8
-scale = time
-
-[sweep.train.bptt_horizon]
-distribution = uniform_pow2
-min = 16
-max = 64
-mean = 64
-scale = auto
+#[sweep.train.total_timesteps]
+#distribution = log_normal
+#min = 5e7
+#max = 1e10
+#mean = 1e8
+#scale = time
+
+#[sweep.train.bptt_horizon]
+#distribution = uniform_pow2
+#min = 16
+#max = 64
+#mean = 64
+#scale = auto
 
 [sweep.train.minibatch_size]
 distribution = uniform_pow2
-min = 8192
+min = 16384
 max = 65536
 mean = 32768
 scale = auto
@@ -98,28 +98,28 @@ scale = auto
 [sweep.train.learning_rate]
 distribution = log_normal
 min = 0.00001
-mean = 0.01
-max = 0.1
+mean = 0.00986346440817611
+max = 0.05
 scale = 0.5
 
 [sweep.train.ent_coef]
 distribution = log_normal
 min = 0.00001
-mean = 0.01
+mean = 0.01510492391719301
 max = 0.2
 scale = auto
 
 [sweep.train.gamma]
 distribution = logit_normal
 min = 0.8
-mean = 0.98
+mean = 0.9785788872888401
 max = 0.9999
 scale = auto
 
 [sweep.train.gae_lambda]
 distribution = logit_normal
 min = 0.6
-mean = 0.95
+mean = 0.9909980359361995
 max = 0.995
 scale = auto
 
@@ -127,14 +127,14 @@ scale = auto
 distribution = uniform
 min = 0.0
 max = 5.0
-mean = 1.0
+mean = 4.561253323222173
 scale = auto
 
 [sweep.train.vtrace_c_clip]
 distribution = uniform
 min = 0.0
 max = 5.0
-mean = 1.0
+mean = 1.0511015002528337
 scale = auto
 
 #[sweep.train.update_epochs]
@@ -148,7 +148,7 @@ scale = auto
 distribution = uniform
 min = 0.01
 max = 1.0
-mean = 0.2
+mean = 0.38615719521473363
 scale = auto
 
 # Optimal vf clip can be lower than 0.1,
@@ -157,54 +157,54 @@ scale = auto
 distribution = uniform
 min = 0.1
 max = 5.0
-mean = 0.2
+mean = 0.101
 scale = auto
 
 [sweep.train.vf_coef]
 distribution = uniform
 min = 0.0
 max = 5.0
-mean = 2.0
+mean = 4.999
 scale = auto
 
 [sweep.train.max_grad_norm]
 distribution = uniform
 min = 0.0
-mean = 1.0
+mean = 2.3757898890403624
 max = 5.0
 scale = auto
 
 [sweep.train.adam_beta1]
 distribution = logit_normal
 min = 0.5
-mean = 0.9
+mean = 0.9227290229121611
 max = 0.999
 scale = auto
 
 [sweep.train.adam_beta2]
 distribution = logit_normal
 min = 0.9
-mean = 0.999
+mean = 0.9615271430090461
 max = 0.99999
 scale = auto
 
 [sweep.train.adam_eps]
 distribution = log_normal
 min = 1e-14
-mean = 1e-8
+mean = 5.941675291037148e-05
 max = 1e-4
 scale = auto
 
 [sweep.train.prio_alpha]
 distribution = logit_normal
 min = 0.1
-mean = 0.85
-max = 0.99
+mean = 0.9698884328392982
+max = 0.999
 scale = auto
 
 [sweep.train.prio_beta0]
 distribution = logit_normal
 min = 0.1
-mean = 0.85
-max = 0.99
+mean = 0.9876650505878536
+max = 0.999
 scale = auto
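Note on the sweep specs above: each `[sweep.train.*]` section declares a prior (distribution, min, max, mean, scale) that the Protein sweeper samples from. Protein's actual sampler is not shown in this patch; the standalone sketch below only illustrates one plausible reading of a `log_normal` spec (a Gaussian in log10-space around the mean, clipped to [min, max]) — the function name and the interpretation of `scale` are assumptions for illustration:

    import math, random

    def sample_log_normal(min_val, max_val, mean, scale):
        # ASSUMPTION: scale is the stddev of log10(x); not Protein's real code.
        x = 10 ** random.gauss(math.log10(mean), scale)
        return min(max(x, min_val), max_val)

    # e.g. the learning_rate spec: min=1e-5, mean~0.00986, max=0.05, scale=0.5
    lr = sample_log_normal(1e-5, 0.05, 0.00986346440817611, 0.5)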
diff --git a/pufferlib/config/ocean/drone_delivery.ini b/pufferlib/config/ocean/drone_delivery.ini
new file mode 100644
index 000000000..9ffa3006a
--- /dev/null
+++ b/pufferlib/config/ocean/drone_delivery.ini
@@ -0,0 +1,196 @@
+[base]
+package = ocean
+env_name = puffer_drone_delivery
+policy_name = Policy
+rnn_name = Recurrent
+
+[policy]
+hidden_size = 256
+
+[rnn]
+input_size = 256
+hidden_size = 256
+
+[vec]
+num_envs = 16
+
+[env]
+num_envs = 16
+num_drones = 64
+
+ablation = 0.0
+anneal_min = 0.9236471304397496
+
+grip_k_max = 5.123485320457474
+
+perfect_anneal = 275_000_000
+perfect_deadline = 6.077482208019486e+07
+
+pos_const = 0.8113024567778199
+pos_penalty = 0.02588602599777176
+
+reward_grip = 0.9760442313179252
+reward_ho_drop = 0.5
+reward_hover = 0.0
+
+reward_max_dist = 65.0
+reward_min_dist = 2.531838566923265
+
+vel_penalty_clamp = 0.056085060041244056
+
+w_approach = 1.5767291682972426
+w_position = 1.0188128215656589
+w_stability = 0.012739532721609687
+w_velocity = 0.11573303880718092
+
+[train]
+adam_beta1 = 0.9227290229121611
+adam_beta2 = 0.9615271430090461
+adam_eps = 5.941675291037148e-05
+anneal_lr = true
+batch_size = auto
+bptt_horizon = 64
+checkpoint_interval = 200
+clip_coef = 0.38615719521473363
+ent_coef = 0.01510492391719301
+gae_lambda = 0.9909980359361995
+gamma = 0.9785788872888401
+learning_rate = 0.00986346440817611
+#learning_rate = 0.005
+max_grad_norm = 2.3757898890403624
+max_minibatch_size = 65536
+minibatch_size = 32768
+prio_alpha = 0.9698884328392982
+prio_beta0 = 0.9876650505878536
+total_timesteps = 300_000_000
+update_epochs = 1
+#use_rnn = false
+vf_clip_coef = 0.101
+vf_coef = 4.999
+vtrace_c_clip = 1.0511015002528337
+vtrace_rho_clip = 4.561253323222173
+
+[sweep]
+method = Protein
+metric = perfect_deliv
+goal = maximize
+downsample = 0
+
+[sweep.env.ablation]
+distribution = uniform
+min = 0.0
+max = 1.0
+mean = 0.0
+scale = auto
+
+[sweep.env.anneal_min]
+distribution = uniform
+min = 0.0
+max = 1.0
+mean = 0.9236471304397496
+scale = auto
+
+[sweep.env.grip_k_max]
+distribution = uniform
+min = 5.0
+max = 20.0
+mean = 5.123485320457474
+scale = auto
+
+[sweep.env.perfect_anneal]
+distribution = uniform
+min = 200_000_000
+max = 275_000_000
+mean = 275_000_000
+scale = auto
+
+#[sweep.env.perfect_anneal_start]
+#distribution = uniform
+#min = 150_000_000
+#max = 200_000_000
+#mean = 175_000_000
+#scale = auto
+
+[sweep.env.perfect_deadline]
+distribution = uniform
+min = 50_000_000
+max = 150_000_000
+mean = 6.077482208019486e+07
+scale = auto
+
+[sweep.env.pos_const]
+distribution = uniform
+min = 0.001
+max = 1.0
+mean = 0.8113024567778199
+scale = auto
+
+[sweep.env.pos_penalty]
+distribution = uniform
+min = 0.0
+max = 0.25
+mean = 0.02588602599777176
+scale = auto
+
+[sweep.env.reward_grip]
+distribution = uniform
+min = 0.0
+max = 1.0
+mean = 0.9760442313179252
+scale = auto
+
+[sweep.env.reward_ho_drop]
+distribution = uniform
+min = 0.1
+max = 0.5
+mean = 0.5
+scale = auto
+
+[sweep.env.reward_hover]
+distribution = uniform
+min = 0.0
+max = 0.5
+mean = 0.01
+scale = auto
+
+[sweep.env.reward_min_dist]
+distribution = uniform
+min = 0.1
+max = 5.0
+mean = 2.531838566923265
+scale = auto
+
+[sweep.env.vel_penalty_clamp]
+distribution = uniform
+min = 0.0
+max = 0.5
+mean = 0.056085060041244056
+scale = auto
+
+[sweep.env.w_approach]
+distribution = uniform
+min = 0.0
+max = 2.5
+mean = 1.5767291682972426
+scale = auto
+
+[sweep.env.w_position]
+distribution = uniform
+min = 0.0
+max = 1.5
+mean = 1.0188128215656589
+scale = auto
+
+[sweep.env.w_stability]
+distribution = uniform
+min = 0.0
+max = 2.5
+mean = 0.012739532721609687
+scale = auto
+
+[sweep.env.w_velocity]
+distribution = uniform
+min = 0.0
+max = 1.5
+mean = 0.11573303880718092
+scale = auto
diff --git a/pufferlib/config/ocean/drone_swarm.ini b/pufferlib/config/ocean/drone_swarm.ini
index 59915325a..fd1ef6dcc 100644
--- a/pufferlib/config/ocean/drone_swarm.ini
+++ b/pufferlib/config/ocean/drone_swarm.ini
@@ -49,9 +49,9 @@ vtrace_rho_clip = 3.8183814893708057
 [sweep]
 downsample = 0
 
-[sweep.train.total_timesteps]
-distribution = log_normal
-min = 2e8
-max = 4e8
-mean = 2e8
-scale = time
+#[sweep.train.total_timesteps]
+#distribution = log_normal
+#min = 2e8
+#max = 4e8
+#mean = 2e8
+#scale = time
diff --git a/pufferlib/models.py b/pufferlib/models.py
index fa43d7071..7ec62d037 100644
--- a/pufferlib/models.py
+++ b/pufferlib/models.py
@@ -43,7 +43,7 @@ def __init__(self, env, hidden_size=128):
             pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)),
             nn.GELU(),
         )
-
+
         if self.is_multidiscrete:
             self.action_nvec = tuple(env.single_action_space.nvec)
             num_atns = sum(self.action_nvec)
@@ -56,8 +56,9 @@ def __init__(self, env, hidden_size=128):
         else:
             self.decoder_mean = pufferlib.pytorch.layer_init(
                 nn.Linear(hidden_size, env.single_action_space.shape[0]), std=0.01)
-            self.decoder_logstd = nn.Parameter(torch.zeros(
-                1, env.single_action_space.shape[0]))
+            # Initialize logstd to -0.5 (std ~0.6) for more conservative exploration
+            self.decoder_logstd = nn.Parameter(torch.ones(
+                1, env.single_action_space.shape[0]) * -0.5)
 
         self.value = pufferlib.pytorch.layer_init(
             nn.Linear(hidden_size, 1), std=1)
@@ -77,24 +78,90 @@ def encode_observations(self, observations, state=None):
         if self.is_dict_obs:
             observations = pufferlib.pytorch.nativize_tensor(observations, self.dtype)
             observations = torch.cat([v.view(batch_size, -1) for v in observations.values()], dim=1)
-        else:
+        else:
             observations = observations.view(batch_size, -1)
+
+        # Check for NaN in observations and replace with zeros
+        if torch.isnan(observations).any():
+            #print(f"Warning: NaN detected in observations, replacing with zeros")
+            observations = torch.nan_to_num(observations, nan=0.0)
+
+        # Clamp observations to prevent extreme values
+        observations = torch.clamp(observations, min=-100.0, max=100.0)
+
         return self.encoder(observations.float())
 
+    def check_and_fix_weights(self):
+        '''Check for NaN in model weights and reinitialize if needed'''
+        nan_found = False
+        for name, param in self.named_parameters():
+            if torch.isnan(param).any():
+                #print(f"Warning: NaN found in weight {name}, reinitializing")
+                nan_found = True
+                # Reinitialize the parameter
+                if 'decoder_logstd' in name:
+                    param.data.fill_(-0.5)
+                elif 'bias' in name:
+                    param.data.zero_()
+                else:
+                    # Use Xavier initialization for weights
+                    if param.dim() >= 2:
+                        torch.nn.init.xavier_uniform_(param)
+                    else:
+                        param.data.normal_(0, 0.01)
+        return nan_found
+
     def decode_actions(self, hidden):
         '''Decodes a batch of hidden states into (multi)discrete actions.
         Assumes no time dimension (handled by LSTM wrappers).'''
+        # Check for NaN in hidden states first
+        if torch.isnan(hidden).any():
+            #print(f"Warning: NaN in hidden states before decode, replacing with zeros")
+            hidden = torch.nan_to_num(hidden, nan=0.0)
+            # Also check weights when NaN is detected
+            self.check_and_fix_weights()
+
         if self.is_multidiscrete:
             logits = self.decoder(hidden).split(self.action_nvec, dim=1)
         elif self.is_continuous:
             mean = self.decoder_mean(hidden)
+
+            # More aggressive clamping
+            mean = torch.clamp(mean, min=-5.0, max=5.0)
+
+            # Check for NaN before continuing
+            if torch.isnan(mean).any():
+                #print(f"Warning: NaN in mean after decoder, checking weights")
+                self.check_and_fix_weights()
+                # Retry with fixed weights
+                mean = self.decoder_mean(hidden)
+                mean = torch.clamp(mean, min=-5.0, max=5.0)
+
+            # Clamp logstd more conservatively
             logstd = self.decoder_logstd.expand_as(mean)
+            logstd = torch.clamp(logstd, min=-5.0, max=0.0)  # Max std = 1.0
             std = torch.exp(logstd)
+
+            # Add small epsilon to std to ensure numerical stability
+            std = std + 1e-6
+
+            # Final NaN check and replace with safe values
+            if torch.isnan(mean).any() or torch.isnan(std).any():
+                #print(f"Warning: NaN detected in policy - mean: {torch.isnan(mean).any()}, std: {torch.isnan(std).any()}")
+                mean = torch.nan_to_num(mean, nan=0.0)
+                std = torch.nan_to_num(std, nan=0.5)
+
             logits = torch.distributions.Normal(mean, std)
         else:
             logits = self.decoder(hidden)
 
         values = self.value(hidden)
+
+        # Check values for NaN
+        if torch.isnan(values).any():
+            #print(f"Warning: NaN in value output, replacing with zeros")
+            values = torch.nan_to_num(values, nan=0.0)
+
         return logits, values
 
 class LSTMWrapper(nn.Module):
@@ -186,7 +253,16 @@ def forward(self, observations, state):
         #hidden = self.pre_layernorm(hidden)
         hidden, (lstm_h, lstm_c) = self.lstm.forward(hidden, lstm_state)
         hidden = hidden.float()
-
+
+        # Check for NaN after LSTM and replace with zeros
+        if torch.isnan(hidden).any():
+            #print(f"Warning: NaN detected after LSTM, replacing with zeros")
+            hidden = torch.nan_to_num(hidden, nan=0.0)
+        if torch.isnan(lstm_h).any() or torch.isnan(lstm_c).any():
+            #print(f"Warning: NaN detected in LSTM state, resetting")
+            lstm_h = torch.nan_to_num(lstm_h, nan=0.0)
+            lstm_c = torch.nan_to_num(lstm_c, nan=0.0)
+
         #hidden = self.post_layernorm(hidden)
         hidden = hidden.transpose(0, 1)
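Note on the NaN guards added to models.py above: they all reduce to the same replace-then-clamp pattern via torch.nan_to_num, and the new logstd init gives an initial action std of exp(-0.5) ~ 0.607. A minimal standalone check of that pattern (illustrative tensors, not pufferlib code):

    import torch

    def sanitize(x, nan=0.0, lo=-100.0, hi=100.0):
        # Replace NaNs, then clamp to a safe range, mirroring encode_observations.
        return torch.clamp(torch.nan_to_num(x, nan=nan), min=lo, max=hi)

    x = torch.tensor([float('nan'), 1e6, -3.0])
    print(sanitize(x))                         # tensor([  0., 100.,  -3.])
    print(torch.exp(torch.tensor(-0.5)))       # ~0.6065, the initial action std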
diff --git a/pufferlib/ocean/drone_delivery/binding.c b/pufferlib/ocean/drone_delivery/binding.c
new file mode 100644
index 000000000..2c5083d9e
--- /dev/null
+++ b/pufferlib/ocean/drone_delivery/binding.c
@@ -0,0 +1,66 @@
+#include "drone_delivery.h"
+
+#define Env DroneDelivery
+#include "../env_binding.h"
+
+static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
+    env->num_agents = unpack(kwargs, "num_agents");
+
+    env->grip_k_max = unpack(kwargs, "grip_k_max");
+
+    env->ablation = unpack(kwargs, "ablation");
+    env->anneal_min = unpack(kwargs, "anneal_min");
+
+    env->num_envs = unpack(kwargs, "num_envs");
+    env->perfect_anneal = unpack(kwargs, "perfect_anneal");
+    env->perfect_deadline = unpack(kwargs, "perfect_deadline");
+
+    env->pos_const = unpack(kwargs, "pos_const");
+    env->pos_penalty = unpack(kwargs, "pos_penalty");
+
+    env->reward_grip = unpack(kwargs, "reward_grip");
+    env->reward_ho_drop = unpack(kwargs, "reward_ho_drop");
+    env->reward_hover = unpack(kwargs, "reward_hover");
+
+    env->reward_max_dist = unpack(kwargs, "reward_max_dist");
+    env->reward_min_dist = unpack(kwargs, "reward_min_dist");
+
+    env->vel_penalty_clamp = unpack(kwargs, "vel_penalty_clamp");
+
+    env->w_approach = unpack(kwargs, "w_approach");
+    env->w_position = unpack(kwargs, "w_position");
+    env->w_stability = unpack(kwargs, "w_stability");
+    env->w_velocity = unpack(kwargs, "w_velocity");
+
+    init(env);
+    return 0;
+}
+
+static int my_log(PyObject *dict, Log *log) {
+    assign_to_dict(dict, "perf", log->perf);
+    assign_to_dict(dict, "score", log->score);
+    //assign_to_dict(dict, "collision_rate", log->collision_rate);
+    //assign_to_dict(dict, "oob", log->oob);
+    //assign_to_dict(dict, "episode_return", log->episode_return);
+    //assign_to_dict(dict, "episode_length", log->episode_length);
+
+    //assign_to_dict(dict, "jitter", log->jitter);
+    assign_to_dict(dict, "perfect_grip", log->perfect_grip);
+    assign_to_dict(dict, "perfect_deliv", log->perfect_deliv);
+    //assign_to_dict(dict, "perfect_now", log->perfect_now);
+    //assign_to_dict(dict, "to_pickup", log->to_pickup);
+    //assign_to_dict(dict, "ho_pickup", log->ho_pickup);
+    //assign_to_dict(dict, "de_pickup", log->de_pickup);
+    //assign_to_dict(dict, "to_drop", log->to_drop);
+    //assign_to_dict(dict, "ho_drop", log->ho_drop);
+    //assign_to_dict(dict, "dist", log->dist);
+    //assign_to_dict(dict, "dist100", log->dist100);
+
+    assign_to_dict(dict, "episode_num", log->episode_num);
+    //assign_to_dict(dict, "tick", log->tick);
+    assign_to_dict(dict, "episode_gain", log->episode_gain);
+    assign_to_dict(dict, "anneal", log->anneal);
+
+    //assign_to_dict(dict, "n", log->n);
+    return 0;
+}
diff --git a/pufferlib/ocean/drone_delivery/drone_delivery.c b/pufferlib/ocean/drone_delivery/drone_delivery.c
new file mode 100644
index 000000000..a457d47bd
--- /dev/null
+++ b/pufferlib/ocean/drone_delivery/drone_delivery.c
@@ -0,0 +1,202 @@
+#include "drone_delivery.h"
+#include "puffernet.h"
+#include <math.h>
+
+#ifdef __EMSCRIPTEN__
+#include <emscripten/emscripten.h>
+#endif
+
+double randn(double mean, double std) {
+    static int has_spare = 0;
+    static double spare;
+
+    if (has_spare) {
+        has_spare = 0;
+        return mean + std * spare;
+    }
+
+    has_spare = 1;
+    double u, v, s;
+    do {
+        u = 2.0 * rand() / RAND_MAX - 1.0;
+        v = 2.0 * rand() / RAND_MAX - 1.0;
+        s = u * u + v * v;
+    } while (s >= 1.0 || s == 0.0);
+
+    s = sqrt(-2.0 * log(s) / s);
+    spare = v * s;
+    return mean + std * (u * s);
+}
+
+typedef struct LinearContLSTM LinearContLSTM;
+struct LinearContLSTM {
+    int num_agents;
+    float *obs;
+    float *log_std;
+    Linear *encoder;
+    GELU *gelu1;
+    LSTM *lstm;
+    Linear *actor;
+    Linear *value_fn;
+    int num_actions;
+};
+
+LinearContLSTM *make_linearcontlstm(Weights *weights, int num_agents, int input_dim,
+        int logit_sizes[], int num_actions) {
+    LinearContLSTM *net = calloc(1, sizeof(LinearContLSTM));
+    net->num_agents = num_agents;
+    net->obs = calloc(num_agents * input_dim, sizeof(float));
+    net->num_actions = logit_sizes[0];
+    net->log_std = weights->data;
+    weights->idx += net->num_actions;
+    net->encoder = make_linear(weights, num_agents, input_dim, 128);
+    net->gelu1 = make_gelu(num_agents, 128);
+    int atn_sum = 0;
+    for (int i = 0; i < num_actions; i++) {
+        atn_sum += logit_sizes[i];
+    }
+    net->actor = make_linear(weights, num_agents, 128, atn_sum);
+    net->value_fn = make_linear(weights, num_agents, 128, 1);
+    net->lstm = make_lstm(weights, num_agents, 128, 128);
+    return net;
+}
+
+void free_linearcontlstm(LinearContLSTM *net) {
+    free(net->obs);
+    free(net->encoder);
+    free(net->gelu1);
+    free(net->actor);
+    free(net->value_fn);
+    free(net->lstm);
+    free(net);
+}
+
+void forward_linearcontlstm(LinearContLSTM *net, float *observations, float *actions) {
+    linear(net->encoder, observations);
+    gelu(net->gelu1, net->encoder->output);
+    lstm(net->lstm, net->gelu1->output);
+    linear(net->actor, net->lstm->state_h);
+    linear(net->value_fn, net->lstm->state_h);
+    // Sample each agent's actions from the Gaussian policy head
+    for (int a = 0; a < net->num_agents; a++) {
+        for (int i = 0; i < net->num_actions; i++) {
+            float std = expf(net->log_std[i]);
+            float mean = net->actor->output[a * net->num_actions + i];
+            actions[a * net->num_actions + i] = randn(mean, std);
+        }
+    }
+}
+
+void generate_dummy_actions(DroneDelivery *env) {
+    // Generate random floats in [-1, 1] for every agent's 4 motors
+    for (int i = 0; i < env->num_agents * 4; i++) {
+        env->actions[i] = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
+    }
+}
+
+#ifdef __EMSCRIPTEN__
+typedef struct {
+    DroneDelivery *env;
+    LinearContLSTM *net;
+    Weights *weights;
+} WebRenderArgs;
+
+void emscriptenStep(void *e) {
+    WebRenderArgs *args = (WebRenderArgs *)e;
+    DroneDelivery *env = args->env;
+    LinearContLSTM *net = args->net;
+
+    if (net != NULL) {
+        forward_linearcontlstm(net, env->observations, env->actions);
+    } else {
+        generate_dummy_actions(env);
+    }
+    c_step(env);
+    c_render(env);
+    return;
+}
+
+WebRenderArgs *web_args = NULL;
+#endif
+
+int main() {
+    srand(42); // Seed random number generator
+
+    DroneDelivery *env = calloc(1, sizeof(DroneDelivery));
+    env->num_agents = 64;
+
+    env->grip_k_max = 17.105300970916993;
+
+    env->ablation = 0.0;
+    env->anneal_min = 0.5;
+
+    env->num_envs = 16;
+    env->perfect_anneal = 250000000;
+    //env->perfect_anneal_start = 175000000; // not a DroneDelivery field; kept for reference
+    env->perfect_deadline = 150000000;
+
+    env->pos_const = 0.7108647198043252;
+    env->pos_penalty = 0.0009629600280207475;
+
+    env->reward_grip = 0.9999;
+    env->reward_ho_drop = 0.20890470430909128;
+    env->reward_hover = 0.1603521816569735;
+
+    env->reward_max_dist = 65.0;
+    env->reward_min_dist = 0.9330297552248776;
+
+    env->vel_penalty_clamp = 0.25;
+
+    env->w_approach = 2.2462377881752698;
+    env->w_position = 0.7312628607193232;
+    env->w_stability = 1.6094417286807845;
+    env->w_velocity = 0.0009999999999999731;
+
+    init(env);
+
+    size_t obs_size = 45;
+    size_t act_size = 4;
+    env->observations = (float *)calloc(env->num_agents * obs_size, sizeof(float));
+    env->actions = (float *)calloc(env->num_agents * act_size, sizeof(float));
+    env->rewards = (float *)calloc(env->num_agents, sizeof(float));
+    env->terminals = (unsigned char *)calloc(env->num_agents, sizeof(unsigned char));
+
+    //Weights *weights = load_weights("resources/drone/drone_weights.bin", 136073);
+    //int logit_sizes[1] = {4};
+    //LinearContLSTM *net = make_linearcontlstm(weights, env->num_agents, obs_size, logit_sizes, 1);
+
+    if (!env->observations || !env->actions || !env->rewards || !env->terminals) {
+        fprintf(stderr, "ERROR: Failed to allocate memory for demo buffers.\n");
+        free(env->observations);
+        free(env->actions);
+        free(env->rewards);
+        free(env->terminals);
+        free(env);
+        return 1;
+    }
+
+    c_reset(env);
+
+#ifdef __EMSCRIPTEN__
+    WebRenderArgs *args = calloc(1, sizeof(WebRenderArgs));
+    args->env = env;
+    args->net = NULL;     // pretrained net is disabled above; demo runs on random actions
+    args->weights = NULL;
+    web_args = args;
+
+    emscripten_set_main_loop_arg(emscriptenStep, args, 0, true);
+#else
+    c_render(env);
+
+    while (!WindowShouldClose()) {
+        //forward_linearcontlstm(net, env->observations, env->actions);
+        generate_dummy_actions(env); // random actions while the net is disabled
+        c_step(env);
+        c_render(env);
+    }
+
+    c_close(env);
+    //free_linearcontlstm(net);
+    free(env->observations);
+    free(env->actions);
+    free(env->rewards);
+    free(env->terminals);
+    free(env);
+#endif
+
+    return 0;
+}
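Note: randn() above is the Marsaglia polar method (a rejection-sampled Box-Muller variant that yields two standard normals per accepted pair). A quick numpy check of the same pattern, purely illustrative and not part of the PR:

    import numpy as np

    def polar_gauss(n, rng=np.random.default_rng(0)):
        # Marsaglia polar method, mirroring randn() in drone_delivery.c
        out = []
        while len(out) < n:
            u, v = rng.uniform(-1, 1, 2)
            s = u * u + v * v
            if 0 < s < 1:
                f = np.sqrt(-2 * np.log(s) / s)
                out += [u * f, v * f]
        return np.array(out[:n])

    x = polar_gauss(100_000)
    print(x.mean(), x.std())  # ~0 and ~1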
diff --git a/pufferlib/ocean/drone_delivery/drone_delivery.h b/pufferlib/ocean/drone_delivery/drone_delivery.h
new file mode 100644
index 000000000..124c75a16
--- /dev/null
+++ b/pufferlib/ocean/drone_delivery/drone_delivery.h
@@ -0,0 +1,905 @@
+// Originally made by Sam Turner and Finlay Sanders, 2025.
+// Included in pufferlib under the original project's MIT license.
+// https://github.com/stmio/drone
+
+// Keith (Kinvert) used the above work (drone_swarm) as a base to make
+// drone_delivery for a 2025 hackathon. Thank you Sam, Finlay, and Joseph.
+// Hackathon team members were BET Adsorption and Autark.
+
+#include <math.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "raylib.h"
+#include "dronelib_delivery.h"
+
+typedef struct Client Client;
+struct Client {
+    Camera3D camera;
+    float width;
+    float height;
+
+    float camera_distance;
+    float camera_azimuth;
+    float camera_elevation;
+    bool is_dragging;
+    Vector2 last_mouse_pos;
+
+    // Trailing path buffer (for rendering only)
+    Trail* trails;
+};
+
+typedef struct {
+    float *observations;
+    float *actions;
+    float *rewards;
+    unsigned char *terminals;
+
+    float dist;
+
+    Log log;
+    int tick;
+    int report_interval;
+    bool render;
+
+    int num_agents;
+    Drone* agents;
+    int num_envs;
+
+    float ablation;
+    float anneal;
+    float anneal_min;
+    float annealed_episode;
+    float inv_annealed_episode;
+
+    float box_k;
+    float box_k_max;
+    float box_k_min;
+
+    float dist_decay;
+
+    float grip_k;
+    float grip_k_decay;
+    float grip_k_max;
+
+    float perfect_anneal;
+    float perfect_deadline;
+    float perfect_episode;
+    float inv_perfect_episode;
+
+    float pos_const;
+    float pos_penalty;
+
+    float reward_dist;
+    float inv_reward_dist;
+    float reward_grip;
+    float reward_ho_drop;
+    float reward_hover;
+
+    float reward_max_dist;
+    float reward_min_dist;
+
+    float vel_penalty_clamp;
+
+    float w_approach;
+    float w_position;
+    float w_stability;
+    float w_velocity;
+
+    int episode_num;
+    float episode_gain;
+    float episode_gain_increment;
+
+    Client *client;
+} DroneDelivery;
+
+void init(DroneDelivery *env) {
+    env->render = false;
+    env->box_k = 0.001f;
+    env->box_k_min = 0.001f;
+    env->box_k_max = 1.0f;
+    env->episode_gain = 0.0f;
+    env->agents = calloc(env->num_agents, sizeof(Drone));
+    env->log = (Log){0};
+    env->tick = 0;
+    env->episode_num = 0;
+    env->perfect_episode = env->perfect_deadline / (env->num_envs * env->num_envs * env->num_agents * HORIZON);
+    env->inv_perfect_episode = 1.0f / env->perfect_episode;
+    env->annealed_episode = (env->perfect_anneal - env->perfect_deadline) / (env->num_envs * env->num_envs * env->num_agents * HORIZON);
+    env->inv_annealed_episode = 1.0f / env->annealed_episode;
+    env->grip_k_decay = env->grip_k_max * env->inv_perfect_episode;
+    env->dist_decay = env->reward_max_dist * env->inv_perfect_episode;
+    env->anneal = 1.0f;
+
+    float ablation = env->ablation;
+    if (ablation > 0.0001f && ablation < 0.2f) {
+        env->w_approach = 0.0f;
+    } else if (ablation >= 0.2f && ablation < 0.4f) {
+        env->w_position = 0.0f;
+    } else if (ablation >= 0.4f && ablation < 0.6f) {
+        env->w_stability = 0.0f;
+    } else if (ablation >= 0.6f && ablation < 0.8f) {
+        env->w_velocity = 0.0f;
+    } else if (ablation >= 0.8f) {
+        env->reward_hover = 0.0f;
+    }
+}
+
+void add_log(DroneDelivery *env, int idx, bool oob) {
+    Drone *agent = &env->agents[idx];
+    env->log.score += agent->score;
+    env->log.episode_return += agent->episode_return;
+    env->log.episode_length += agent->episode_length;
+    env->log.collision_rate += agent->collisions / (float)agent->episode_length;
+    //env->log.perf += agent->score / (float)agent->episode_length;
+    env->log.perf += agent->perfect_deliv;
+    if (oob) {
+        env->log.oob += 1.0f;
+    }
+    env->log.n += 1.0f;
+
+    env->log.episode_num += env->episode_num;
+    env->log.tick += env->tick;
+    env->log.episode_gain += env->episode_gain;
+    env->log.anneal += env->anneal;
+
+    agent->episode_length = 0;
+    agent->episode_return = 0.0f;
+}
+
+Drone* nearest_drone(DroneDelivery* env, Drone *agent) {
+    float min_dist = 999999.0f;
+    Drone *nearest = NULL;
+    for (int i = 0; i < env->num_agents; i++) {
+        Drone *other = &env->agents[i];
+        if (other == agent) {
+            continue;
+        }
+        float dx = agent->state.pos.x - other->state.pos.x;
+        float dy = agent->state.pos.y - other->state.pos.y;
+        float dz = agent->state.pos.z - other->state.pos.z;
+        float dist = sqrtf(dx*dx + dy*dy + dz*dz);
+        if (dist < min_dist) {
+            min_dist = dist;
+            nearest = other;
+        }
+    }
+    // NULL only when num_agents == 1; callers guard on num_agents > 1
+    return nearest;
+}
+
+void compute_observations(DroneDelivery *env) {
+    int idx = 0;
+    for (int i = 0; i < env->num_agents; i++) {
+        Drone *agent = &env->agents[i];
+
+        Quat q_inv = quat_inverse(agent->state.quat);
+        Vec3 linear_vel_body = quat_rotate(q_inv, agent->state.vel);
+        Vec3 drone_up_world = quat_rotate(agent->state.quat, (Vec3){0.0f, 0.0f, 1.0f});
+
+        // TODO: may need absolute observations for the moving box
+        // 45 obs total after adding dvx/dvy/dvz
+        env->observations[idx++] = linear_vel_body.x * agent->params.inv_max_vel;
+        env->observations[idx++] = linear_vel_body.y * agent->params.inv_max_vel;
+        env->observations[idx++] = linear_vel_body.z * agent->params.inv_max_vel;
+        env->observations[idx++] = clampf(agent->state.vel.x, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(agent->state.vel.y, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(agent->state.vel.z, -1.0f, 1.0f);
+
+        env->observations[idx++] = clampf(agent->state.omega.x * agent->params.inv_max_omega, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(agent->state.omega.y * agent->params.inv_max_omega, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(agent->state.omega.z * agent->params.inv_max_omega, -1.0f, 1.0f);
+
+        env->observations[idx++] = drone_up_world.x;
+        env->observations[idx++] = drone_up_world.y;
+        env->observations[idx++] = drone_up_world.z;
+
+        env->observations[idx++] = agent->state.quat.w;
+        env->observations[idx++] = agent->state.quat.x;
+        env->observations[idx++] = agent->state.quat.y;
+        env->observations[idx++] = agent->state.quat.z;
+
+        env->observations[idx++] = agent->state.rpms[0] * agent->params.inv_max_rpm;
+        env->observations[idx++] = agent->state.rpms[1] * agent->params.inv_max_rpm;
+        env->observations[idx++] = agent->state.rpms[2] * agent->params.inv_max_rpm;
+        env->observations[idx++] = agent->state.rpms[3] * agent->params.inv_max_rpm;
+
+        env->observations[idx++] = agent->state.pos.x * INV_GRID_X;
+        env->observations[idx++] = agent->state.pos.y * INV_GRID_Y;
+        env->observations[idx++] = agent->state.pos.z * INV_GRID_Z;
+
+        float dx = agent->target_pos.x - agent->state.pos.x;
+        float dy = agent->target_pos.y - agent->state.pos.y;
+        float dz = agent->target_pos.z - agent->state.pos.z;
+        env->observations[idx++] = clampf(dx, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(dy, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(dz, -1.0f, 1.0f);
+        env->observations[idx++] = dx * INV_GRID_X;
+        env->observations[idx++] = dy * INV_GRID_Y;
+        env->observations[idx++] = dz * INV_GRID_Z;
+
+        env->observations[idx++] = agent->last_collision_reward;
+        env->observations[idx++] = agent->last_abs_reward;
+        env->observations[idx++] = clampf(agent->last_position_reward, -1.0f, 1.0f);
+        //env->observations[idx++] = clampf(agent->last_velocity_reward, -1.0f, 1.0f);
+        //env->observations[idx++] = clampf(agent->last_stability_reward, -1.0f, 1.0f);
+        //env->observations[idx++] = clampf(agent->last_approach_reward, -1.0f, 1.0f);
+
+        // Multiagent obs
+        Drone* nearest = nearest_drone(env, agent);
+        if (env->num_agents > 1) {
+            env->observations[idx++] = clampf(nearest->state.pos.x - agent->state.pos.x, -1.0f, 1.0f);
+            env->observations[idx++] = clampf(nearest->state.pos.y - agent->state.pos.y, -1.0f, 1.0f);
+            env->observations[idx++] = clampf(nearest->state.pos.z - agent->state.pos.z, -1.0f, 1.0f);
+        } else {
+            env->observations[idx++] = 0.0f;
+            env->observations[idx++] = 0.0f;
+            env->observations[idx++] = 0.0f;
+        }
+
+        Vec3 to_box = quat_rotate(q_inv, sub3(agent->box_pos, agent->state.pos));
+        Vec3 to_drop = quat_rotate(q_inv, sub3(agent->drop_pos, agent->state.pos));
+        env->observations[idx++] = to_box.x * INV_GRID_X;
+        env->observations[idx++] = to_box.y * INV_GRID_Y;
+        env->observations[idx++] = to_box.z * INV_GRID_Z;
+        env->observations[idx++] = to_drop.x * INV_GRID_X;
+        env->observations[idx++] = to_drop.y * INV_GRID_Y;
+        env->observations[idx++] = to_drop.z * INV_GRID_Z;
+        env->observations[idx++] = 1.0f; // TODO: remove
+        float dvx = agent->target_vel.x - agent->state.vel.x;
+        float dvy = agent->target_vel.y - agent->state.vel.y;
+        float dvz = agent->target_vel.z - agent->state.vel.z;
+        env->observations[idx++] = clampf(dvx, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(dvy, -1.0f, 1.0f);
+        env->observations[idx++] = clampf(dvz, -1.0f, 1.0f);
+    }
+}
+
+float compute_reward(DroneDelivery* env, Drone *agent, bool collision) {
+    Vec3 tgt = agent->hidden_pos;
+
+    Vec3 pos_error = {agent->state.pos.x - tgt.x, agent->state.pos.y - tgt.y, agent->state.pos.z - tgt.z};
+    float dist = sqrtf(pos_error.x * pos_error.x + pos_error.y * pos_error.y + pos_error.z * pos_error.z) + 1e-8f;
+
+    Vec3 vel_error = {agent->state.vel.x - agent->hidden_vel.x,
+                      agent->state.vel.y - agent->hidden_vel.y,
+                      agent->state.vel.z - agent->hidden_vel.z};
+    float vel_magnitude = sqrtf(vel_error.x * vel_error.x + vel_error.y * vel_error.y + vel_error.z * vel_error.z);
+
+    float angular_vel_magnitude = sqrtf(agent->state.omega.x * agent->state.omega.x +
+                                        agent->state.omega.y * agent->state.omega.y +
+                                        agent->state.omega.z * agent->state.omega.z);
+
+    float proximity_factor = clampf(1.0f - dist * env->inv_reward_dist, 0.0f, 1.0f);
+
+    agent->last_position_reward = clampf(expf(-dist / (env->reward_dist * env->pos_const)), -env->pos_penalty, 1.0f);
+
+    // slight reward around 0.05, large penalty for over 0.4
+    float velocity_reward = clampf(proximity_factor * (2.0f * expf(-(vel_magnitude - 0.05f) * 10.0f) - 1.0f), -env->vel_penalty_clamp, 1.0f);
+    if (velocity_reward < 0.0f) agent->last_velocity_reward = velocity_reward * env->episode_gain;
+
+    agent->last_stability_reward = -angular_vel_magnitude * agent->params.inv_max_omega;
+
+    Vec3 to_target_unit = {0, 0, 0};
+    if (dist > 0.001f) {
+        to_target_unit.x = -pos_error.x / dist;
+        to_target_unit.y = -pos_error.y / dist;
+        to_target_unit.z = -pos_error.z / dist;
+    }
+    float approach_dot = to_target_unit.x * agent->state.vel.x +
+                         to_target_unit.y * agent->state.vel.y +
+                         to_target_unit.z * agent->state.vel.z;
+
+    float approach_weight = clampf(dist * env->inv_reward_dist, 0.0f, 1.0f); // TODO
+    agent->last_approach_reward = approach_weight * clampf(approach_dot * agent->params.inv_max_vel, -0.5f, 0.5f);
+
+    float hover_bonus = 0.0f; // TODO: add a coefficient
+    if (dist < env->reward_dist * 0.2f && vel_magnitude < 0.2f && agent->state.vel.z < 0.0f) {
+        hover_bonus = env->reward_hover;
+    }
+
+    float collision_penalty = 0.0f;
+    if (collision && env->num_agents > 1) {
+        Drone *nearest = nearest_drone(env, agent);
+        float dx = agent->state.pos.x - nearest->state.pos.x;
+        float dy = agent->state.pos.y - nearest->state.pos.y;
+        float dz = agent->state.pos.z - nearest->state.pos.z;
+        float min_dist = sqrtf(dx*dx + dy*dy + dz*dz);
+        if (min_dist < 1.0f) {
+            collision_penalty = -1.0f;
+            agent->collisions += 1.0f;
+        }
+    }
+
+    float total_reward = env->w_position * agent->last_position_reward +
+                         env->w_velocity * agent->last_velocity_reward +
+                         env->w_stability * agent->last_stability_reward +
+                         env->w_approach * agent->last_approach_reward +
+                         hover_bonus +
+                         collision_penalty;
+
+    total_reward = clampf(total_reward, -1.0f, 1.0f);
+
+    float delta_reward = total_reward - agent->last_abs_reward;
+
+    agent->last_collision_reward = collision_penalty;
+    agent->last_abs_reward = total_reward;
+    agent->episode_length++;
+    agent->score += total_reward;
+    env->dist = dist * dist;
+    agent->jitter = 10.0f - (dist + vel_magnitude + angular_vel_magnitude);
+
+    return delta_reward * env->anneal;
+}
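Note: compute_reward() returns the *change* in the shaped score (delta_reward = total_reward - last_abs_reward), a potential-based shaping scheme, scaled by the anneal factor; an agent that holds a constant shaped score earns nothing. A tiny worked example with illustrative numbers:

    # Potential-based shaping: reward the improvement in the shaped score.
    last, anneal = 0.30, 1.0
    for total in (0.35, 0.40, 0.40):
        delta = total - last
        print(delta * anneal)   # 0.05, 0.05, 0.0 -> no reward for standing still
        last = total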
+void reset_delivery(DroneDelivery* env, Drone *agent, int idx) {
+    agent->box_pos = (Vec3){rndf(-MARGIN_X, MARGIN_X), rndf(-MARGIN_Y, MARGIN_Y), rndf(-GRID_Z + 0.5f, -GRID_Z + 3.0f)};
+    agent->drop_pos = (Vec3){rndf(-MARGIN_X, MARGIN_X), rndf(-MARGIN_Y, MARGIN_Y), -GRID_Z + 0.5f};
+    agent->box_vel = (Vec3){0.0f, 0.0f, 0.0f};
+    agent->box_vel.x = agent->box_pos.x > 0.0f ? rndf(-0.2f, 0.0f) : rndf(0.0f, 0.2f);
+    agent->box_vel.y = agent->box_pos.y > 0.0f ? rndf(-0.2f, 0.0f) : rndf(0.0f, 0.2f);
+    agent->gripping = false;
+    agent->delivered = false;
+    agent->grip_height = 0.0f;
+    agent->approaching_pickup = false;
+    agent->hovering_pickup = false;
+    agent->descent_pickup = false;
+    agent->approaching_drop = false;
+    agent->hovering_drop = false;
+    agent->descent_drop = false;
+    agent->hover_timer = 0.0f;
+    agent->target_pos = agent->box_pos;
+    agent->target_vel = agent->box_vel;
+    agent->hidden_pos = agent->target_pos;
+    agent->hidden_pos.z += 1.0f;
+    agent->hidden_vel = agent->box_vel;
+
+    float drone_capacity = agent->params.arm_len * 4.0f;
+    agent->box_size = rndf(0.3f, fmaxf(fminf(drone_capacity, 1.0f), 0.3f));
+
+    float box_volume = agent->box_size * agent->box_size * agent->box_size;
+    agent->box_base_mass = fminf(BASE_BOX_DENSITY * box_volume * rndf(0.05f, 2.0f), agent->box_mass_max);
+    agent->box_mass = env->box_k * agent->box_base_mass;
+
+    agent->base_mass = agent->params.mass;
+    agent->base_ixx = agent->params.ixx;
+    agent->base_iyy = agent->params.iyy;
+    agent->base_izz = agent->params.izz;
+    agent->base_k_drag = agent->params.k_drag;
+    agent->base_b_drag = agent->params.b_drag;
+}
+
+void reset_agent(DroneDelivery* env, Drone *agent, int idx) {
+    agent->episode_return = 0.0f;
+    agent->episode_length = 0;
+    agent->collisions = 0.0f;
+    agent->score = 0.0f;
+    agent->perfect_grip = false;
+    agent->perfect_deliveries = 0.0f;
+    agent->perfect_deliv = false;
+    agent->perfect_now = false;
+    agent->has_delivered = false;
+    agent->jitter = 100.0f;
+    agent->box_physics_on = false;
+
+    //float size = 0.2f;
+    //init_drone(agent, size, 0.0f);
+    float size = rndf(0.3f, 1.0f);
+    init_drone(agent, size, 0.25f);
+    agent->color = (Color){255, 0, 0, 255};
+
+    agent->state.pos = (Vec3){
+        rndf(-MARGIN_X, MARGIN_X),
+        rndf(-MARGIN_Y, MARGIN_Y),
+        rndf(-MARGIN_Z, MARGIN_Z)
+    };
+    agent->prev_pos = agent->state.pos;
+    agent->spawn_pos = agent->state.pos;
+
+    reset_delivery(env, agent, idx);
+
+    bool watch_for_collisions = true;
+    compute_reward(env, agent, watch_for_collisions);
+}
+
+void random_bump(Drone* agent) {
+    agent->state.vel.x += rndf(-0.1f, 0.1f);
+    agent->state.vel.y += rndf(-0.1f, 0.1f);
+    agent->state.vel.z += rndf(0.05f, 0.3f);
+    agent->state.omega.x += rndf(-0.5f, 0.5f);
+    agent->state.omega.y += rndf(-0.5f, 0.5f);
+    agent->state.omega.z += rndf(-0.5f, 0.5f);
+}
+
+void update_gripping_physics(Drone* agent) {
+    if (agent->gripping) {
+        agent->params.mass = agent->base_mass + agent->box_mass * rndf(0.9f, 1.1f);
+
+        float grip_dist = agent->box_size * 0.5f;
+        float added_inertia = agent->box_mass * grip_dist * grip_dist * rndf(0.8f, 1.2f);
+        agent->params.ixx = agent->base_ixx + added_inertia;
+        agent->params.iyy = agent->base_iyy + added_inertia;
+        agent->params.izz = agent->base_izz + added_inertia * 0.5f;
+
+        float drag_multiplier = 1.0f + (agent->box_size / agent->params.arm_len) * rndf(0.5f, 1.0f);
+        agent->params.k_drag = agent->base_k_drag * drag_multiplier;
+        agent->params.b_drag = agent->base_b_drag * drag_multiplier;
+        agent->box_physics_on = true;
+    } else {
+        agent->params.mass = agent->base_mass;
+        agent->params.ixx = agent->base_ixx;
+        agent->params.iyy = agent->base_iyy;
+        agent->params.izz = agent->base_izz;
+        agent->params.k_drag = agent->base_k_drag;
+        agent->params.b_drag = agent->base_b_drag;
+    }
+}
+
+void c_reset(DroneDelivery *env) {
+    env->tick = 0;
+    env->episode_num += 1;
+    env->reward_dist = clampf(env->reward_max_dist - env->episode_num * env->dist_decay, env->reward_min_dist, 100.0f);
+    env->inv_reward_dist = 1.0f / env->reward_dist;
+    if (env->episode_num > 1) env->episode_gain = clampf(env->episode_gain + env->inv_perfect_episode, 0.0f, 1.0f);
+
+    for (int i = 0; i < env->num_agents; i++) {
+        Drone *agent = &env->agents[i];
+        reset_agent(env, agent, i);
+        agent->target_pos = (Vec3){agent->box_pos.x, agent->box_pos.y, agent->box_pos.z};
+        agent->target_vel = agent->box_vel;
+    }
+
+    if (env->grip_k < 1.01f && env->box_k > 0.99f) {
+        env->anneal = clampf(env->anneal - (env->inv_annealed_episode * (1.0f - env->anneal_min)), env->anneal_min, 1.0f);
+    }
+
+    compute_observations(env);
+}
+
+void c_step(DroneDelivery *env) {
+    env->tick = (env->tick + 1) % HORIZON;
+    //env->log.dist = 0.0f;
+    //env->log.dist100 = 0.0f;
+    env->grip_k = clampf(env->grip_k_max - env->episode_num * env->grip_k_decay, 1.0f, 100.0f);
+    env->box_k = clampf(env->box_k_min + env->episode_num * env->inv_perfect_episode, env->box_k_min, env->box_k_max);
+    for (int i = 0; i < env->num_agents; i++) {
+        Drone *agent = &env->agents[i];
+        env->rewards[i] = 0;
+        env->terminals[i] = 0;
+        agent->perfect_now = false;
+
+        float* atn = &env->actions[4*i];
+        move_drone(agent, atn);
+
+        bool out_of_bounds = agent->state.pos.x < -GRID_X || agent->state.pos.x > GRID_X ||
+                             agent->state.pos.y < -GRID_Y || agent->state.pos.y > GRID_Y ||
+                             agent->state.pos.z < -GRID_Z || agent->state.pos.z > GRID_Z;
+
+        float reward = 0.0f;
+
+        if (!agent->gripping) {
+            agent->box_pos.x += agent->box_vel.x * DT;
+            agent->box_pos.y += agent->box_vel.y * DT;
+            agent->box_pos.z += agent->box_vel.z * DT;
+            agent->hidden_pos.x = agent->box_pos.x;
+            agent->hidden_pos.y = agent->box_pos.y;
+        }
+        agent->hidden_pos.z += agent->hidden_vel.z * DT;
+        if (agent->hidden_pos.z < agent->target_pos.z) {
+            agent->hidden_pos.z = agent->target_pos.z;
+            agent->hidden_vel.z = 0.0f;
+        }
+        agent->approaching_pickup = true;
+
+        Vec3 vel_error = {agent->state.vel.x - agent->hidden_vel.x,
+                          agent->state.vel.y - agent->hidden_vel.y,
+                          agent->state.vel.z - agent->hidden_vel.z};
+        float speed = sqrtf(vel_error.x * vel_error.x + vel_error.y * vel_error.y + vel_error.z * vel_error.z);
+
+        agent->box_mass = env->box_k * agent->box_base_mass;
+        float k = env->grip_k;
+        if (!agent->gripping) {
+            float dist_to_hidden = sqrtf(powf(agent->state.pos.x - agent->hidden_pos.x, 2) +
+                                         powf(agent->state.pos.y - agent->hidden_pos.y, 2) +
+                                         powf(agent->state.pos.z - agent->hidden_pos.z, 2));
+            float xy_dist_to_box = sqrtf(powf(agent->state.pos.x - agent->box_pos.x, 2) +
+                                         powf(agent->state.pos.y - agent->box_pos.y, 2));
+            float z_dist_above_box = agent->state.pos.z - agent->box_pos.z;
+
+            if (xy_dist_to_box < 2.0f && agent->state.pos.z < agent->target_pos.z + 10.0f) {
+                reward -= 0.001f * env->episode_num - 0.001f;
+            }
+
+            // Phase 1 Box Hover
+            if (!agent->hovering_pickup) {
+                if (dist_to_hidden < 0.4f && speed < 0.4f) {
+                    agent->hovering_pickup = true;
+                    agent->color = (Color){255, 255, 255, 255}; // White
+                } else {
+                    if (!agent->has_delivered) {
+                        agent->color = (Color){255, 100, 100, 255}; // Light Red
+                    }
+                }
+            }
+
+            // Phase 2 Box Descent
+            else {
+                agent->descent_pickup = true;
+                agent->hidden_vel.z = -0.1f;
+                if (
+                    xy_dist_to_box < k * 0.1f &&
+                    z_dist_above_box < k * 0.1f && z_dist_above_box > 0.0f &&
+                    speed < k * 0.1f &&
+                    agent->state.vel.z > k * -0.05f && agent->state.vel.z < 0.0f
+                ) {
+                    if (k < 1.01f && env->box_k > 0.99f) {
+                        agent->perfect_grip = true;
+                        agent->color = (Color){100, 100, 255, 255}; // Light Blue
+                    }
+                    agent->gripping = true;
+                    reward += env->reward_grip * env->anneal;
+                    random_bump(agent);
+                } else if (dist_to_hidden > 0.4f || speed > 0.4f) {
+                    agent->color = (Color){255, 100, 100, 255}; // Light Red
+                }
+            }
+        } else {
+
+            // Phase 3 Drop Hover
+            agent->box_pos = agent->state.pos;
+            agent->box_pos.z -= 0.5f;
+            agent->target_pos = agent->drop_pos;
+            agent->target_vel = (Vec3){0.0f, 0.0f, 0.0f};
+            float xy_dist_to_drop = sqrtf(powf(agent->state.pos.x - agent->drop_pos.x, 2) +
+                                          powf(agent->state.pos.y - agent->drop_pos.y, 2));
+            float z_dist_above_drop = agent->state.pos.z - agent->drop_pos.z;
+
+            //if (xy_dist_to_drop < 2.0f && agent->state.pos.z < agent->target_pos.z + 10.0f) {
+            //    reward -= 0.001f * env->episode_num - 0.001f;
+            //}
+
+            if (!agent->box_physics_on && agent->state.vel.z > 0.3f) {
+                update_gripping_physics(agent);
+            }
+
+            if (!agent->hovering_drop) {
+                agent->target_pos = (Vec3){agent->drop_pos.x, agent->drop_pos.y, agent->drop_pos.z + 0.4f};
+                agent->hidden_pos = (Vec3){agent->drop_pos.x, agent->drop_pos.y, agent->drop_pos.z + 1.0f};
+                agent->hidden_vel = (Vec3){0.0f, 0.0f, 0.0f};
+                if (xy_dist_to_drop < k * 0.4f && z_dist_above_drop > 0.7f && z_dist_above_drop < 1.3f) {
+                    agent->hovering_drop = true;
+                    reward += env->reward_ho_drop * env->anneal;
+                    agent->color = (Color){0, 0, 255, 255}; // Blue
+                }
+            }
+
+            // Phase 4 Drop Descent
+            else {
+                agent->target_pos = agent->drop_pos;
+                agent->hidden_pos.x = agent->drop_pos.x;
+                agent->hidden_pos.y = agent->drop_pos.y;
+                agent->hidden_vel = (Vec3){0.0f, 0.0f, -0.1f};
+                if (xy_dist_to_drop < k * 0.2f && z_dist_above_drop < k * 0.2f) {
+                    agent->hovering_pickup = false;
+                    agent->gripping = false;
+                    update_gripping_physics(agent);
+                    agent->box_physics_on = false;
+                    agent->hovering_drop = false;
+                    reward += 1.0f;
+                    agent->delivered = true;
+                    agent->has_delivered = true;
+                    if (k < 1.01f && agent->perfect_grip && env->box_k > 0.99f) {
+                        agent->perfect_deliv = true;
+                        agent->perfect_deliveries += 1.0f;
+                        agent->perfect_now = true;
+                        agent->color = (Color){0, 255, 0, 255}; // Green
+                    }
+                    reset_delivery(env, agent, i);
+                }
+            }
+        }
+
+        reward += compute_reward(env, agent, true);
+
+        for (int j = 0; j < env->num_agents; j++) {
+            Drone *a = &env->agents[j];
+            if (a->perfect_grip && env->grip_k < 1.01f && env->box_k > 0.99f) {
+                env->log.perfect_grip += 1.0f;
+            }
+            if (a->perfect_deliv && env->grip_k < 1.01f && a->perfect_grip && env->box_k > 0.99f) {
+                env->log.perfect_deliv += a->perfect_deliveries;
+            }
+            if (a->perfect_deliv && env->grip_k < 1.01f && a->perfect_grip && a->perfect_now && env->box_k > 0.99f) {
+                env->log.perfect_now += 1.0f;
+            }
+        }
+
+        env->rewards[i] += reward;
+        agent->episode_return += reward;
+
+        float min_z = -GRID_Z + 0.2f;
+        if (agent->gripping) {
+            min_z += 0.1f;
+        }
+
+        if (out_of_bounds || agent->state.pos.z < min_z) {
+            env->rewards[i] -= 1;
+            env->terminals[i] = 1;
+            add_log(env, i, true);
+            reset_agent(env, agent, i);
+        } else if (env->tick >= HORIZON - 1) {
+            env->terminals[i] = 1;
+            add_log(env, i, false);
+        }
+    }
+    if (env->tick >= HORIZON - 1) {
+        c_reset(env);
+    }
+
+    compute_observations(env);
+}
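Note: c_step() and c_reset() implement the curriculum: grip_k anneals the grasp tolerances down toward 1.0, box_k ramps the box mass fraction up to 1.0, reward_dist shrinks toward reward_min_dist, and the overall reward anneal kicks in only once both curricula finish. A small sketch of the schedules with the same formulas and illustrative constants (perfect_episode here is a made-up episode count, not the value computed in init()):

    # Curriculum schedules from c_step()/c_reset(), illustrative constants.
    def clamp(v, lo, hi): return max(lo, min(hi, v))

    grip_k_max, perfect_episode = 5.12, 400.0   # ASSUMED values for illustration
    grip_k_decay = grip_k_max / perfect_episode
    for ep in (0, 200, 400, 600):
        grip_k = clamp(grip_k_max - ep * grip_k_decay, 1.0, 100.0)  # tolerance multiplier
        box_k = clamp(0.001 + ep / perfect_episode, 0.001, 1.0)     # box mass fraction
        print(ep, round(grip_k, 2), round(box_k, 3))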
+void c_close_client(Client *client) {
+    CloseWindow();
+    free(client);
+}
+
+void c_close(DroneDelivery *env) {
+    if (env->client != NULL) {
+        c_close_client(env->client);
+    }
+}
+
+static void update_camera_position(Client *c) {
+    float r = c->camera_distance;
+    float az = c->camera_azimuth;
+    float el = c->camera_elevation;
+
+    float x = r * cosf(el) * cosf(az);
+    float y = r * cosf(el) * sinf(az);
+    float z = r * sinf(el);
+
+    c->camera.position = (Vector3){x, y, z};
+    c->camera.target = (Vector3){0, 0, 0};
+}
+
+void handle_camera_controls(Client *client) {
+    Vector2 mouse_pos = GetMousePosition();
+
+    if (IsMouseButtonPressed(MOUSE_BUTTON_LEFT)) {
+        client->is_dragging = true;
+        client->last_mouse_pos = mouse_pos;
+    }
+
+    if (IsMouseButtonReleased(MOUSE_BUTTON_LEFT)) {
+        client->is_dragging = false;
+    }
+
+    if (client->is_dragging && IsMouseButtonDown(MOUSE_BUTTON_LEFT)) {
+        Vector2 mouse_delta = {mouse_pos.x - client->last_mouse_pos.x,
+                               mouse_pos.y - client->last_mouse_pos.y};
+
+        float sensitivity = 0.005f;
+
+        client->camera_azimuth -= mouse_delta.x * sensitivity;
+
+        client->camera_elevation += mouse_delta.y * sensitivity;
+        client->camera_elevation =
+            clampf(client->camera_elevation, -PI / 2.0f + 0.1f, PI / 2.0f - 0.1f);
+
+        client->last_mouse_pos = mouse_pos;
+
+        update_camera_position(client);
+    }
+
+    float wheel = GetMouseWheelMove();
+    if (wheel != 0) {
+        client->camera_distance -= wheel * 2.0f;
+        client->camera_distance = clampf(client->camera_distance, 5.0f, 50.0f);
+        update_camera_position(client);
+    }
+}
+
+Client *make_client(DroneDelivery *env) {
+    Client *client = (Client *)calloc(1, sizeof(Client));
+
+    client->width = WIDTH;
+    client->height = HEIGHT;
+
+    SetConfigFlags(FLAG_MSAA_4X_HINT); // antialiasing
+    InitWindow(WIDTH, HEIGHT, "PufferLib DroneDelivery");
+
+#ifndef __EMSCRIPTEN__
+    SetTargetFPS(60);
+#endif
+
+    if (!IsWindowReady()) {
+        TraceLog(LOG_ERROR, "Window failed to initialize\n");
+        free(client);
+        return NULL;
+    }
+
+    client->camera_distance = 40.0f;
+    client->camera_azimuth = 0.0f;
+    client->camera_elevation = PI / 10.0f;
+    client->is_dragging = false;
+    client->last_mouse_pos = (Vector2){0.0f, 0.0f};
+
+    client->camera.up = (Vector3){0.0f, 0.0f, 1.0f};
+    client->camera.fovy = 45.0f;
+    client->camera.projection = CAMERA_PERSPECTIVE;
+
+    update_camera_position(client);
+
+    // Initialize trail buffer
+    client->trails = (Trail*)calloc(env->num_agents, sizeof(Trail));
+    for (int i = 0; i < env->num_agents; i++) {
+        Trail* trail = &client->trails[i];
+        trail->index = 0;
+        trail->count = 0;
+        for (int j = 0; j < TRAIL_LENGTH; j++) {
+            trail->pos[j] = env->agents[i].state.pos;
+        }
+    }
+
+    return client;
+}
+
+const Color PUFF_RED = (Color){187, 0, 0, 255};
+const Color PUFF_CYAN = (Color){0, 187, 187, 255};
+const Color PUFF_WHITE = (Color){241, 241, 241, 241};
+const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255};
+
+void c_render(DroneDelivery *env) {
+    if (env->client == NULL) {
+        env->client = make_client(env);
+        if (env->client == NULL) {
+            TraceLog(LOG_ERROR, "Failed to initialize client for rendering\n");
+            return;
+        }
+    }
+    env->render = true;
+    env->grip_k_max = 1.0f;
+    env->box_k_max = 1.0f;
+    env->box_k_min = 1.0f;
+    env->box_k = 1.0f;
+    env->reward_dist = env->reward_min_dist;
+    if (WindowShouldClose()) {
+        c_close(env);
+        exit(0);
+    }
+
+    if (IsKeyDown(KEY_ESCAPE)) {
+        c_close(env);
+        exit(0);
+    }
+
+    handle_camera_controls(env->client);
+
+    Client *client = env->client;
+
+    for (int i = 0; i < env->num_agents; i++) {
+        Drone *agent = &env->agents[i];
+        Trail *trail = &client->trails[i];
+        trail->pos[trail->index] = agent->state.pos;
+        trail->index = (trail->index + 1) % TRAIL_LENGTH;
+        if (trail->count < TRAIL_LENGTH) {
+            trail->count++;
+        }
+        if (env->terminals[i]) {
+            trail->index = 0;
+            trail->count = 0;
+        }
+    }
+
+    BeginDrawing();
+    ClearBackground(PUFF_BACKGROUND);
+
+    BeginMode3D(client->camera);
+
+    // draws bounding cube
+    DrawCubeWires((Vector3){0.0f, 0.0f, 0.0f}, GRID_X * 2.0f,
+                  GRID_Y * 2.0f, GRID_Z * 2.0f, WHITE);
+
+    for (int i = 0; i < env->num_agents; i++) {
+        Drone *agent = &env->agents[i];
+
+        // draws drone body
+        DrawSphere((Vector3){agent->state.pos.x, agent->state.pos.y, agent->state.pos.z}, 0.3f, agent->color);
+
+        // draws rotors according to thrust
+        float T[4];
+        for (int j = 0; j < 4; j++) {
+            float rpm = (env->actions[4*i + j] + 1.0f) * 0.5f * agent->params.max_rpm;
+            T[j] = agent->params.k_thrust * rpm * rpm;
+        }
+
+        const float rotor_radius = 0.15f;
+        const float visual_arm_len = agent->params.arm_len * 4.0f;
+
+        Vec3 rotor_offsets_body[4] = {{+visual_arm_len, 0.0f, 0.0f},
+                                      {-visual_arm_len, 0.0f, 0.0f},
+                                      {0.0f, +visual_arm_len, 0.0f},
+                                      {0.0f, -visual_arm_len, 0.0f}};
+
+        //Color base_colors[4] = {ORANGE, PURPLE, LIME, SKYBLUE};
+        Color base_colors[4] = {agent->color, agent->color, agent->color, agent->color};
+
+        for (int j = 0; j < 4; j++) {
+            Vec3 world_off = quat_rotate(agent->state.quat, rotor_offsets_body[j]);
+
+            Vector3 rotor_pos = {agent->state.pos.x + world_off.x, agent->state.pos.y + world_off.y,
+                                 agent->state.pos.z + world_off.z};
+
+            float rpm = (env->actions[4*i + j] + 1.0f) * 0.5f * agent->params.max_rpm;
+            float intensity = 0.75f + 0.25f * (rpm / agent->params.max_rpm);
+
+            Color rotor_color = (Color){(unsigned char)(base_colors[j].r * intensity),
+                                        (unsigned char)(base_colors[j].g * intensity),
+                                        (unsigned char)(base_colors[j].b * intensity), 255};
+
+            DrawSphere(rotor_pos, rotor_radius, rotor_color);
+
+            DrawCylinderEx((Vector3){agent->state.pos.x, agent->state.pos.y, agent->state.pos.z}, rotor_pos, 0.02f, 0.02f, 8,
+                           BLACK);
+        }
+
+        // draws line with direction and magnitude of velocity / 10
+        if (norm3(agent->state.vel) > 0.1f) {
+            DrawLine3D((Vector3){agent->state.pos.x, agent->state.pos.y, agent->state.pos.z},
+                       (Vector3){agent->state.pos.x + agent->state.vel.x * 0.1f, agent->state.pos.y + agent->state.vel.y * 0.1f,
+                                 agent->state.pos.z + agent->state.vel.z * 0.1f},
+                       MAGENTA);
+        }
+
+        // Draw trailing path
+        Trail *trail = &client->trails[i];
+        if (trail->count <= 2) {
+            continue;
+        }
+        for (int j = 0; j < trail->count - 1; j++) {
+            int idx0 = (trail->index - j - 1 + TRAIL_LENGTH) % TRAIL_LENGTH;
+            int idx1 = (trail->index - j - 2 + TRAIL_LENGTH) % TRAIL_LENGTH;
+            float alpha = (float)(TRAIL_LENGTH - j) / (float)trail->count * 0.8f; // fade out
+            Color trail_color = ColorAlpha((Color){0, 187, 187, 255}, alpha);
+            DrawLine3D((Vector3){trail->pos[idx0].x, trail->pos[idx0].y, trail->pos[idx0].z},
+                       (Vector3){trail->pos[idx1].x, trail->pos[idx1].y, trail->pos[idx1].z},
+                       trail_color);
+        }
+    }
+
+    for (int i = 0; i < env->num_agents; i++) {
+        Drone *agent = &env->agents[i];
+        Vec3 render_pos = agent->box_pos;
+        DrawCube((Vector3){render_pos.x, render_pos.y, render_pos.z}, agent->box_size, agent->box_size, agent->box_size, BROWN);
+        DrawCube((Vector3){agent->drop_pos.x, agent->drop_pos.y, agent->drop_pos.z}, 0.5f, 0.5f, 0.1f, YELLOW);
+        //DrawSphere((Vector3){agent->hidden_pos.x, agent->hidden_pos.y, agent->hidden_pos.z}, 0.2f, YELLOW);
+    }
+
+    if (IsKeyDown(KEY_TAB)) {
+        for (int i = 0; i < env->num_agents; i++) {
+            Drone *agent = &env->agents[i];
+            Vec3 target_pos = agent->target_pos;
+            DrawSphere((Vector3){target_pos.x, target_pos.y, target_pos.z}, 0.45f, (Color){0, 255, 255, 100});
+        }
+    }
+
+    EndMode3D();
+
+    DrawText("Left click + drag: Rotate camera", 10, 10, 16, PUFF_WHITE);
+    DrawText("Mouse wheel: Zoom in/out", 10, 30, 16, PUFF_WHITE);
+    DrawText(TextFormat("Tick = %d", env->tick), 10, 50, 16, PUFF_WHITE);
+    DrawText(TextFormat("grip_k = %.3f", env->grip_k), 10, 70, 16, PUFF_WHITE);
+    DrawText(TextFormat("box_k = %.3f", env->box_k), 10, 90, 16, PUFF_WHITE);
+    DrawText(TextFormat("eps = %d", env->episode_num), 10, 110, 16, PUFF_WHITE);
+
+    EndDrawing();
+}
diff --git a/pufferlib/ocean/drone_delivery/drone_delivery.py b/pufferlib/ocean/drone_delivery/drone_delivery.py
new file mode 100644
index 000000000..8b5dec3ce
--- /dev/null
+++ b/pufferlib/ocean/drone_delivery/drone_delivery.py
@@ -0,0 +1,144 @@
+import numpy as np
+import gymnasium
+
+import pufferlib
+from pufferlib.ocean.drone_delivery import binding
+
+class DroneDelivery(pufferlib.PufferEnv):
+    def __init__(
+        self,
+        num_envs=16,
+        num_drones=64,
+
+        ablation=0.0,
+        anneal_min=0.5,
+
+        grip_k_max=17.105300970916993,
+
+        perfect_anneal=150000000,
+        perfect_deadline=150000000,
+
+        pos_const=0.7108647198043252,
+        pos_penalty=0.0009629600280207475,
+
+        reward_grip=0.9999,
+        reward_ho_drop=0.20890470430909128,
+        reward_hover=0.1603521816569735,
+
+        reward_max_dist=65.0,
+        reward_min_dist=0.9330297552248776,
+
+        vel_penalty_clamp=0.25,
+
+        w_approach=2.2462377881752698,
+        w_position=0.7312628607193232,
+        w_stability=1.6094417286807845,
+        w_velocity=0.0009999999999999731,
+
+        render_mode=None,
+        report_interval=1024,
+        buf=None,
+        seed=0,
+    ):
+        self.single_observation_space = gymnasium.spaces.Box(
+            low=-1,
+            high=1,
+            shape=(45,),
+            dtype=np.float32,
+        )
+
+        self.single_action_space = gymnasium.spaces.Box(
+            low=-1, high=1, shape=(4,), dtype=np.float32
+        )
+
+        self.num_agents = num_envs * num_drones
+        self.render_mode = render_mode
+        self.report_interval = report_interval
+        self.tick = 0
+
+        super().__init__(buf)
+        self.actions = self.actions.astype(np.float32)
+
+        c_envs = []
+        for i in range(num_envs):
+            c_envs.append(binding.env_init(
+                self.observations[i*num_drones:(i+1)*num_drones],
+                self.actions[i*num_drones:(i+1)*num_drones],
+                self.rewards[i*num_drones:(i+1)*num_drones],
+                self.terminals[i*num_drones:(i+1)*num_drones],
+                self.truncations[i*num_drones:(i+1)*num_drones],
+                i,
+                num_agents=num_drones,
+
+                ablation=ablation,
+                anneal_min=anneal_min,
+
+                grip_k_max=grip_k_max,
+
+                num_envs=num_envs,
+                perfect_anneal=perfect_anneal,
+                perfect_deadline=perfect_deadline,
+
+                pos_const=pos_const,
+                pos_penalty=pos_penalty,
+
+                reward_grip=reward_grip,
+                reward_ho_drop=reward_ho_drop,
+                reward_hover=reward_hover,
+
+                reward_max_dist=reward_max_dist,
+                reward_min_dist=reward_min_dist,
+
+                vel_penalty_clamp=vel_penalty_clamp,
+
+                w_approach=w_approach,
+                w_position=w_position,
+                w_stability=w_stability,
+                w_velocity=w_velocity
+            ))
+
+        self.c_envs = binding.vectorize(*c_envs)
+
+    def reset(self, seed=None):
+        self.tick = 0
+        binding.vec_reset(self.c_envs, seed)
+        return self.observations, []
+
+    def step(self, actions):
+        self.actions[:] = actions
+
+        self.tick += 1
+        binding.vec_step(self.c_envs)
+
+        info = []
+        if self.tick % self.report_interval == 0:
+            log_data = binding.vec_log(self.c_envs)
+            if log_data:
+                info.append(log_data)
+
+        return (self.observations, self.rewards, self.terminals, self.truncations, info)
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
1 + + print(f"SPS: {env.num_agents * tick / (time.time() - start)}") + +if __name__ == "__main__": + test_performance() diff --git a/pufferlib/ocean/drone_delivery/dronelib_delivery.h b/pufferlib/ocean/drone_delivery/dronelib_delivery.h new file mode 100644 index 000000000..378280884 --- /dev/null +++ b/pufferlib/ocean/drone_delivery/dronelib_delivery.h @@ -0,0 +1,507 @@ +// Originally made by Sam Turner and Finlay Sanders, 2025. +// Included in pufferlib under the original project's MIT license. +// https://github.com/stmio/drone + +// Some changes made by Keith (Kinvert) for hackathon -> drone_delivery + +#include +#include +#include +#include +#include +#include +#include + +#include "raylib.h" + +// Visualisation properties +#define WIDTH 1080 +#define HEIGHT 720 +#define TRAIL_LENGTH 50 +#define HORIZON 1024 + +// Physical constants for the drone +#define BASE_MASS 1.0f // kg +#define BASE_IXX 0.01f // kgm^2 +#define BASE_IYY 0.01f // kgm^2 +#define BASE_IZZ 0.02f // kgm^2 +#define BASE_ARM_LEN 0.1f // m +#define BASE_K_THRUST 3e-5f // thrust coefficient +#define BASE_K_ANG_DAMP 0.2f // angular damping coefficient +#define BASE_K_DRAG 1e-6f // drag (torque) coefficient +#define BASE_B_DRAG 0.1f // linear drag coefficient +#define BASE_GRAVITY 9.81f // m/s^2 +#define BASE_MAX_RPM 750.0f // rad/s +#define BASE_MAX_VEL 50.0f // m/s +#define BASE_MAX_OMEGA 6.3f // rad/s +#define BASE_K_MOT 0.1f // s (Motor lag constant) +#define BASE_J_MOT 1e-5f // kgm^2 (Motor rotational inertia) + +// Simulation properties +#define GRID_X 30.0f +#define GRID_Y 30.0f +#define GRID_Z 10.0f +#define INV_GRID_X 0.33333f +#define INV_GRID_Y 0.33333f +#define INV_GRID_Z 0.1f + +#define MARGIN_X (GRID_X - 1) +#define MARGIN_Y (GRID_Y - 1) +#define MARGIN_Z (GRID_Z - 1) +#define V_TARGET 0.05f +#define DT 0.05f +#define DT_RNG 0.0f +#define MAX_THRUST_TO_MASS_RATION 0.5f + +// Corner to corner distance +#define MAX_DIST sqrtf((2*GRID_X)*(2*GRID_X) + (2*GRID_Y)*(2*GRID_Y) + (2*GRID_Z)*(2*GRID_Z)) + +#define BASE_BOX_DENSITY 50.0f + +typedef struct Log Log; +struct Log { + float episode_return; + float episode_length; + float collision_rate; + float oob; + float timeout; + float score; + float perf; + float gripping; + float perfect_grip; + float delivered; + float perfect_deliv; + float perfect_now; + float to_pickup; + float ho_pickup; + float de_pickup; + float to_drop; + float ho_drop; + float de_drop; + float dist; + float dist100; + float jitter; + float episode_num; + float episode_gain; + float tick; + float anneal; + float n; +}; + +typedef struct { + float w, x, y, z; +} Quat; + +typedef struct { + float x, y, z; +} Vec3; + +static inline float clampf(float v, float min, float max) { + if (v < min) + return min; + if (v > max) + return max; + return v; +} + +static inline float rndf(float a, float b) { + return a + ((float)rand() / (float)RAND_MAX) * (b - a); +} + +static inline Vec3 add3(Vec3 a, Vec3 b) { return (Vec3){a.x + b.x, a.y + b.y, a.z + b.z}; } + +static inline Quat add_quat(Quat a, Quat b) { return (Quat){a.w + b.w, a.x + b.x, a.y + b.y, a.z + b.z}; } + +static inline Vec3 sub3(Vec3 a, Vec3 b) { return (Vec3){a.x - b.x, a.y - b.y, a.z - b.z}; } + +static inline Vec3 scalmul3(Vec3 a, float b) { return (Vec3){a.x * b, a.y * b, a.z * b}; } + +static inline Quat scalmul_quat(Quat a, float b) { return (Quat){a.w * b, a.x * b, a.y * b, a.z * b}; } + +static inline float dot3(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; } + +static inline float norm3(Vec3 a) { return sqrtf(dot3(a, 
a)); } + +static inline void clamp3(Vec3 *vec, float min, float max) { + vec->x = clampf(vec->x, min, max); + vec->y = clampf(vec->y, min, max); + vec->z = clampf(vec->z, min, max); +} + +static inline void clamp4(float a[4], float min, float max) { + a[0] = clampf(a[0], min, max); + a[1] = clampf(a[1], min, max); + a[2] = clampf(a[2], min, max); + a[3] = clampf(a[3], min, max); +} + +static inline Quat quat_mul(Quat q1, Quat q2) { + Quat out; + out.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z; + out.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y; + out.y = q1.w * q2.y - q1.x * q2.z + q1.y * q2.w + q1.z * q2.x; + out.z = q1.w * q2.z + q1.x * q2.y - q1.y * q2.x + q1.z * q2.w; + return out; +} + +static inline void quat_normalize(Quat *q) { + float n = sqrtf(q->w * q->w + q->x * q->x + q->y * q->y + q->z * q->z); + if (n > 0.0f) { + q->w /= n; + q->x /= n; + q->y /= n; + q->z /= n; + } +} + +static inline Vec3 quat_rotate(Quat q, Vec3 v) { + Quat qv = {0.0f, v.x, v.y, v.z}; + Quat tmp = quat_mul(q, qv); + Quat q_conj = {q.w, -q.x, -q.y, -q.z}; + Quat res = quat_mul(tmp, q_conj); + return (Vec3){res.x, res.y, res.z}; +} + +static inline Quat quat_inverse(Quat q) { return (Quat){q.w, -q.x, -q.y, -q.z}; } + +Quat rndquat() { + float u1 = rndf(0.0f, 1.0f); + float u2 = rndf(0.0f, 1.0f); + float u3 = rndf(0.0f, 1.0f); + + float sqrt_1_minus_u1 = sqrtf(1.0f - u1); + float sqrt_u1 = sqrtf(u1); + + float pi_2_u2 = 2.0f * M_PI * u2; + float pi_2_u3 = 2.0f * M_PI * u3; + + Quat q; + q.w = sqrt_1_minus_u1 * sinf(pi_2_u2); + q.x = sqrt_1_minus_u1 * cosf(pi_2_u2); + q.y = sqrt_u1 * sinf(pi_2_u3); + q.z = sqrt_u1 * cosf(pi_2_u3); + + return q; +} + +typedef struct { + Vec3 pos[TRAIL_LENGTH]; + int index; + int count; +} Trail; + +typedef struct { + Vec3 pos; // global position (x, y, z) + Vec3 vel; // linear velocity (u, v, w) + Quat quat; // roll/pitch/yaw (phi/theta/psi) as a quaternion + Vec3 omega; // angular velocity (p, q, r) + float rpms[4]; // motor RPMs +} State; + +typedef struct { + Vec3 vel; // Derivative of position + Vec3 v_dot; // Derivative of velocity + Quat q_dot; // Derivative of quaternion + Vec3 w_dot; // Derivative of angular velocity + float rpm_dot[4]; // Derivative of motor RPMs +} StateDerivative; + +typedef struct { + // Physical properties. Modeled as part of the drone + // to make domain randomization easier. 
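+ // Populated in init_drone(): BASE_* constants are scaled by arm length (m ~ L^3, I ~ m*L^2, rpm ~ 1/L) and jittered by the domain randomization factor dr.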
+ float mass; // kg + float ixx; // kgm^2 + float iyy; // kgm^2 + float izz; // kgm^2 + float arm_len; // m + float k_thrust; // thrust coefficient + float k_ang_damp; // angular damping coefficient + float k_drag; // drag (torque) coefficient + float b_drag; // linear drag coefficient + float gravity; // m/s^2 + float max_rpm; // rad/s + float inv_max_rpm; // s/rad + float max_vel; // m/s + float inv_max_vel; // s/m + float max_omega; // rad/s + float inv_max_omega; // s/rad + float k_mot; // s + float j_mot; // kgm^2 +} Params; + +typedef struct { + // core state and parameters + State state; + Params params; + + // helpers for swarm logic + Vec3 spawn_pos; + Vec3 prev_pos; + Vec3 target_pos; + Vec3 target_vel; + Vec3 hidden_pos; + Vec3 hidden_vel; + + // PP Pick and Place + Vec3 box_pos; + Vec3 box_vel; + Vec3 drop_pos; + bool gripping; + bool perfect_grip; + bool delivered; + bool perfect_deliv; + float perfect_deliveries; + bool perfect_now; + bool has_delivered; + float grip_height; + bool approaching_pickup; + bool hovering_pickup; + bool descent_pickup; + bool approaching_drop; + bool hovering_drop; + bool descent_drop; + float hover_timer; + Color color; + + // logging utils + float last_abs_reward; + float last_collision_reward; + float last_position_reward; + float last_velocity_reward; + float last_stability_reward; + float last_approach_reward; + float episode_return; + float collisions; + int episode_length; + float score; + float jitter; + + float base_mass; + float base_ixx; + float base_iyy; + float base_izz; + float base_k_drag; + float base_b_drag; + float box_size; + float box_base_mass; + float box_mass; + float box_mass_max; + bool box_physics_on; +} Drone; + + +void init_drone(Drone* drone, float size, float dr) { + drone->params.arm_len = size / 2.0f; + + // m ~ x^3 + float mass_scale = powf(drone->params.arm_len, 3.0f) / powf(BASE_ARM_LEN, 3.0f); + drone->params.mass = BASE_MASS * mass_scale * rndf(1.0f - dr, 1.0f + dr); + + // I ~ mx^2 + float base_Iscale = BASE_MASS * BASE_ARM_LEN * BASE_ARM_LEN; + float I_scale = drone->params.mass * powf(drone->params.arm_len, 2.0f) / base_Iscale; + drone->params.ixx = BASE_IXX * I_scale * rndf(1.0f - dr, 1.0f + dr); + drone->params.iyy = BASE_IYY * I_scale * rndf(1.0f - dr, 1.0f + dr); + drone->params.izz = BASE_IZZ * I_scale * rndf(1.0f - dr, 1.0f + dr); + + // k_thrust ~ m/l + float k_thrust_scale = (drone->params.mass * drone->params.arm_len) / (BASE_MASS * BASE_ARM_LEN); + drone->params.k_thrust = BASE_K_THRUST * k_thrust_scale * rndf(1.0f - dr, 1.0f + dr); + + // k_ang_damp ~ I + float base_avg_inertia = (BASE_IXX + BASE_IYY + BASE_IZZ) / 3.0f; + float avg_inertia = (drone->params.ixx + drone->params.iyy + drone->params.izz) / 3.0f; + float avg_inertia_scale = avg_inertia / base_avg_inertia; + drone->params.k_ang_damp = BASE_K_ANG_DAMP * avg_inertia_scale * rndf(1.0f - dr, 1.0f + dr); + + // drag ~ x^2 + float drag_scale = powf(drone->params.arm_len, 2.0f) / powf(BASE_ARM_LEN, 2.0f); + drone->params.k_drag = BASE_K_DRAG * drag_scale * rndf(1.0f - dr, 1.0f + dr); + drone->params.b_drag = BASE_B_DRAG * drag_scale * rndf(1.0f - dr, 1.0f + dr); + + // Small gravity randomization + drone->params.gravity = BASE_GRAVITY * rndf(0.99f, 1.01f); + + // RPM ~ 1/x + float rpm_scale = (BASE_ARM_LEN) / (drone->params.arm_len); + drone->params.max_rpm = BASE_MAX_RPM * rpm_scale * rndf(1.0f - dr, 1.0f + dr); + float max_thrust = 4.0f * drone->params.k_thrust * powf(drone->params.max_rpm, 2.0f); + float max_total_mass = 
(MAX_THRUST_TO_MASS_RATION * max_thrust) / drone->params.gravity; + while (max_thrust < 2.0f * drone->params.mass * drone->params.gravity) { + drone->params.max_rpm = drone->params.max_rpm * 2.0f; + max_thrust = 4.0f * drone->params.k_thrust * powf(drone->params.max_rpm, 2.0f); + max_total_mass = (MAX_THRUST_TO_MASS_RATION * max_thrust) / drone->params.gravity; + } + if (drone->params.max_rpm < 0.001) drone->params.max_rpm += 0.001; + drone->params.inv_max_rpm = 1.0f / drone->params.max_rpm; + + drone->params.max_vel = BASE_MAX_VEL; + if (drone->params.max_vel < 0.001) drone->params.max_vel += 0.001; + drone->params.inv_max_vel = 1.0f / drone->params.max_vel; + drone->params.max_omega = BASE_MAX_OMEGA; + if (drone->params.max_omega < 0.001) drone->params.max_omega += 0.001; + drone->params.inv_max_omega = 1.0f / drone->params.max_omega; + + drone->box_mass_max = max_total_mass - drone->params.mass; + + drone->params.k_mot = BASE_K_MOT * rndf(1.0f - dr, 1.0f + dr); + drone->params.j_mot = BASE_J_MOT * I_scale * rndf(1.0f - dr, 1.0f + dr); + + for (int i = 0; i < 4; i++) { + drone->state.rpms[i] = 0.0f; + } + drone->state.pos = (Vec3){0.0f, 0.0f, 0.0f}; + drone->prev_pos = drone->state.pos; + drone->state.vel = (Vec3){0.0f, 0.0f, 0.0f}; + drone->state.omega = (Vec3){0.0f, 0.0f, 0.0f}; + drone->state.quat = (Quat){1.0f, 0.0f, 0.0f, 0.0f}; +} + +void compute_derivatives(State* state, Params* params, float* actions, StateDerivative* derivatives) { + // first order rpm lag + float target_rpms[4]; + for (int i = 0; i < 4; i++) { + target_rpms[i] = (actions[i] + 1.0f) * 0.5f * params->max_rpm; + } + + // rpm rates + float rpm_dot[4]; + for (int i = 0; i < 4; i++) { + rpm_dot[i] = (1.0f / params->k_mot) * (target_rpms[i] - state->rpms[i]); + } + + // motor thrusts + float T[4]; + for (int i = 0; i < 4; i++) { + T[i] = params->k_thrust * powf(state->rpms[i], 2.0f); + } + + // body frame net force + Vec3 F_prop_body = {0.0f, 0.0f, T[0] + T[1] + T[2] + T[3]}; + + // body frame force -> world frame force + Vec3 F_prop = quat_rotate(state->quat, F_prop_body); + + // world frame linear drag + Vec3 F_aero; + F_aero.x = -params->b_drag * state->vel.x; + F_aero.y = -params->b_drag * state->vel.y; + F_aero.z = -params->b_drag * state->vel.z; + + // velocity rates, a = F/m + Vec3 v_dot; + v_dot.x = (F_prop.x + F_aero.x) / params->mass; + v_dot.y = (F_prop.y + F_aero.y) / params->mass; + v_dot.z = ((F_prop.z + F_aero.z) / params->mass) - params->gravity; + + // quaternion rates + Quat omega_q = {0.0f, state->omega.x, state->omega.y, state->omega.z}; + Quat q_dot = quat_mul(state->quat, omega_q); + q_dot.w *= 0.5f; + q_dot.x *= 0.5f; + q_dot.y *= 0.5f; + q_dot.z *= 0.5f; + + // body frame torques + Vec3 Tau_prop; + Tau_prop.x = params->arm_len*(T[1] - T[3]); + Tau_prop.y = params->arm_len*(T[2] - T[0]); + Tau_prop.z = params->k_drag*(T[0] - T[1] + T[2] - T[3]); + + // torque from changing motor speeds + float Tau_mot_z = params->j_mot * (rpm_dot[0] - rpm_dot[1] + rpm_dot[2] - rpm_dot[3]); + + // torque from angular damping + Vec3 Tau_aero; + Tau_aero.x = -params->k_ang_damp * state->omega.x; + Tau_aero.y = -params->k_ang_damp * state->omega.y; + Tau_aero.z = -params->k_ang_damp * state->omega.z; + + // gyroscopic torque + Vec3 Tau_iner; + Tau_iner.x = (params->iyy - params->izz) * state->omega.y * state->omega.z; + Tau_iner.y = (params->izz - params->ixx) * state->omega.z * state->omega.x; + Tau_iner.z = (params->ixx - params->iyy) * state->omega.x * state->omega.y; + + // angular velocity rates + Vec3 w_dot; + 
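// Euler's rigid-body equations per axis: angular acceleration is net torque over inertia, with the gyroscopic Tau_iner term above supplying -omega x (I omega) +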
w_dot.x = (Tau_prop.x + Tau_aero.x + Tau_iner.x) / params->ixx; + w_dot.y = (Tau_prop.y + Tau_aero.y + Tau_iner.y) / params->iyy; + w_dot.z = (Tau_prop.z + Tau_aero.z + Tau_iner.z + Tau_mot_z) / params->izz; + + derivatives->vel = state->vel; + derivatives->v_dot = v_dot; + derivatives->q_dot = q_dot; + derivatives->w_dot = w_dot; + for (int i = 0; i < 4; i++) { + derivatives->rpm_dot[i] = rpm_dot[i]; + } +} + +static void step(State* initial, StateDerivative* deriv, float dt, State* output) { + output->pos = add3(initial->pos, scalmul3(deriv->vel, dt)); + output->vel = add3(initial->vel, scalmul3(deriv->v_dot, dt)); + output->quat = add_quat(initial->quat, scalmul_quat(deriv->q_dot, dt)); + output->omega = add3(initial->omega, scalmul3(deriv->w_dot, dt)); + for (int i = 0; i < 4; i++) { + output->rpms[i] = initial->rpms[i] + deriv->rpm_dot[i] * dt; + } + quat_normalize(&output->quat); +} + +void rk4_step(State* state, Params* params, float* actions, float dt) { + StateDerivative k1, k2, k3, k4; + State temp_state; + + compute_derivatives(state, params, actions, &k1); + + step(state, &k1, dt * 0.5f, &temp_state); + compute_derivatives(&temp_state, params, actions, &k2); + + step(state, &k2, dt * 0.5f, &temp_state); + compute_derivatives(&temp_state, params, actions, &k3); + + step(state, &k3, dt, &temp_state); + compute_derivatives(&temp_state, params, actions, &k4); + + float dt_6 = dt / 6.0f; + + state->pos.x += (k1.vel.x + 2.0f * k2.vel.x + 2.0f * k3.vel.x + k4.vel.x) * dt_6; + state->pos.y += (k1.vel.y + 2.0f * k2.vel.y + 2.0f * k3.vel.y + k4.vel.y) * dt_6; + state->pos.z += (k1.vel.z + 2.0f * k2.vel.z + 2.0f * k3.vel.z + k4.vel.z) * dt_6; + + state->vel.x += (k1.v_dot.x + 2.0f * k2.v_dot.x + 2.0f * k3.v_dot.x + k4.v_dot.x) * dt_6; + state->vel.y += (k1.v_dot.y + 2.0f * k2.v_dot.y + 2.0f * k3.v_dot.y + k4.v_dot.y) * dt_6; + state->vel.z += (k1.v_dot.z + 2.0f * k2.v_dot.z + 2.0f * k3.v_dot.z + k4.v_dot.z) * dt_6; + + state->quat.w += (k1.q_dot.w + 2.0f * k2.q_dot.w + 2.0f * k3.q_dot.w + k4.q_dot.w) * dt_6; + state->quat.x += (k1.q_dot.x + 2.0f * k2.q_dot.x + 2.0f * k3.q_dot.x + k4.q_dot.x) * dt_6; + state->quat.y += (k1.q_dot.y + 2.0f * k2.q_dot.y + 2.0f * k3.q_dot.y + k4.q_dot.y) * dt_6; + state->quat.z += (k1.q_dot.z + 2.0f * k2.q_dot.z + 2.0f * k3.q_dot.z + k4.q_dot.z) * dt_6; + + state->omega.x += (k1.w_dot.x + 2.0f * k2.w_dot.x + 2.0f * k3.w_dot.x + k4.w_dot.x) * dt_6; + state->omega.y += (k1.w_dot.y + 2.0f * k2.w_dot.y + 2.0f * k3.w_dot.y + k4.w_dot.y) * dt_6; + state->omega.z += (k1.w_dot.z + 2.0f * k2.w_dot.z + 2.0f * k3.w_dot.z + k4.w_dot.z) * dt_6; + + for (int i = 0; i < 4; i++) { + state->rpms[i] += (k1.rpm_dot[i] + 2.0f * k2.rpm_dot[i] + 2.0f * k3.rpm_dot[i] + k4.rpm_dot[i]) * dt_6; + } + + quat_normalize(&state->quat); +} + +void move_drone(Drone* drone, float* actions) { + // clamp actions + clamp4(actions, -1.0f, 1.0f); + + // Domain randomized dt + float dt = DT * rndf(1.0f - DT_RNG, 1.0 + DT_RNG); + + // update drone state + drone->prev_pos = drone->state.pos; + rk4_step(&drone->state, &drone->params, actions, dt); + + // clamp and normalise for observations + clamp3(&drone->state.vel, -drone->params.max_vel, drone->params.max_vel); + clamp3(&drone->state.omega, -drone->params.max_omega, drone->params.max_omega); +} diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index ed1408ba6..c8c58bfff 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -131,6 +131,7 @@ def make_multiagent(buf=None, **kwargs): 'matsci': 
'Matsci', 'memory': 'Memory', 'boids': 'Boids', + 'drone_delivery': 'DroneDelivery', 'drone_race': 'DroneRace', 'drone_swarm': 'DroneSwarm', 'nmmo3': 'NMMO3', diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 63dc77a51..2746fb2c4 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1,8 +1,7 @@ -## puffer [train | eval | sweep] [env_name] [optional args] -- See https://puffer.ai for full detail0 +# puffer [train | eval | sweep] [env_name] [optional args] -- See https://puffer.ai for full details # This is the same as python -m pufferlib.pufferl [train | eval | sweep] [env_name] [optional args] # Distributed example: torchrun --standalone --nnodes=1 --nproc-per-node=6 -m pufferlib.pufferl train puffer_nmmo3 -import contextlib import warnings warnings.filterwarnings('error', category=RuntimeWarning) @@ -136,9 +135,7 @@ def __init__(self, config, vecenv, policy, logger=None): self.uncompiled_policy = policy self.policy = policy if config['compile']: - self.policy = torch.compile(policy, mode=config['compile_mode']) - self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) - pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) + self.policy = torch.compile(policy, mode=config['compile_mode'], fullgraph=config['compile_fullgraph']) # Optimizer if config['optimizer'] == 'adam': @@ -176,9 +173,7 @@ def __init__(self, config, vecenv, policy, logger=None): # Automatic mixed precision precision = config['precision'] - self.amp_context = contextlib.nullcontext() - if config.get('amp', True) and config['device'] == 'cuda': - self.amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, precision)) + self.amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, precision)) if precision not in ('float32', 'bfloat16'): raise pufferlib.APIUsageError(f'Invalid precision: {precision}: use float32 or bfloat16') @@ -222,8 +217,8 @@ def evaluate(self): if config['use_rnn']: for k in self.lstm_h: - self.lstm_h[k] = torch.zeros(self.lstm_h[k].shape, device=device) - self.lstm_c[k] = torch.zeros(self.lstm_c[k].shape, device=device) + self.lstm_h[k].zero_() + self.lstm_c[k].zero_() self.full_rows = 0 while self.full_rows < self.segments: @@ -418,12 +413,43 @@ def train(self): # Learn on accumulated minibatches profile('learn', epoch) + + # Check for NaN in loss before backward + if torch.isnan(loss): + print("Warning: NaN loss detected, skipping backward pass") + self.optimizer.zero_grad() + # Reduce learning rate on NaN + for g in self.optimizer.param_groups: + g['lr'] *= 0.5 + print(f"Reduced learning rate to {self.optimizer.param_groups[0]['lr']}") + continue + loss.backward() + + # Check for NaN in gradients and replace them with zeros + for name, param in self.policy.named_parameters(): + if param.grad is not None and torch.isnan(param.grad).any(): + print(f"Warning: NaN gradient in {name}") + param.grad = torch.nan_to_num(param.grad, nan=0.0) + if (mb + 1) % self.accumulate_minibatches == 0: + + # More aggressive gradient clipping for continuous actions + policy_obj = self.policy.policy if hasattr(self.policy, 'policy') else self.policy + if hasattr(policy_obj, 'is_continuous') and policy_obj.is_continuous: + torch.nn.utils.clip_grad_norm_(self.policy.parameters(), min(config['max_grad_norm'], 0.5)) + else: + torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm'])
self.optimizer.step() self.optimizer.zero_grad() + # Check for NaN in weights after update + policy_obj = self.policy.policy if hasattr(self.policy, 'policy') else self.policy + if hasattr(policy_obj, 'check_and_fix_weights'): + if policy_obj.check_and_fix_weights(): + print("Warning: NaN in weights after optimizer step, weights reinitialized") + # Reprioritize experience profile('train_misc', epoch) if config['anneal_lr']: @@ -496,7 +525,7 @@ def close(self): self.utilization.stop() model_path = self.save_checkpoint() run_id = self.logger.run_id - path = os.path.join(self.config['data_dir'], f'{self.config["env"]}_{run_id}.pt') + path = os.path.join(self.config['data_dir'], f'{run_id}.pt') shutil.copy(model_path, path) return path @@ -506,11 +535,11 @@ def save_checkpoint(self): return run_id = self.logger.run_id - path = os.path.join(self.config['data_dir'], f'{self.config["env"]}_{run_id}') + path = os.path.join(self.config['data_dir'], run_id) if not os.path.exists(path): os.makedirs(path) - model_name = f'model_{self.config["env"]}_{self.epoch:06d}.pt' + model_name = f'model_{self.epoch:06d}.pt' model_path = os.path.join(path, model_name) if os.path.exists(model_path): return model_path @@ -527,7 +556,7 @@ } state_path = os.path.join(path, 'trainer_state.pt') torch.save(state, state_path + '.tmp') - os.replace(state_path + '.tmp', state_path) + os.rename(state_path + '.tmp', state_path) return model_path def print_dashboard(self, clear=False, idx=[0], @@ -882,8 +912,8 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): torch.cuda.set_device(local_rank) os.environ["CUDA_VISIBLE_DEVICES"] = str(local_rank) vecenv = vecenv or load_env(env_name, args) - policy = policy or load_policy(args, vecenv, env_name) + policy = policy or load_policy(args, vecenv) if 'LOCAL_RANK' in os.environ: args['train']['device'] = torch.cuda.current_device() @@ -909,11 +942,7 @@ all_logs = [] while pufferl.global_step < train_config['total_timesteps']: - if train_config['device'] == 'cuda': - torch.compiler.cudagraph_mark_step_begin() pufferl.evaluate() - if train_config['device'] == 'cuda': - torch.compiler.cudagraph_mark_step_begin() logs = pufferl.train() if logs is not None: @@ -947,7 +978,7 @@ def eval(env_name, args=None, vecenv=None, policy=None): args['vec'] = dict(backend=backend, num_envs=1) vecenv = vecenv or load_env(env_name, args) - policy = policy or load_policy(args, vecenv, env_name) + policy = policy or load_policy(args, vecenv) ob, info = vecenv.reset() driver = vecenv.driver_env num_agents = vecenv.observation_space.shape[0] @@ -1070,13 +1101,13 @@ def autotune(args=None, env_name=None, vecenv=None, policy=None):
pufferlib.vector.autotune(make_env, batch_size=args['train']['env_batch_size']) def load_env(env_name, args): package = args['package'] module_name = 'pufferlib.ocean' if package == 'ocean' else f'pufferlib.environments.{package}' env_module = importlib.import_module(module_name) make_env = env_module.env_creator(env_name) return pufferlib.vector.make(make_env, env_kwargs=args['env'], **args['vec']) -def load_policy(args, vecenv, env_name=''): +def load_policy(args, vecenv): package = args['package'] module_name = 'pufferlib.ocean' if package == 'ocean' else f'pufferlib.environments.{package}' env_module = importlib.import_module(module_name) @@ -1107,7 +1143,7 @@ load_path = args['load_model_path'] if load_path == 'latest': - load_path = max(glob.glob(f"experiments/{env_name}*.pt"), key=os.path.getctime) + load_path = max(glob.glob("experiments/*.pt"), key=os.path.getctime) if load_path is not None: state_dict = torch.load(load_path, map_location=device) @@ -1160,19 +1196,24 @@ def load_config(env_name): raise pufferlib.APIUsageError('No config for env_name {}'.format(env_name)) # Dynamic help menu from config - def puffer_type(value): - try: - return ast.literal_eval(value) - except: - return value + def auto_type(value): + """Type inference for numeric args that use 'auto' as a default value""" + if value == 'auto': return value + if value.isnumeric(): return int(value) + return float(value) for section in p.sections(): for key in p[section]: + try: + value = ast.literal_eval(p[section][key]) + except: + value = p[section][key] + fmt = f'--{key}' if section == 'base' else f'--{section}.{key}' parser.add_argument( fmt.replace('_', '-'), - default=puffer_type(p[section][key]), - type=puffer_type + default=value, + type=auto_type if value == 'auto' else type(value) ) parser.add_argument('-h', '--help', default=argparse.SUPPRESS, diff --git a/setup.py b/setup.py index 631b051ef..7597fd18e 100644 --- a/setup.py +++ b/setup.py @@ -184,14 +184,15 @@ def run(self): # Find C extensions c_extensions = [] if not NO_OCEAN: - c_extension_paths = glob.glob('pufferlib/ocean/**/binding.c', recursive=True) + #c_extension_paths = glob.glob('pufferlib/ocean/**/binding.c', recursive=True) + c_extension_paths = ['pufferlib/ocean/drone_delivery/binding.c'] c_extensions = [ Extension( path.rstrip('.c').replace('/', '.'), sources=[path], **extension_kwargs, ) - for path in c_extension_paths if 'matsci' not in path + for path in c_extension_paths# if 'matsci' not in path ] c_extension_paths = [os.path.join(*path.split('/')[:-1]) for path in c_extension_paths]
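The new environment can be smoke-tested without the trainer. The sketch below is illustrative rather than part of the diff: it assumes the drone_delivery C extension has been built (per the setup.py change above) and uses only the constructor, reset, step, and close signatures defined in drone_delivery.py.

import numpy as np
from pufferlib.ocean.drone_delivery.drone_delivery import DroneDelivery

# 2 envs x 4 drones = 8 agents; observations arrive as an (8, 45) float32 buffer
env = DroneDelivery(num_envs=2, num_drones=4)
obs, info = env.reset(seed=0)
for _ in range(100):
    # Per-agent actions in [-1, 1]^4, matching single_action_space
    actions = np.random.uniform(-1, 1, (env.num_agents, 4)).astype(np.float32)
    obs, rewards, terminals, truncations, info = env.step(actions)
env.close()

For a full training run, the CLI documented at the top of pufferl.py applies, e.g. puffer train puffer_drone_delivery.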