Skip to content

Commit edae760

Browse files
Eugene Vinitsky and claude committed
Log avg_distance_per_infraction (port from emerge/temp_training)
Adds the metric:

    avg_distance_per_infraction = total_fleet_distance / max(1, total_fleet_infractions)

which tracks how far agents drive between offroad/collision/red-light events — a useful single-scalar driving-quality signal for wandb.

The two underlying log fields already exist in puffer-4 and are already aggregated per-step in `add_log`; only the binding-side ratio was missing.

`my_log` gains an `n` parameter and multiplies the per-agent-normalized `log->total_*` fields by `n` to recover raw fleet totals. The ratio itself is invariant to the 1/n scaling, but the un-normalization makes the `fmaxf(1.0f, total_infractions)` clamp behave correctly: it floors the denominator at "one infraction across the whole fleet", so a window with zero infractions reports total fleet distance instead of distance/epsilon.

Both `my_log` call sites in vecenv.h (in `static_vec_log` and `static_vec_eval_log`) pass `n`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 094a2af commit edae760

2 files changed

Lines changed: 18 additions & 4 deletions

File tree

sim/binding.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,20 @@ void my_init(Env *env, Dict *kwargs) {
182182
}
183183
}
184184

185-
void my_log(Log *log, Dict *out) {
185+
void my_log(Log *log, Dict *out, float n) {
186+
// static_vec_aggregate_logs divides every Log field by total agent count n,
187+
// so log->total_* are per-agent rates here. We want the fleet ratio
188+
// sum_distance / sum_infractions; the n cancels in that ratio so we
189+
// could compute it directly from the rates. We multiply by n anyway
190+
// to recover the raw fleet totals: this makes the fmaxf(1.0f, ...)
191+
// clamp meaningful (it floors the denominator at "1 infraction across
192+
// the whole fleet"), so a zero-infraction window reports total fleet
193+
// distance instead of distance / epsilon = absurd value.
194+
float total_distance_travelled = log->total_distance_travelled * n;
195+
float total_infractions = log->total_infractions * n;
196+
float avg_distance_per_infraction =
197+
total_distance_travelled / fmaxf(1.0f, total_infractions);
198+
186199
dict_set(out, "score", log->score);
187200
dict_set(out, "episode_return", log->episode_return);
188201
dict_set(out, "episode_length", log->episode_length);
@@ -191,6 +204,7 @@ void my_log(Log *log, Dict *out) {
191204
dict_set(out, "num_goals_reached", log->num_goals_reached);
192205
dict_set(out, "avg_speed_per_agent", log->avg_speed_per_agent);
193206
dict_set(out, "dnf_rate", log->dnf_rate);
207+
dict_set(out, "avg_distance_per_infraction", avg_distance_per_infraction);
194208
dict_set(out, "n", log->n);
195209
}
196210

src/vecenv.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ extern const char* cudaGetErrorString(cudaError_t);
238238

239239
// Forward declare env-provided functions (defined in binding.c after this include)
240240
void my_init(Env* env, Dict* kwargs);
241-
void my_log(Log* log, Dict* out);
241+
void my_log(Log* log, Dict* out, float n);
242242
void my_env_constants(void* env, Dict* out);
243243

244244
struct StaticThreading {
@@ -643,7 +643,7 @@ void static_vec_log(StaticVec* vec, Dict* out) {
643643
for (int i = 0; i < vec->size; i++) {
644644
memset(&envs[i].log, 0, sizeof(Log));
645645
}
646-
my_log(&aggregate, out);
646+
my_log(&aggregate, out, n);
647647
dict_set(out, "n", n);
648648
}
649649

@@ -653,7 +653,7 @@ void static_vec_eval_log(StaticVec* vec, Dict* out) {
653653
if (n == 0) {
654654
return;
655655
}
656-
my_log(&aggregate, out);
656+
my_log(&aggregate, out, n);
657657
dict_set(out, "n", n);
658658
}
659659

0 commit comments

Comments
 (0)