log_samples takes a list of Metric

DNXie · DNXie · commit 2b0496e2dd85 · 2025-11-16T17:08:27.000-08:00
diff --git a/apps/grpo/main.py b/apps/grpo/main.py
@@ -89,9 +89,11 @@ def to_dict(self, exclude: list[str] | None = None) -> dict[str, Any]:
             "request_len": self.request_len,
             "response_len": self.response_len,
             "pad_id": self.pad_id,
+            "ref_logprobs": self.ref_logprobs,
+            "completion": self.completion,
         }
 
-        if self.reward_breakdown is not None:
+        if self.reward_breakdown is not None and "reward_breakdown" not in exclude:
             result.update(self.reward_breakdown)
 
         if exclude:
diff --git a/src/forge/observability/metric_actors.py b/src/forge/observability/metric_actors.py
@@ -438,7 +438,7 @@ def extract_values_from_valuemesh(results) -> list[dict[str, Any]]:
                 m for m in reduced_metrics if m.reduction != Reduce.SAMPLE
             ]
             sample_metrics = {
-                m.key: m.value for m in reduced_metrics if m.reduction == Reduce.SAMPLE
+                m for m in reduced_metrics if m.reduction == Reduce.SAMPLE
             }
 
             # Log to global backends
diff --git a/src/forge/observability/metrics.py b/src/forge/observability/metrics.py
@@ -195,7 +195,7 @@ def record_episode_sample(table_name: str, episode):
         table_name (str): logging prefix (e.g. "rollout/sample").
         episode (Episode): episode object with filled attributes.
     """
-    sample = episode.to_dict()
+    sample = episode.to_dict(exclude=["ref_logprobs", "completion"])
     record_metric(table_name, sample, Reduce.SAMPLE)
 
 
@@ -675,9 +675,7 @@ def push(self, metric: Metric) -> None:
         for backend in self.per_rank_no_reduce_backends:
 
             if metric.reduction == Reduce.SAMPLE:
-                # Wrap singleton Metric into expected {key: [list_of_dicts]} format
-                sample = {metric.key: [metric.value]}
-                asyncio.create_task(backend.log_samples(sample, self.global_step))
+                asyncio.create_task(backend.log_samples([metric], self.global_step))
             else:
                 backend.log_stream(metric=metric, global_step=self.global_step)
 
@@ -882,11 +880,12 @@ def log_stream(self, metric: Metric, global_step: int, *args, **kwargs) -> None:
     async def finish(self) -> None:
         pass
 
-    async def log_samples(self, samples: Dict[str, List[dict]], step: int) -> None:
+    async def log_samples(self, samples: List[Metric], step: int) -> None:
         """Pretty-print sample-level logs to console."""
 
         logger.info(f"==========  SAMPLE LOGS STEP {step} ==========")
-        for table_name, table_rows in samples.items():
+        for sample in samples:
+            table_name, table_rows = sample.key, sample.value
             logger.info(f"[{table_name}] ({len(table_rows)} samples)")
             logger.info(json.dumps(table_rows, indent=2, ensure_ascii=False))
         logger.info("==============================================\n")
@@ -1038,14 +1037,15 @@ def log_stream(self, metric: Metric, global_step: int, *args, **kwargs) -> None:
         # note: here we dont use step since wandb keeps only the latest value for each step
         self.run.log(log_data)
 
-    async def log_samples(self, samples: Dict[str, List[dict]], step: int) -> None:
+    async def log_samples(self, samples: List[Metric], step: int) -> None:
         """Log sample-level data incrementally to persistent WandB Tables."""
         import wandb
 
         if not self.run:
             return
 
-        for table_name, table_rows in samples.items():
+        for sample in samples:
+            table_name, table_rows = sample.key, sample.value
             if not table_rows:
                 continue
 

Original file line number	Diff line number	Diff line change
`@@ -438,7 +438,7 @@ def extract_values_from_valuemesh(results) -> list[dict[str, Any]]:`
`438`	`438`	`m for m in reduced_metrics if m.reduction != Reduce.SAMPLE`
`439`	`439`	`]`
`440`	`440`	`sample_metrics = {`
`441`		`- m.key: m.value for m in reduced_metrics if m.reduction == Reduce.SAMPLE`
	`441`	`+ m for m in reduced_metrics if m.reduction == Reduce.SAMPLE`
`442`	`442`	`}`
`443`	`443`
`444`	`444`	`# Log to global backends`