 import numpy as np
 from collections import deque
 import logging
-
+from rich import print
 logger = logging.getLogger("ray")
 @ray.remote
 class EpisodeStatistics:
@@ -47,18 +47,30 @@ def log_statistics(self, step: int, record_next_episode: bool):
         num_test_tasks = 0
         sum_discounted_reward = 0
         sum_episode_length = 0
+        num_valid_episode_length = 0  # Track how many tasks reported a valid (non-NaN) episode length
+
+        # [Change log] Moved the step logging from the per-task loop to the beginning of this method (zhancun)
+        wandb_logger.log({
+            "episode_statistics/step": step,
+        })
 
         for task in self.sum_rewards_metrics.keys():
             mean_sum_reward = self.sum_rewards_metrics[task].compute()
             mean_discounted_reward = self.discounted_rewards_metrics[task].compute()
             mean_episode_length = self.episode_lengths_metrics[task].compute()
 
+            # Log per-task metrics, skipping tasks that recorded no episodes this window (NaN)
+            if not np.isnan(mean_sum_reward):
+                wandb_logger.log({
+                    f"episode_statistics/{task}/sum_reward": mean_sum_reward,
+                    f"episode_statistics/{task}/discounted_reward": mean_discounted_reward,
+                    f"episode_statistics/{task}/episode_length": mean_episode_length,
+                })
+                print(f"Task {task} - Sum Reward: {mean_sum_reward}, Discounted Reward: {mean_discounted_reward}, Episode Length: {mean_episode_length}")
+
             self.sum_rewards_metrics[task].reset()
             self.discounted_rewards_metrics[task].reset()
             self.episode_lengths_metrics[task].reset()
-            wandb_logger.log({
-                "episode_statistics/step": step,
-            })
 
             if not np.isnan(mean_sum_reward) and "4train" in task:
                 sum_train_reward += mean_sum_reward
@@ -67,22 +79,26 @@ def log_statistics(self, step: int, record_next_episode: bool):
             if not np.isnan(mean_sum_reward) and "4test" in task:
                 sum_test_reward += mean_sum_reward
                 num_test_tasks += 1
-            sum_episode_length += mean_episode_length
+
+            # Only add the episode length if it's not NaN
+            if not np.isnan(mean_episode_length):
+                sum_episode_length += mean_episode_length
+                num_valid_episode_length += 1
 
         self.episode_info = {
             "steps": step,
             "episode_count": self.acc_episode_count,
             "mean_sum_reward": sum_train_reward / num_train_tasks if num_train_tasks > 0 else 0,
             "mean_discounted_reward": sum_discounted_reward / num_train_tasks if num_train_tasks > 0 else 0,
-            "mean_episode_length": sum_episode_length / (num_train_tasks + num_test_tasks) if num_train_tasks + num_test_tasks > 0 else 0
+            "mean_episode_length": sum_episode_length / num_valid_episode_length if num_valid_episode_length > 0 else 0
         }
         wandb_logger.log({
             "episode_statistics/steps": step,
             "episode_statistics/episode_count": self.acc_episode_count,
             "episode_statistics/mean_sum_reward": sum_train_reward / num_train_tasks if num_train_tasks > 0 else 0,
             "episode_statistics/mean_test_sum_reward": sum_test_reward / num_test_tasks if num_test_tasks > 0 else 0,
             "episode_statistics/mean_discounted_reward": sum_discounted_reward / num_train_tasks if num_train_tasks > 0 else 0,
-            "episode_statistics/mean_episode_length": sum_episode_length / (num_train_tasks + num_test_tasks) if num_train_tasks + num_test_tasks > 0 else 0
+            "episode_statistics/mean_episode_length": sum_episode_length / num_valid_episode_length if num_valid_episode_length > 0 else 0
         })
 
         self.acc_episode_count = 0
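
The heart of this change is the NaN handling: a task that finishes no episodes during the logging window is assumed to return NaN from its metric's `compute()`, so adding it into `sum_episode_length` would turn the whole sum into NaN, and the old denominator `num_train_tasks + num_test_tasks` counted tasks by reward validity rather than by episode-length validity. Below is a minimal, self-contained sketch of the guarded averaging; `mean_episode_length` is a hypothetical helper standing in for the accumulation loop above, not part of the actual code.

```python
import numpy as np

def mean_episode_length(per_task_lengths):
    """Average only the tasks that produced a real value.

    A task with no finished episodes in the window is represented by NaN,
    mirroring what the metric's compute() is assumed to return in the diff.
    """
    total = 0.0
    valid = 0
    for length in per_task_lengths:
        if not np.isnan(length):
            total += length
            valid += 1
    return total / valid if valid > 0 else 0

# Two tasks finished episodes, one did not; the NaN entry is skipped
# instead of poisoning the sum or inflating the denominator.
print(mean_episode_length([120.0, float("nan"), 80.0]))  # 100.0
print(mean_episode_length([float("nan")]))               # 0
```

Dividing by the count of tasks that actually reported a length, rather than by the number of train and test tasks, keeps idle tasks from either dragging the mean to NaN or silently shrinking it.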