opendilab
diff --git a/‎ding/entry/serial_entry_gail.py‎
Lines changed: 0 additions & 170 deletions b/‎ding/entry/serial_entry_gail.py‎
Lines changed: 0 additions & 170 deletions
diff --git a/‎dizoo/atari/config/serial/pong/pong_gail_dqn_config.py‎
Lines changed: 13 additions & 9 deletions b/‎dizoo/atari/config/serial/pong/pong_gail_dqn_config.py‎
Lines changed: 13 additions & 9 deletions
diff --git a/‎dizoo/box2d/bipedalwalker/config/bipedalwalker_gail_sac_config.py‎
Lines changed: 12 additions & 5 deletions b/‎dizoo/box2d/bipedalwalker/config/bipedalwalker_gail_sac_config.py‎
Lines changed: 12 additions & 5 deletions
diff --git a/‎dizoo/box2d/lunarlander/config/lunarlander_gail_dqn_config.py‎
Lines changed: 13 additions & 9 deletions b/‎dizoo/box2d/lunarlander/config/lunarlander_gail_dqn_config.py‎
Lines changed: 13 additions & 9 deletions
diff --git a/‎dizoo/classic_control/cartpole/config/cartpole_dqn_gail_config.py‎
Lines changed: 19 additions & 7 deletions b/‎dizoo/classic_control/cartpole/config/cartpole_dqn_gail_config.py‎
Lines changed: 19 additions & 7 deletions
@@ -31,7 +31,7 @@
         # Users should add their own data path here. Data path should lead to the directory that stores ``expert_data.pkl``.
         # Absolute path is recommended.
         # In DI-engine, it is usually located in ``exp_name`` directory
-        # e.g. 'exp_name/expert_data.pkl'
+        # e.g. 'exp_name'
         data_path='data_path_placeholder',
     ),
     policy=dict(
@@ -80,13 +80,17 @@
     # or you can enter `ding -m serial_gail -c pong_gail_dqn_config.py -s 0`
     # then input the config you used to generate your expert model in the path mentioned above
     # e.g. pong_dqn_config.py
-    from ding.entry import serial_pipeline_gail
+    from ding.entry import serial_pipeline_reward_model_offpolicy, collect_demo_data
     from dizoo.atari.config.serial.pong import pong_dqn_config, pong_dqn_create_config
-    expert_main_config = pong_dqn_config
-    expert_create_config = pong_dqn_create_config
-    serial_pipeline_gail(
-        (main_config, create_config), (expert_main_config, expert_create_config),
-        max_env_step=1000000,
-        seed=0,
-        collect_data=True
+
+    # set your expert config here
+    expert_cfg = (pong_dqn_config, pong_dqn_create_config)
+    expert_data_path = main_config.reward_model.data_path + '/expert_data.pkl'
+
+    # collect expert data
+    collect_demo_data(
+        expert_cfg, seed=0, expert_data_path=expert_data_path, collect_count=main_config.reward_model.collect_count
     )
+
+    # train reward model; the (main, create) config pair is passed as a single tuple argument
+    serial_pipeline_reward_model_offpolicy((main_config, create_config))
@@ -87,10 +87,17 @@
     # or you can enter `ding -m serial_gail -c bipedalwalker_gail_sac_config.py -s 0`
     # then input the config you used to generate your expert model in the path mentioned above
     # e.g. bipedalwalker_sac_config.py
-    from ding.entry import serial_pipeline_gail
+    from ding.entry import serial_pipeline_reward_model_offpolicy, collect_demo_data
     from dizoo.box2d.bipedalwalker.config import bipedalwalker_sac_config, bipedalwalker_sac_create_config
-    expert_main_config = bipedalwalker_sac_config
-    expert_create_config = bipedalwalker_sac_create_config
-    serial_pipeline_gail(
-        [main_config, create_config], [expert_main_config, expert_create_config], seed=0, collect_data=True
+
+    # set your expert config here
+    expert_cfg = (bipedalwalker_sac_config, bipedalwalker_sac_create_config)
+    expert_data_path = main_config.reward_model.data_path + '/expert_data.pkl'
+
+    # collect expert data
+    collect_demo_data(
+        expert_cfg, seed=0, expert_data_path=expert_data_path, collect_count=main_config.reward_model.collect_count
     )
+
+    # train reward model; the (main, create) config pair is passed as a single tuple argument
+    serial_pipeline_reward_model_offpolicy((main_config, create_config))
@@ -29,7 +29,7 @@
         # Users should add their own data path here. Data path should lead to the directory that stores ``expert_data.pkl``.
         # Absolute path is recommended.
         # In DI-engine, it is usually located in ``exp_name`` directory
-        # e.g. 'exp_name/expert_data.pkl'
+        # e.g. 'exp_name'
         data_path='data_path_placeholder',
     ),
     policy=dict(
@@ -96,13 +96,17 @@
     # or you can enter `ding -m serial_gail -c lunarlander_gail_dqn_config.py -s 0`
     # then input the config you used to generate your expert model in the path mentioned above
     # e.g. lunarlander_dqn_config.py
-    from ding.entry import serial_pipeline_gail
+    from ding.entry import serial_pipeline_reward_model_offpolicy, collect_demo_data
     from dizoo.box2d.lunarlander.config import lunarlander_dqn_config, lunarlander_dqn_create_config
-    expert_main_config = lunarlander_dqn_config
-    expert_create_config = lunarlander_dqn_create_config
-    serial_pipeline_gail(
-        [main_config, create_config], [expert_main_config, expert_create_config],
-        max_env_step=1000000,
-        seed=0,
-        collect_data=True
+
+    # set your expert config here
+    expert_cfg = (lunarlander_dqn_config, lunarlander_dqn_create_config)
+    expert_data_path = main_config.reward_model.data_path + '/expert_data.pkl'
+
+    # collect expert data
+    collect_demo_data(
+        expert_cfg, seed=0, expert_data_path=expert_data_path, collect_count=main_config.reward_model.collect_count
     )
+
+    # train reward model; the (main, create) config pair is passed as a single tuple argument
+    serial_pipeline_reward_model_offpolicy((main_config, create_config))
@@ -20,7 +20,10 @@
         # In DI-engine, it is ``exp_name/ckpt/ckpt_best.pth.tar``.
         # If collect_data is True, we will use this expert_model_path to collect expert data first, rather than
         # loading data directly from the user-defined data_path
-        expert_model_path='model_path_placeholder',
+        # data_path is the directory that stores the expert policy data used to train the reward model,
+        # so in general data_path is the same as the expert's exp_name
+        expert_model_path='cartpole_dqn_seed0/ckpt/ckpt_best.pth.tar',
+        data_path='cartpole_dqn_seed0',
         collect_count=1000,
     ),
     policy=dict(
@@ -68,13 +71,22 @@
     # or you can enter `ding -m serial_gail -c cartpole_dqn_gail_config.py -s 0`
     # then input the config you used to generate your expert model in the path mentioned above
     # e.g. cartpole_dqn_config.py
-    from ding.entry import serial_pipeline_gail
+    from ding.entry import serial_pipeline_reward_model_offpolicy, collect_demo_data
     from dizoo.classic_control.cartpole.config import cartpole_dqn_config, cartpole_dqn_create_config
+
+    # set expert config from policy config in dizoo
+    expert_cfg = (cartpole_dqn_config, cartpole_dqn_create_config)
     expert_main_config = cartpole_dqn_config
-    expert_create_config = cartpole_dqn_create_config
-    serial_pipeline_gail(
-        (main_config, create_config), (expert_main_config, expert_create_config),
-        max_env_step=1000000,
+    expert_data_path = main_config.reward_model.data_path + '/expert_data.pkl'
+
+    # collect expert data
+    collect_demo_data(
+        expert_cfg,
         seed=0,
-        collect_data=True
+        state_dict_path=main_config.reward_model.expert_model_path,
+        expert_data_path=expert_data_path,
+        collect_count=main_config.reward_model.collect_count
     )
+
+    # train reward model
+    serial_pipeline_reward_model_offpolicy((main_config, create_config))