Transition from Models Hub to Datasets Hub for expert trajectories (#723)

ernestum · web-flow · commit b4523848c9dc · 2023-07-07T09:50:52.000-07:00
* Remove load_rollouts_from_huggingface and replace it with code in demonstrations.py that loads demonstrations from Hugging Face datasets instead of Hugging Face models.

* Allow specifying the repo_id directly in the loader_kwargs of the demonstrations ingredient and pass remaining loader_kwargs to datasets.load_dataset.

* Simplify demonstrations ingredient configuration and make it more flexible at the same time.

* Remove now obsolete test.

* Rename rollout_type to type and rollout_path to path and make default type "generated" to match previous behavior.

* Fix documentation of the Raises: section of get_expert_trajectories() and improve wording of the ValueError raised when n_expert_demos is missing while generating trajectories.

* Simplify unnecessarily complex regexes to match raised exceptions during testing.

* Fix regex for ValueError to reflect updated ValueError string.

* Add an edge case to accommodate the fact that the HuggingFace Hub only has an expert for seals/Cartpole while the testdata folder only has an expert for normal Cartpole.

* Make it explicit that in some tests the rollout should be loaded locally from disk.

* Fix formatting issues in test_scripts.py

* Rename demonstrations.type to demonstrations.source to avoid a name clash with Python's built-in `type`.

* Make sure to load local demonstrations in quickstart.sh

* Fix formatting issue.

* Ensure the readme contains the same snippet as the examples.
diff --git a/README.md b/README.md
@@ -74,10 +74,10 @@ From [examples/quickstart.sh:](examples/quickstart.sh)
 python -m imitation.scripts.train_rl with pendulum environment.fast policy_evaluation.fast rl.fast fast logging.log_dir=quickstart/rl/
 
 # Train GAIL from demonstrations. Tensorboard logs saved in output/ (default log directory).
-python -m imitation.scripts.train_adversarial gail with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.rollout_path=quickstart/rl/rollouts/final.npz
+python -m imitation.scripts.train_adversarial gail with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.path=quickstart/rl/rollouts/final.npz demonstrations.source=local
 
 # Train AIRL from demonstrations. Tensorboard logs saved in output/ (default log directory).
-python -m imitation.scripts.train_adversarial airl with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.rollout_path=quickstart/rl/rollouts/final.npz
+python -m imitation.scripts.train_adversarial airl with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.path=quickstart/rl/rollouts/final.npz demonstrations.source=local
 ```
 
 Tips:
diff --git a/benchmarking/example_airl_seals_ant_best_hp_eval.json b/benchmarking/example_airl_seals_ant_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "reward": {
diff --git a/benchmarking/example_airl_seals_half_cheetah_best_hp_eval.json b/benchmarking/example_airl_seals_half_cheetah_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "reward": {
diff --git a/benchmarking/example_airl_seals_hopper_best_hp_eval.json b/benchmarking/example_airl_seals_hopper_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "reward": {
diff --git a/benchmarking/example_airl_seals_swimmer_best_hp_eval.json b/benchmarking/example_airl_seals_swimmer_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "expert": {
diff --git a/benchmarking/example_airl_seals_walker_best_hp_eval.json b/benchmarking/example_airl_seals_walker_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "expert": {
diff --git a/benchmarking/example_bc_seals_ant_best_hp_eval.json b/benchmarking/example_bc_seals_ant_best_hp_eval.json
@@ -20,7 +20,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_bc_seals_half_cheetah_best_hp_eval.json b/benchmarking/example_bc_seals_half_cheetah_best_hp_eval.json
@@ -20,7 +20,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_bc_seals_hopper_best_hp_eval.json b/benchmarking/example_bc_seals_hopper_best_hp_eval.json
@@ -20,7 +20,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_bc_seals_swimmer_best_hp_eval.json b/benchmarking/example_bc_seals_swimmer_best_hp_eval.json
@@ -20,7 +20,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_bc_seals_walker_best_hp_eval.json b/benchmarking/example_bc_seals_walker_best_hp_eval.json
@@ -20,7 +20,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_dagger_seals_ant_best_hp_eval.json b/benchmarking/example_dagger_seals_ant_best_hp_eval.json
@@ -24,7 +24,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json b/benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json
@@ -24,7 +24,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_dagger_seals_hopper_best_hp_eval.json b/benchmarking/example_dagger_seals_hopper_best_hp_eval.json
@@ -24,7 +24,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_dagger_seals_swimmer_best_hp_eval.json b/benchmarking/example_dagger_seals_swimmer_best_hp_eval.json
@@ -24,7 +24,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_dagger_seals_walker_best_hp_eval.json b/benchmarking/example_dagger_seals_walker_best_hp_eval.json
@@ -24,7 +24,8 @@
     "use_offline_rollouts": false
   },
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "policy": {
diff --git a/benchmarking/example_gail_seals_ant_best_hp_eval.json b/benchmarking/example_gail_seals_ant_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "reward": {
diff --git a/benchmarking/example_gail_seals_half_cheetah_best_hp_eval.json b/benchmarking/example_gail_seals_half_cheetah_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "reward": {
diff --git a/benchmarking/example_gail_seals_hopper_best_hp_eval.json b/benchmarking/example_gail_seals_hopper_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "reward": {
diff --git a/benchmarking/example_gail_seals_swimmer_best_hp_eval.json b/benchmarking/example_gail_seals_swimmer_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "expert": {
diff --git a/benchmarking/example_gail_seals_walker_best_hp_eval.json b/benchmarking/example_gail_seals_walker_best_hp_eval.json
@@ -6,7 +6,8 @@
   },
   "checkpoint_interval": 0,
   "demonstrations": {
-    "rollout_type": "ppo-huggingface",
+    "source": "huggingface",
+    "algo_name": "ppo",
     "n_expert_demos": null
   },
   "expert": {
diff --git a/benchmarking/util.py b/benchmarking/util.py
@@ -71,7 +71,7 @@ def clean_config_file(file: pathlib.Path, write_path: pathlib.Path, /) -> None:
     # remove key 'agent_path'
     config.pop("agent_path")
     config.pop("seed")
-    config.get("demonstrations", {}).pop("rollout_path")
+    config.get("demonstrations", {}).pop("path")
     config.get("expert", {}).get("loader_kwargs", {}).pop("path", None)
     env_name = config.pop("environment").pop("gym_id")
     config["environment"] = {"gym_id": env_name}
diff --git a/examples/quickstart.sh b/examples/quickstart.sh
@@ -4,7 +4,7 @@
 python -m imitation.scripts.train_rl with pendulum environment.fast policy_evaluation.fast rl.fast fast logging.log_dir=quickstart/rl/
 
 # Train GAIL from demonstrations. Tensorboard logs saved in output/ (default log directory).
-python -m imitation.scripts.train_adversarial gail with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.rollout_path=quickstart/rl/rollouts/final.npz
+python -m imitation.scripts.train_adversarial gail with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.path=quickstart/rl/rollouts/final.npz demonstrations.source=local
 
 # Train AIRL from demonstrations. Tensorboard logs saved in output/ (default log directory).
-python -m imitation.scripts.train_adversarial airl with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.rollout_path=quickstart/rl/rollouts/final.npz
+python -m imitation.scripts.train_adversarial airl with pendulum environment.fast demonstrations.fast policy_evaluation.fast rl.fast fast demonstrations.path=quickstart/rl/rollouts/final.npz demonstrations.source=local
diff --git a/src/imitation/data/serialize.py b/src/imitation/data/serialize.py
@@ -5,7 +5,6 @@
 from typing import Mapping, Sequence, cast
 
 import datasets
-import huggingface_sb3 as hfsb3
 import numpy as np
 
 from imitation.data import huggingface_utils
@@ -87,14 +86,3 @@ def load_with_rewards(path: AnyPath) -> Sequence[TrajectoryWithRew]:
         )
 
     return cast(Sequence[TrajectoryWithRew], data)
-
-
-def load_rollouts_from_huggingface(
-    algo_name: str,
-    env_name: str,
-    organization: str = "HumanCompatibleAI",
-) -> str:
-    model_name = hfsb3.ModelName(algo_name, hfsb3.EnvironmentName(env_name))
-    repo_id = hfsb3.ModelRepoId(organization, model_name)
-    filename = hfsb3.load_from_hub(repo_id, "rollouts.npz")
-    return filename
diff --git a/src/imitation/scripts/ingredients/demonstrations.py b/src/imitation/scripts/ingredients/demonstrations.py
diff --git a/src/imitation/scripts/ingredients/policy_evaluation.py b/src/imitation/scripts/ingredients/policy_evaluation.py
diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py