add policy typehints

xuanlinli17 · xuanlinli17 · commit 62f9e8d5f3ba · 2024-05-01T17:36:32.000-07:00
diff --git a/simpler_env/policies/octo/octo_model.py b/simpler_env/policies/octo/octo_model.py
@@ -1,5 +1,5 @@
 from collections import deque
-from typing import Optional
+from typing import Optional, Sequence
 import os
 
 import jax
@@ -16,15 +16,15 @@
 class OctoInference:
     def __init__(
         self,
-        model_type="octo-base",
-        policy_setup="widowx_bridge",
-        horizon=2,
-        pred_action_horizon=4,
-        exec_horizon=1,
-        image_size=256,
-        action_scale=1.0,
-        init_rng=0,
-    ):
+        model_type: str = "octo-base",
+        policy_setup: str = "widowx_bridge",
+        horizon: int = 2,
+        pred_action_horizon: int = 4,
+        exec_horizon: int = 1,
+        image_size: int = 256,
+        action_scale: float = 1.0,
+        init_rng: int = 0,
+    ) -> None:
         os.environ["TOKENIZERS_PARALLELISM"] = "false"
         if policy_setup == "widowx_bridge":
             dataset_id = "bridge_dataset"
@@ -118,7 +118,7 @@ def __init__(
         self.action_ensemble_temp = action_ensemble_temp
         self.rng = jax.random.PRNGKey(init_rng)
         for _ in range(5):
-            # to match octo server's inference seeds
+            # the purpose of this for loop is just to match octo server's inference seeds
             self.rng, _key = jax.random.split(self.rng)  # each shape [2,]
 
         self.sticky_action_is_on = False
@@ -136,7 +136,7 @@ def __init__(
             self.action_ensembler = None
         self.num_image_history = 0
 
-    def _resize_image(self, image):
+    def _resize_image(self, image: np.ndarray) -> np.ndarray:
         image = tf.image.resize(
             image,
             size=(self.image_size, self.image_size),
@@ -146,24 +146,25 @@ def _resize_image(self, image):
         image = tf.cast(tf.clip_by_value(tf.round(image), 0, 255), tf.uint8).numpy()
         return image
 
-    def _add_image_to_history(self, image):
+    def _add_image_to_history(self, image: np.ndarray) -> None:
         self.image_history.append(image)
+        # Alternative implementation below; for real evaluation, however, filling the entire buffer at the first step does not appear to be necessary
         # if self.num_image_history == 0:
         #     self.image_history.extend([image] * self.horizon)
         # else:
         #     self.image_history.append(image)
         self.num_image_history = min(self.num_image_history + 1, self.horizon)
 
-    def _obtain_image_history_and_mask(self):
+    def _obtain_image_history_and_mask(self) -> tuple[np.ndarray, np.ndarray]:
         images = np.stack(self.image_history, axis=0)
         horizon = len(self.image_history)
-        pad_mask = np.ones(horizon, dtype=np.float64)  # note: this is not np.bool
+        pad_mask = np.ones(horizon, dtype=np.float64)  # note: this should be of float type, not a bool type
         pad_mask[: horizon - min(horizon, self.num_image_history)] = 0
-        # pad_mask = np.ones(self.horizon, dtype=np.float64) # note: this is not np.bool
+        # pad_mask = np.ones(self.horizon, dtype=np.float64) # note: this should be of float type, not a bool type
         # pad_mask[:self.horizon - self.num_image_history] = 0
         return images, pad_mask
 
-    def reset(self, task_description):
+    def reset(self, task_description: str) -> None:
         if self.automatic_task_creation:
             self.task = self.model.create_tasks(texts=[task_description])
         else:
@@ -180,7 +181,7 @@ def reset(self, task_description):
         # self.gripper_is_closed = False
         self.previous_gripper_action = None
 
-    def step(self, image, task_description: Optional[str] = None, *args, **kwargs):
+    def step(self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
         """
         Input:
             image: np.ndarray of shape (H, W, 3), uint8
@@ -299,7 +300,7 @@ def step(self, image, task_description: Optional[str] = None, *args, **kwargs):
 
         return raw_action, action
 
-    def visualize_epoch(self, predicted_raw_actions, images, save_path):
+    def visualize_epoch(self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str) -> None:
         images = [self._resize_image(image) for image in images]
         ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
 
diff --git a/simpler_env/policies/octo/octo_server_model.py b/simpler_env/policies/octo/octo_server_model.py
@@ -1,5 +1,5 @@
 from base64 import b64decode, b64encode
-from typing import Optional
+from typing import Optional, Sequence, Any
 import json
 import time
 import urllib
@@ -71,11 +71,11 @@ def patch():
 class OctoServerInference:
     def __init__(
         self,
-        model_type="octo-base",
-        policy_setup="widowx_bridge",
-        image_size=256,
-        action_scale=1.0,
-    ):
+        model_type: str = "octo-base",
+        policy_setup: str = "widowx_bridge",
+        image_size: int = 256,
+        action_scale: float = 1.0,
+    ) -> None:
         if policy_setup == "widowx_bridge":
             self.sticky_gripper_num_repeat = 1
             self.dataset_name = "bridge_dataset"
@@ -97,7 +97,7 @@ def __init__(
         self.action_scale = action_scale
         self.task = None
 
-    def _resize_image(self, image):
+    def _resize_image(self, image: np.ndarray) -> np.ndarray:
         image = tf.image.resize(
             image,
             size=(self.image_size, self.image_size),
@@ -107,7 +107,7 @@ def _resize_image(self, image):
         image = tf.cast(tf.clip_by_value(tf.round(image), 0, 255), tf.uint8).numpy()
         return image
 
-    def reset(self, task_description):
+    def reset(self, task_description: str) -> None:
         self.task = task_description
         self.sticky_action_is_on = False
         self.gripper_action_repeat = 0
@@ -120,7 +120,7 @@ def reset(self, task_description):
         )
         time.sleep(1.0)
 
-    def _get_fake_pay_load(self, image_primary, text, modality="l"):
+    def _get_fake_pay_load(self, image_primary: np.ndarray, text: str, modality: str = "l") -> dict:
         payload = {
             "dataset_name": self.dataset_name,
             "observation": {
@@ -133,7 +133,7 @@ def _get_fake_pay_load(self, image_primary, text, modality="l"):
         fake_pay_load = {"use_this": dumps(payload)}
         return fake_pay_load
 
-    def _query_for_action(self, image_primary, text, goal, modality="l"):
+    def _query_for_action(self, image_primary: np.ndarray, text: str, goal: Optional[Any], modality: str = "l") -> list:
         del goal
         # _ = requests.post(urllib.parse.urljoin("http://ari.bair.berkeley.edu:8000", "reset"),)
         fake_pay_load = self._get_fake_pay_load(image_primary, text, modality)
@@ -145,7 +145,7 @@ def _query_for_action(self, image_primary, text, goal, modality="l"):
         # print(reply)
         return loads(reply)
 
-    def step(self, image, task_description: Optional[str] = None, *args, **kwargs):
+    def step(self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
         """
         Input:
             image: np.ndarray of shape (H, W, 3), uint8
@@ -239,7 +239,7 @@ def step(self, image, task_description: Optional[str] = None, *args, **kwargs):
 
         return raw_action, action
 
-    def visualize_epoch(self, predicted_raw_actions, images, save_path):
+    def visualize_epoch(self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str) -> None:
         images = [self._resize_image(image) for image in images]
         ACTION_DIM_LABELS = ["x", "y", "z", "yaw", "pitch", "roll", "grasp"]
 
diff --git a/simpler_env/policies/rt1/rt1_model.py b/simpler_env/policies/rt1/rt1_model.py
@@ -1,5 +1,5 @@
 from collections import defaultdict
-from typing import Optional
+from typing import Optional, Sequence
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -14,13 +14,13 @@
 class RT1Inference:
     def __init__(
         self,
-        saved_model_path="rt_1_x_tf_trained_for_002272480_step",
-        lang_embed_model_path="https://tfhub.dev/google/universal-sentence-encoder-large/5",
-        image_width=320,
-        image_height=256,
-        action_scale=1.0,
-        policy_setup="google_robot",
-    ):
+        saved_model_path: str = "rt_1_x_tf_trained_for_002272480_step",
+        lang_embed_model_path: str = "https://tfhub.dev/google/universal-sentence-encoder-large/5",
+        image_width: int = 320,
+        image_height: int = 256,
+        action_scale: float = 1.0,
+        policy_setup: str = "google_robot",
+    ) -> None:
         self.lang_embed_model = hub.load(lang_embed_model_path)
         self.tfa_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(
             model_path=saved_model_path,
@@ -53,10 +53,10 @@ def __init__(
 
     @staticmethod
     def _rescale_action_with_bound(
-        actions: np.ndarray,
+        actions: np.ndarray | tf.Tensor,
         low: float,
         high: float,
-        safety_margin: float = 0,
+        safety_margin: float = 0.0,
         post_scaling_max: float = 1.0,
         post_scaling_min: float = -1.0,
     ) -> np.ndarray:
@@ -68,7 +68,7 @@ def _rescale_action_with_bound(
             post_scaling_max - safety_margin,
         )
 
-    def _unnormalize_action_widowx_bridge(self, action):
+    def _unnormalize_action_widowx_bridge(self, action: dict[str, np.ndarray | tf.Tensor]) -> dict[str, np.ndarray]:
         action["world_vector"] = self._rescale_action_with_bound(
             action["world_vector"],
             low=-1.75,
@@ -85,7 +85,7 @@ def _unnormalize_action_widowx_bridge(self, action):
         )
         return action
 
-    def _initialize_model(self):
+    def _initialize_model(self) -> None:
         # Perform one step of inference using dummy input to trace the tensoflow graph
         # Obtain a dummy observation, where the features are all 0
         self.observation = tf_agents.specs.zero_spec_nest(
@@ -98,25 +98,25 @@ def _initialize_model(self):
         # Run inference using the policy
         _action = self.tfa_policy.action(self.tfa_time_step, self.policy_state)
 
-    def _resize_image(self, image):
+    def _resize_image(self, image: np.ndarray | tf.Tensor) -> tf.Tensor:
         image = tf.image.resize_with_pad(image, target_width=self.image_width, target_height=self.image_height)
         image = tf.cast(image, tf.uint8)
         return image
 
-    def _initialize_task_description(self, task_description):
+    def _initialize_task_description(self, task_description: Optional[str] = None) -> None:
         if task_description is not None:
             self.task_description = task_description
             self.task_description_embedding = self.lang_embed_model([task_description])[0]
         else:
             self.task_description = ""
             self.task_description_embedding = tf.zeros((512,), dtype=tf.float32)
 
-    def reset(self, task_description):
+    def reset(self, task_description: str) -> None:
         self._initialize_model()
         self._initialize_task_description(task_description)
 
     @staticmethod
-    def _small_action_filter_google_robot(raw_action, arm_movement=False, gripper=True):
+    def _small_action_filter_google_robot(raw_action: dict[str, np.ndarray | tf.Tensor], arm_movement: bool = False, gripper: bool = True) -> dict[str, np.ndarray | tf.Tensor]:
         # small action filtering for google robot
         if arm_movement:
             raw_action["world_vector"] = tf.where(
@@ -147,7 +147,7 @@ def _small_action_filter_google_robot(raw_action, arm_movement=False, gripper=Tr
             )
         return raw_action
 
-    def step(self, image, task_description: Optional[str] = None):
+    def step(self, image: np.ndarray, task_description: Optional[str] = None) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
         """
         Input:
             image: np.ndarray of shape (H, W, 3), uint8
@@ -179,6 +179,8 @@ def step(self, image, task_description: Optional[str] = None):
             raw_action = self._small_action_filter_google_robot(raw_action, arm_movement=False, gripper=True)
         if self.unnormalize_action:
             raw_action = self.unnormalize_action_fxn(raw_action)
+        for k in raw_action.keys():
+            raw_action[k] = np.asarray(raw_action[k])
 
         # process raw_action to obtain the action to be sent to the maniskill2 environment
         action = {}
@@ -227,7 +229,7 @@ def step(self, image, task_description: Optional[str] = None):
 
         return raw_action, action
 
-    def visualize_epoch(self, predicted_raw_actions, images, save_path):
+    def visualize_epoch(self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str) -> None:
         images = [self._resize_image(image) for image in images]
         predicted_action_name_to_values_over_time = defaultdict(list)
         figure_layout = [