Commit d5f076d

Type APIs working with policies (#66)
This clarifies that we use TFAgents rather than raw saved models, and helps with readability and discoverability (the latter especially in an IDE).
1 parent fca414f commit d5f076d
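
To illustrate the distinction the commit message draws, here is a minimal sketch (not code from this repository): the training scripts handle live TF-Agents policy objects in memory, while a "raw saved model" only exists once a policy has been exported to disk.

from tf_agents.agents import TFAgent
from tf_agents.policies import TFPolicy
from tf_agents.trajectories import time_step as ts


def act_once(agent: TFAgent, time_step: ts.TimeStep):
  # The training scripts pass around live TF-Agents objects: agent.policy
  # is a TFPolicy that can be queried in memory. A "raw saved model" only
  # appears later, when PolicySaver exports such a policy to disk.
  policy: TFPolicy = agent.policy
  return policy.action(time_step)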

File tree

5 files changed: +34 / -20 lines changed

compiler_opt/rl/agent_creators.py

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ def create_agent(agent_name: constant.AgentName,
                  action_spec: types.NestedTensorSpec,
                  preprocessing_layer_creator: Callable[[types.TensorSpec],
                                                        tf.keras.layers.Layer],
-                 policy_network: types.Network):
+                 policy_network: types.Network) -> TFAgent:
   """Creates a tfa.agents.TFAgent object.
 
   Args:
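
A hedged usage sketch of the annotated factory. The wrapper function and its arguments are placeholders, and the import path for constant is an assumption. Note that the call sites updated in this commit (train_bc.py, train_locally.py) omit the trailing policy_network argument, presumably supplying it through gin configuration; the sketch passes it explicitly to mirror the signature above.

from tf_agents.agents import TFAgent

from compiler_opt.rl import agent_creators, constant


def make_bc_agent(time_step_spec, action_spec, preprocessing_layer_creator,
                  policy_network) -> TFAgent:
  # With create_agent's `-> TFAgent` return annotation, an IDE or type
  # checker now knows the result exposes TFAgent members such as .policy,
  # .collect_policy, and .train.
  return agent_creators.create_agent(constant.AgentName.BEHAVIORAL_CLONE,
                                     time_step_spec, action_spec,
                                     preprocessing_layer_creator,
                                     policy_network)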

compiler_opt/rl/policy_saver.py

Lines changed: 11 additions & 7 deletions
@@ -18,8 +18,11 @@
 import os
 
 import tensorflow as tf
+from tf_agents.policies import TFPolicy
 from tf_agents.policies import policy_saver
 
+from typing import Dict, Tuple
+
 OUTPUT_SIGNATURE = 'output_spec.json'
 
 _TYPE_CONVERSION_DICT = {
@@ -74,17 +77,18 @@ class PolicySaver(object):
   ```
   """
 
-  def __init__(self, policy_dict):
+  def __init__(self, policy_dict: Dict[str, TFPolicy]):
     """Initialize the PolicySaver object.
 
     Args:
       policy_dict: A dict mapping from policy name to policy.
     """
-    self._policy_saver_dict = {
-        policy_name: (policy_saver.PolicySaver(
-            policy, batch_size=1, use_nest_path_signatures=False), policy)
-        for policy_name, policy in policy_dict.items()
-    }
+    self._policy_saver_dict: Dict[str, Tuple[
+        policy_saver.PolicySaver, TFPolicy]] = {
+            policy_name: (policy_saver.PolicySaver(
+                policy, batch_size=1, use_nest_path_signatures=False), policy)
+            for policy_name, policy in policy_dict.items()
+        }
 
   def _save_policy(self, saver, path):
     """Writes policy, model weights and model_binding.txt to path/."""
@@ -149,7 +153,7 @@ def _write_output_signature(self, saver, path):
     with tf.io.gfile.GFile(os.path.join(path, OUTPUT_SIGNATURE), 'w') as f:
       f.write(json.dumps(output_list))
 
-  def save(self, root_dir):
+  def save(self, root_dir: str):
     """Writes policy and model_binding.txt to root_dir/policy_name/."""
     for policy_name, (saver, _) in self._policy_saver_dict.items():
       self._save_policy(saver, os.path.join(root_dir, policy_name))
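
A usage sketch for the annotated constructor and save(). The wrapper function is illustrative only; the policy names mirror the dict built in train_bc.py below.

from typing import Dict

from tf_agents.agents import TFAgent
from tf_agents.policies import TFPolicy

from compiler_opt.rl import policy_saver


def export_policies(agent: TFAgent, root_dir: str) -> None:
  # Keys are policy names; values are live TFPolicy objects.
  policies: Dict[str, TFPolicy] = {
      'saved_policy': agent.policy,
      'saved_collect_policy': agent.collect_policy,
  }
  # Each policy (plus model_binding.txt) is written under root_dir/<name>/.
  policy_saver.PolicySaver(policies).save(root_dir)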

compiler_opt/rl/train_bc.py

Lines changed: 9 additions & 4 deletions
@@ -30,6 +30,11 @@
 from compiler_opt.rl import registry
 from compiler_opt.rl import trainer
 
+from tf_agents.agents import TFAgent
+from tf_agents.policies import TFPolicy
+
+from typing import Dict
+
 _ROOT_DIR = flags.DEFINE_string(
     'root_dir', os.getenv('TEST_UNDECLARED_OUTPUTS_DIR'),
     'Root directory for writing logs/summaries/checkpoints.')
@@ -58,11 +63,11 @@ def train_eval(agent_name=constant.AgentName.BEHAVIORAL_CLONE,
   preprocessing_layer_creator = problem_config.get_preprocessing_layer_creator()
 
   # Initialize trainer and policy saver.
-  tf_agent = agent_creators.create_agent(agent_name, time_step_spec,
-                                         action_spec,
-                                         preprocessing_layer_creator)
+  tf_agent: TFAgent = agent_creators.create_agent(agent_name, time_step_spec,
+                                                  action_spec,
+                                                  preprocessing_layer_creator)
   llvm_trainer = trainer.Trainer(root_dir=root_dir, agent=tf_agent)
-  policy_dict = {
+  policy_dict: Dict[str, TFPolicy] = {
       'saved_policy': tf_agent.policy,
       'saved_collect_policy': tf_agent.collect_policy,
   }

compiler_opt/rl/train_locally.py

Lines changed: 4 additions & 3 deletions
@@ -25,6 +25,7 @@
 from absl import logging
 import gin
 import tensorflow as tf
+from tf_agents.agents import TFAgent
 from tf_agents.system import system_multiprocessing as multiprocessing
 from typing import List
 
@@ -77,9 +78,9 @@ def train_eval(agent_name=constant.AgentName.PPO,
   preprocessing_layer_creator = problem_config.get_preprocessing_layer_creator()
 
   # Initialize trainer and policy saver.
-  tf_agent = agent_creators.create_agent(agent_name, time_step_spec,
-                                         action_spec,
-                                         preprocessing_layer_creator)
+  tf_agent: TFAgent = agent_creators.create_agent(agent_name, time_step_spec,
+                                                  action_spec,
+                                                  preprocessing_layer_creator)
   # create the random network distillation object
   random_network_distillation = None
   if use_random_network_distillation:

compiler_opt/rl/trainer.py

Lines changed: 9 additions & 5 deletions
@@ -20,9 +20,12 @@
 
 import gin
 import tensorflow as tf
+from compiler_opt.rl import random_net_distillation
+from tf_agents.agents import TFAgent
 from tf_agents.policies import policy_loader
 
 from tf_agents.utils import common as common_utils
+from typing import Optional
 
 _INLINING_DEFAULT_KEY = 'inlining_default'
 
@@ -43,10 +46,11 @@ class Trainer(object):
 
   def __init__(
       self,
-      root_dir,
-      agent,
-      random_network_distillation=None,
-      warmstart_policy_dir=None,
+      root_dir: str,
+      agent: TFAgent,
+      random_network_distillation: Optional[
+          random_net_distillation.RandomNetworkDistillation] = None,
+      warmstart_policy_dir: Optional[str] = None,
       # Params for summaries and logging
       checkpoint_interval=10000,
       log_interval=100,
@@ -180,7 +184,7 @@ def _save_checkpoint(self):
   def global_step_numpy(self):
     return self._global_step.numpy()
 
-  def train(self, dataset_iter, monitor_dict, num_iterations):
+  def train(self, dataset_iter, monitor_dict, num_iterations: int):
     """Trains policy with data from dataset_iter for num_iterations steps."""
     self._reset_metrics()
     # context management is implemented in decorator
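
Finally, a sketch of how the annotated Trainer is driven. dataset_iter and monitor_dict stand in for objects the real pipeline constructs, and the iteration count is arbitrary.

from tf_agents.agents import TFAgent

from compiler_opt.rl import trainer


def run_training(agent: TFAgent, root_dir: str, dataset_iter,
                 monitor_dict) -> None:
  # random_network_distillation and warmstart_policy_dir keep their None
  # defaults, so a root_dir and a TFAgent are enough to build a Trainer.
  llvm_trainer = trainer.Trainer(root_dir=root_dir, agent=agent)
  llvm_trainer.train(dataset_iter, monitor_dict, num_iterations=100)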
