Commit 0889219

Rename agent_creators to agent_config (#239)
Post PR #237, for naming consistency.
1 parent f6cc33d commit 0889219

12 files changed: +78 -80 lines changed
compiler_opt/rl/agent_creators.py renamed to compiler_opt/rl/agent_config.py

File renamed without changes.
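Call sites swap the module name, and keyword arguments named agent_config become agent_cfg. A minimal before/after sketch (create_agent and PPOAgentConfig appear in the diffs below; the spec and preprocessing-layer values are assumed to be defined by the caller):

    # Before this commit:
    # from compiler_opt.rl import agent_creators
    # tf_agent = agent_creators.create_agent(
    #     agent_creators.PPOAgentConfig(
    #         time_step_spec=time_step_spec, action_spec=action_spec),
    #     preprocessing_layer_creator=preprocessing_layer_creator)

    # After this commit:
    from compiler_opt.rl import agent_config

    tf_agent = agent_config.create_agent(
        agent_config.PPOAgentConfig(
            time_step_spec=time_step_spec, action_spec=action_spec),
        preprocessing_layer_creator=preprocessing_layer_creator)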

compiler_opt/rl/agent_creators_test.py renamed to compiler_opt/rl/agent_config_test.py

Lines changed: 8 additions & 8 deletions
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for compiler_opt.rl.agent_creators."""
+"""Tests for compiler_opt.rl.agent_config."""
 
 import gin
 import tensorflow as tf
@@ -24,7 +24,7 @@
 from tf_agents.specs import tensor_spec
 from tf_agents.trajectories import time_step
 
-from compiler_opt.rl import agent_creators
+from compiler_opt.rl import agent_config
 
 
 def _observation_processing_layer(obs_spec):
@@ -54,8 +54,8 @@ def test_create_behavioral_cloning_agent(self):
     gin.bind_parameter('create_agent.policy_network', q_network.QNetwork)
     gin.bind_parameter('BehavioralCloningAgent.optimizer',
                        tf.compat.v1.train.AdamOptimizer())
-    tf_agent = agent_creators.create_agent(
-        agent_creators.BCAgentConfig(
+    tf_agent = agent_config.create_agent(
+        agent_config.BCAgentConfig(
             time_step_spec=self._time_step_spec, action_spec=self._action_spec),
         preprocessing_layer_creator=_observation_processing_layer)
     self.assertIsInstance(tf_agent,
@@ -64,8 +64,8 @@ def test_create_behavioral_cloning_agent(self):
   def test_create_dqn_agent(self):
     gin.bind_parameter('create_agent.policy_network', q_network.QNetwork)
     gin.bind_parameter('DqnAgent.optimizer', tf.compat.v1.train.AdamOptimizer())
-    tf_agent = agent_creators.create_agent(
-        agent_creators.DQNAgentConfig(
+    tf_agent = agent_config.create_agent(
+        agent_config.DQNAgentConfig(
             time_step_spec=self._time_step_spec, action_spec=self._action_spec),
         preprocessing_layer_creator=_observation_processing_layer)
     self.assertIsInstance(tf_agent, dqn_agent.DqnAgent)
@@ -74,8 +74,8 @@ def test_create_ppo_agent(self):
     gin.bind_parameter('create_agent.policy_network',
                        actor_distribution_network.ActorDistributionNetwork)
     gin.bind_parameter('PPOAgent.optimizer', tf.compat.v1.train.AdamOptimizer())
-    tf_agent = agent_creators.create_agent(
-        agent_creators.PPOAgentConfig(
+    tf_agent = agent_config.create_agent(
+        agent_config.PPOAgentConfig(
             time_step_spec=self._time_step_spec, action_spec=self._action_spec),
         preprocessing_layer_creator=_observation_processing_layer)
     self.assertIsInstance(tf_agent, ppo_agent.PPOAgent)
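As the tests above illustrate, create_agent reads its policy network and optimizer from gin, so those bindings must be in place before the call. A standalone sketch of the same pattern, assuming time_step_spec, action_spec, and preprocessing_layer_creator are built elsewhere (e.g. by a problem configuration):

    import gin
    import tensorflow as tf
    from tf_agents.networks import q_network
    from compiler_opt.rl import agent_config

    # Bind the network class and optimizer that create_agent pulls from gin.
    gin.bind_parameter('create_agent.policy_network', q_network.QNetwork)
    gin.bind_parameter('DqnAgent.optimizer', tf.compat.v1.train.AdamOptimizer())

    tf_agent = agent_config.create_agent(
        agent_config.DQNAgentConfig(
            time_step_spec=time_step_spec, action_spec=action_spec),
        preprocessing_layer_creator=preprocessing_layer_creator)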

compiler_opt/rl/data_reader.py

Lines changed: 24 additions & 24 deletions
@@ -19,11 +19,11 @@
 import tensorflow as tf
 from tf_agents.trajectories import trajectory
 
-from compiler_opt.rl import agent_creators
+from compiler_opt.rl import agent_config
 
 
 def create_parser_fn(
-    agent_config: agent_creators.AgentConfig
+    agent_cfg: agent_config.AgentConfig
 ) -> Callable[[str], trajectory.Trajectory]:
   """Create a parser function for reading from a serialized tf.SequenceExample.
 
@@ -48,16 +48,16 @@ def _parser_fn(serialized_proto):
         (tensor_spec.name,
          tf.io.FixedLenSequenceFeature(
              shape=tensor_spec.shape, dtype=tensor_spec.dtype))
-        for tensor_spec in agent_config.time_step_spec.observation.values())
+        for tensor_spec in agent_cfg.time_step_spec.observation.values())
     sequence_features[
-        agent_config.action_spec.name] = tf.io.FixedLenSequenceFeature(
-            shape=agent_config.action_spec.shape,
-            dtype=agent_config.action_spec.dtype)
-    sequence_features[agent_config.time_step_spec.reward
-                      .name] = tf.io.FixedLenSequenceFeature(
-                          shape=agent_config.time_step_spec.reward.shape,
-                          dtype=agent_config.time_step_spec.reward.dtype)
-    sequence_features.update(agent_config.get_policy_info_parsing_dict())
+        agent_cfg.action_spec.name] = tf.io.FixedLenSequenceFeature(
+            shape=agent_cfg.action_spec.shape,
+            dtype=agent_cfg.action_spec.dtype)
+    sequence_features[
+        agent_cfg.time_step_spec.reward.name] = tf.io.FixedLenSequenceFeature(
+            shape=agent_cfg.time_step_spec.reward.shape,
+            dtype=agent_cfg.time_step_spec.reward.dtype)
+    sequence_features.update(agent_cfg.get_policy_info_parsing_dict())
 
     # pylint: enable=g-complex-comprehension
     with tf.name_scope('parse'):
@@ -66,15 +66,15 @@ def _parser_fn(serialized_proto):
           context_features=context_features,
           sequence_features=sequence_features)
       # TODO(yundi): make the transformed reward configurable.
-      action = parsed_sequence[agent_config.action_spec.name]
-      reward = tf.cast(parsed_sequence[agent_config.time_step_spec.reward.name],
+      action = parsed_sequence[agent_cfg.action_spec.name]
+      reward = tf.cast(parsed_sequence[agent_cfg.time_step_spec.reward.name],
                        tf.float32)
 
-      policy_info = agent_config.process_parsed_sequence_and_get_policy_info(
+      policy_info = agent_cfg.process_parsed_sequence_and_get_policy_info(
           parsed_sequence)
 
-      del parsed_sequence[agent_config.time_step_spec.reward.name]
-      del parsed_sequence[agent_config.action_spec.name]
+      del parsed_sequence[agent_cfg.time_step_spec.reward.name]
+      del parsed_sequence[agent_cfg.action_spec.name]
       full_trajectory = trajectory.from_episode(
           observation=parsed_sequence,
          action=action,
@@ -86,7 +86,7 @@ def _parser_fn(serialized_proto):
 
 
 def create_flat_sequence_example_dataset_fn(
-    agent_config: agent_creators.AgentConfig
+    agent_cfg: agent_config.AgentConfig
 ) -> Callable[[List[str]], tf.data.Dataset]:
   """Get a function that creates a dataset from serialized sequence examples.
 
@@ -103,7 +103,7 @@ def create_flat_sequence_example_dataset_fn(
     a `tf.data.Dataset`. Treating this dataset as an iterator yields batched
     `trajectory.Trajectory` instances with shape `[...]`.
   """
-  parser_fn = create_parser_fn(agent_config)
+  parser_fn = create_parser_fn(agent_cfg)
 
   def _sequence_example_dataset_fn(sequence_examples):
     # Data collector returns empty strings for corner cases, filter them out
@@ -123,7 +123,7 @@ def _sequence_example_dataset_fn(sequence_examples):
 
 
 def create_sequence_example_dataset_fn(
-    agent_config: agent_creators.AgentConfig, batch_size: int,
+    agent_cfg: agent_config.AgentConfig, batch_size: int,
     train_sequence_length: int) -> Callable[[List[str]], tf.data.Dataset]:
   """Get a function that creates a dataset from serialized sequence examples.
 
@@ -142,7 +142,7 @@ def create_sequence_example_dataset_fn(
   trajectory_shuffle_buffer_size = 1024
 
   flat_sequence_example_dataset_fn = create_flat_sequence_example_dataset_fn(
-      agent_config)
+      agent_cfg)
 
   def _sequence_example_dataset_fn(sequence_examples):
     # Data collector returns empty strings for corner cases, filter them out
@@ -160,7 +160,7 @@ def _sequence_example_dataset_fn(sequence_examples):
 # TODO(yundi): PyType check of input_dataset as Type[tf.data.Dataset] is not
 # working.
 def create_file_dataset_fn(
-    agent_config: agent_creators.AgentConfig,
+    agent_cfg: agent_config.AgentConfig,
     batch_size: int,
     train_sequence_length: int,
     input_dataset) -> Callable[[List[str]], tf.data.Dataset]:
@@ -185,7 +185,7 @@ def create_file_dataset_fn(
   shuffle_buffer_size = 1024
   trajectory_shuffle_buffer_size = 1024
 
-  parser_fn = create_parser_fn(agent_config)
+  parser_fn = create_parser_fn(agent_cfg)
 
   def _file_dataset_fn(data_path):
     dataset = (
@@ -213,7 +213,7 @@ def _file_dataset_fn(data_path):
 
 
 def create_tfrecord_dataset_fn(
-    agent_config: agent_creators.AgentConfig, batch_size: int,
+    agent_cfg: agent_config.AgentConfig, batch_size: int,
     train_sequence_length: int) -> Callable[[List[str]], tf.data.Dataset]:
   """Get a function that creates an dataset from tfrecord.
 
@@ -230,7 +230,7 @@ def create_tfrecord_dataset_fn(
     shape `[B, T, ...]`.
   """
   return create_file_dataset_fn(
-      agent_config,
+      agent_cfg,
       batch_size,
       train_sequence_length,
       input_dataset=tf.data.TFRecordDataset)
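Since the factories in data_reader now take agent_cfg, external callers that passed the old agent_config keyword must update as well. A minimal usage sketch (the specs are assumed to come from a problem configuration, and the tfrecord path, batch size, and sequence length are placeholders):

    from compiler_opt.rl import agent_config
    from compiler_opt.rl import data_reader

    agent_cfg = agent_config.PPOAgentConfig(
        time_step_spec=time_step_spec, action_spec=action_spec)
    dataset_fn = data_reader.create_tfrecord_dataset_fn(
        agent_cfg=agent_cfg, batch_size=2, train_sequence_length=3)
    # Iterating the returned dataset yields batched trajectory.Trajectory
    # instances with shape [B, T, ...].
    dataset = dataset_fn(['/tmp/example.tfrecord'])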

compiler_opt/rl/data_reader_test.py

Lines changed: 11 additions & 11 deletions
@@ -24,7 +24,7 @@
 from tf_agents.trajectories import time_step
 from tf_agents.trajectories import trajectory
 
-from compiler_opt.rl import agent_creators
+from compiler_opt.rl import agent_config
 from compiler_opt.rl import data_reader
 
 
@@ -41,8 +41,8 @@ def _define_sequence_example(agent_config_type, is_action_discrete):
   ).float_list.value.append(1.23)
   example.feature_lists.feature_list['reward'].feature.add(
   ).float_list.value.append(2.3)
-  if agent_config_type in (agent_creators.PPOAgentConfig,
-                           agent_creators.DistributedPPOAgentConfig):
+  if agent_config_type in (agent_config.PPOAgentConfig,
+                           agent_config.DistributedPPOAgentConfig):
     if is_action_discrete:
       example.feature_lists.feature_list[
           'CategoricalProjectionNetwork_logits'].feature.add(
@@ -97,20 +97,20 @@ def _create_tfrecord_datasource(self, example):
 
   _test_config = (('SequenceExampleDatasetFn',
                    data_reader.create_sequence_example_dataset_fn,
-                   agent_creators.PPOAgentConfig,
+                   agent_config.PPOAgentConfig,
                    _create_sequence_example_datasource),
                   ('TFRecordDatasetFn', data_reader.create_tfrecord_dataset_fn,
-                   agent_creators.PPOAgentConfig, _create_tfrecord_datasource))
+                   agent_config.PPOAgentConfig, _create_tfrecord_datasource))
 
   @parameterized.named_parameters(*_test_config)
   def test_create_dataset_fn(self, test_fn, _, data_source_fn):
-    agent_type_override = agent_creators.DQNAgentConfig
+    agent_type_override = agent_config.DQNAgentConfig
     example = _define_sequence_example(
         agent_type_override, is_action_discrete=True)
 
     data_source = data_source_fn(self, example)
     dataset_fn = test_fn(
-        agent_config=agent_type_override(
+        agent_cfg=agent_type_override(
             time_step_spec=self._time_step_spec,
             action_spec=self._discrete_action_spec),
         batch_size=2,
@@ -131,11 +131,11 @@ def test_create_dataset_fn(self, test_fn, _, data_source_fn):
 
   _distrib_test_config = (('SequenceExampleDatasetFnDistributed',
                            data_reader.create_sequence_example_dataset_fn,
-                           agent_creators.DistributedPPOAgentConfig,
+                           agent_config.DistributedPPOAgentConfig,
                            _create_sequence_example_datasource),
                           ('TFRecordDatasetFnDistributed',
                            data_reader.create_tfrecord_dataset_fn,
-                           agent_creators.DistributedPPOAgentConfig,
+                           agent_config.DistributedPPOAgentConfig,
                            _create_tfrecord_datasource))
 
   @parameterized.named_parameters(*(_test_config + _distrib_test_config))
@@ -147,7 +147,7 @@ def test_ppo_policy_info_discrete(self, test_fn, agent_config_type,
     data_source = data_source_fn(self, example)
 
     dataset_fn = test_fn(
-        agent_config=agent_config_type(
+        agent_cfg=agent_config_type(
             time_step_spec=self._time_step_spec,
             action_spec=self._discrete_action_spec),
         batch_size=2,
@@ -169,7 +169,7 @@ def test_ppo_policy_info_continuous(self, test_fn, agent_config_type,
     data_source = data_source_fn(self, example)
 
     dataset_fn = test_fn(
-        agent_config=agent_config_type(
+        agent_cfg=agent_config_type(
             time_step_spec=self._time_step_spec,
             action_spec=self._continuous_action_spec),
         batch_size=2,

compiler_opt/rl/distributed/ppo_collect_lib.py

Lines changed: 8 additions & 8 deletions
@@ -41,7 +41,7 @@
 from compiler_opt.rl import data_reader
 from compiler_opt.rl import policy_saver
 from compiler_opt.rl import registry
-from compiler_opt.rl import agent_creators
+from compiler_opt.rl import agent_config
 from compiler_opt.rl import compilation_runner
 
 
@@ -65,7 +65,7 @@ class ReverbCompilationObserver(compilation_runner.CompilationResultObserver):
   """Observer which sends compilation results to reverb"""
 
   def __init__(self,
-               agent_config,
+               agent_cfg,
                replay_buffer_server_address: str,
                sequence_length: int,
                initial_priority: float = 0.0):
@@ -79,7 +79,7 @@ def __init__(self,
         priority=initial_priority)
 
     self._parser = data_reader.create_flat_sequence_example_dataset_fn(
-        agent_config=agent_config)
+        agent_cfg=agent_cfg)
 
   def _is_actionable_result(
       self, result: compilation_runner.CompilationResult) -> bool:
@@ -121,10 +121,10 @@ def collect(corpus_path: str, replay_buffer_server_address: str,
   logging.info('Initializing the distributed PPO agent')
   problem_config = registry.get_configuration()
   time_step_spec, action_spec = problem_config.get_signature_spec()
-  agent_config = agent_creators.DistributedPPOAgentConfig(
+  agent_cfg = agent_config.DistributedPPOAgentConfig(
       time_step_spec=time_step_spec, action_spec=action_spec)
-  agent = agent_creators.create_agent(
-      agent_config.agent,
+  agent = agent_config.create_agent(
+      agent_cfg.agent,
       preprocessing_layer_creator=problem_config
       .get_preprocessing_layer_creator())
 
@@ -145,15 +145,15 @@ def collect(corpus_path: str, replay_buffer_server_address: str,
   create_observer_fns = [
       functools.partial(
           ReverbCompilationObserver,
-          agent_config=agent_config,
+          agent_cfg=agent_cfg,
          replay_buffer_server_address=replay_buffer_server_address,
          sequence_length=sequence_length)
   ]
 
   # Setup the corpus
   logging.info('Constructing tf.data pipeline and module corpus')
   dataset_fn = data_reader.create_flat_sequence_example_dataset_fn(
-      agent_config=agent_config)
+      agent_cfg=agent_cfg)
 
   def sequence_example_iterator_fn(seq_ex: List[str]):
     return iter(dataset_fn(seq_ex).prefetch(tf.data.AUTOTUNE))
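The renamed keyword flows through the observer wiring in collect() as well; a condensed sketch of the construction above (the reverb server address and sequence length are caller-supplied placeholders, and the keyword matches the renamed __init__ parameter):

    import functools

    agent_cfg = agent_config.DistributedPPOAgentConfig(
        time_step_spec=time_step_spec, action_spec=action_spec)
    create_observer_fns = [
        functools.partial(
            ReverbCompilationObserver,
            agent_cfg=agent_cfg,
            replay_buffer_server_address=replay_buffer_server_address,
            sequence_length=sequence_length)
    ]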

compiler_opt/rl/distributed/ppo_eval_lib.py

Lines changed: 5 additions & 5 deletions
@@ -33,7 +33,7 @@
 from compiler_opt.rl import local_data_collector
 from compiler_opt.rl import gin_external_configurables  # pylint: disable=unused-import
 from compiler_opt.rl import corpus
-from compiler_opt.rl import agent_creators
+from compiler_opt.rl import agent_config
 from compiler_opt.rl import registry
 from compiler_opt.rl import policy_saver
 from compiler_opt.rl import data_collector
@@ -57,10 +57,10 @@ def evaluate(root_dir: str, corpus_path: str,
   logging.info('Initializing the distributed PPO agent')
   problem_config = registry.get_configuration()
   time_step_spec, action_spec = problem_config.get_signature_spec()
-  agent_config = agent_creators.DistributedPPOAgentConfig(
+  agent_cfg = agent_config.DistributedPPOAgentConfig(
       time_step_spec=time_step_spec, action_spec=action_spec)
-  agent = agent_creators.create_agent(
-      agent_config.agent,
+  agent = agent_config.create_agent(
+      agent_cfg.agent,
       preprocessing_layer_creator=problem_config
       .get_preprocessing_layer_creator())
 
@@ -85,7 +85,7 @@ def evaluate(root_dir: str, corpus_path: str,
   # Setup the corpus
   logging.info('Constructing tf.data pipeline and module corpus')
   dataset_fn = data_reader.create_flat_sequence_example_dataset_fn(
-      agent_config=agent_config)
+      agent_cfg=agent_cfg)
 
   def sequence_example_iterator_fn(seq_ex: List[str]):
     return iter(dataset_fn(seq_ex).prefetch(tf.data.AUTOTUNE))

compiler_opt/rl/distributed/ppo_reverb_server.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
 
 from compiler_opt.rl.distributed import ppo_reverb_server_lib
 from compiler_opt.rl import registry  # pylint: disable=unused-import
-from compiler_opt.rl import agent_creators  # pylint: disable=unused-import
+from compiler_opt.rl import agent_config  # pylint: disable=unused-import
 
 flags.DEFINE_string('root_dir', None,
                     'Root directory for writing logs/summaries/checkpoints.')

compiler_opt/rl/distributed/ppo_train_lib.py

Lines changed: 3 additions & 3 deletions
@@ -30,7 +30,7 @@
 from tf_agents.utils import common
 
 from compiler_opt.rl import gin_external_configurables  # pylint: disable=unused-import
-from compiler_opt.rl import agent_creators
+from compiler_opt.rl import agent_config
 from compiler_opt.rl import registry
 from compiler_opt.rl.distributed import learner as learner_lib
 
@@ -58,8 +58,8 @@ def train(
   # Create the agent.
   with strategy.scope():
     train_step = tf.compat.v1.train.get_or_create_global_step()
-    agent = agent_creators.create_agent(
-        agent_creators.DistributedPPOAgentConfig(
+    agent = agent_config.create_agent(
+        agent_config.DistributedPPOAgentConfig(
             time_step_spec=time_step_spec, action_spec=action_spec),
         preprocessing_layer_creator=problem_config
         .get_preprocessing_layer_creator())
