Add convert_to_tflite to policy_utils

boomanaiden154 · web-flow · commit 079d4c7edad8 · 2025-03-03T09:53:16.000-08:00
This patch adds to policy_utils the function that converts policies, given as numpy parameter arrays serialized to bytes, into TFLite models. This is necessary so that we can perform the conversion on the workers rather than trying to perform it serially in the main invocation. Reviewers: mtrofin Reviewed By: mtrofin Pull Request: #447
diff --git a/compiler_opt/es/policy_utils.py b/compiler_opt/es/policy_utils.py
@@ -15,6 +15,7 @@
 
 from typing import Protocol
 from collections.abc import Sequence
+import os
 
 import gin
 import numpy as np
@@ -122,3 +123,32 @@ def save_policy(policy: 'tf_policy.TFPolicy | HasModelVariables',
   set_vectorized_parameters_for_policy(policy, parameters)
   saver = policy_saver.PolicySaver({policy_name: policy})
   saver.save(save_folder)
+
+
+def convert_to_tflite(policy_as_bytes: bytes, scratch_dir: str,
+                      base_policy_path: str) -> str:
+  """Converts a policy serialized to bytes to TFLite.
+
+  Args:
+    policy_as_bytes: Model parameters (float32) serialized to a byte stream.
+    scratch_dir: A temporary scratch directory that the models get saved into.
+    base_policy_path: Path to the base TF saved model giving the architecture.
+
+  Returns:
+    The path to the TFLite output directory, <scratch_dir>/tflite.
+  """
+  perturbation = np.frombuffer(policy_as_bytes, dtype=np.float32)
+
+  saved_model = tf.saved_model.load(base_policy_path)
+  set_vectorized_parameters_for_policy(saved_model, perturbation)
+
+  saved_model_dir = os.path.join(scratch_dir, 'saved_model')
+  tf.saved_model.save(
+      saved_model, saved_model_dir, signatures=saved_model.signatures)
+  source = os.path.join(base_policy_path, policy_saver.OUTPUT_SIGNATURE)
+  destination = os.path.join(saved_model_dir, policy_saver.OUTPUT_SIGNATURE)
+  tf.io.gfile.copy(source, destination)  # keep OUTPUT_SIGNATURE with the model
+
+  tflite_dir = os.path.join(scratch_dir, 'tflite')  # TFLite conversion output
+  policy_saver.convert_mlgo_model(saved_model_dir, tflite_dir)
+  return tflite_dir
diff --git a/compiler_opt/es/policy_utils_test.py b/compiler_opt/es/policy_utils_test.py
@@ -220,6 +220,27 @@ def test_tfpolicy_and_loaded_policy_produce_same_variable_order(self):
     # assert that they result in the same order of values
     np.testing.assert_array_almost_equal(tf_params, loaded_params)
 
+  def test_convert_to_tflite(self):
+    policy_save_path, _, _ = self._save_inlining_policy()
+    saved_model_path = os.path.join(policy_save_path, self.POLICY_NAME)
+
+    output_bytes = self.params.tobytes()
+
+    scratch_dir = self.create_tempdir()
+    tflite_dir = policy_utils.convert_to_tflite(output_bytes, scratch_dir,
+                                                saved_model_path)
+
+    self.assertTrue(os.path.exists(os.path.join(tflite_dir, 'model.tflite')))
+    self.assertTrue(
+        os.path.exists(os.path.join(tflite_dir, 'output_spec.json')))
+
+    # Additionally assert that the saved model that we create as part of the
+    # conversion process has the correct parameters.
+    load_path = os.path.join(scratch_dir, 'saved_model')
+    sm = tf.saved_model.load(load_path)
+    loaded_params = policy_utils.get_vectorized_parameters_from_policy(sm)
+    np.testing.assert_array_almost_equal(self.params, loaded_params)
+
+
 
 if __name__ == '__main__':
   absltest.main()