Skip to content

Commit 2894e8c

Browse files
Use tensors instead of numpy arrays in feed dicts
This makes it easier to use tf.function on a training step, as we are now passing tensors around rather than converting to tensors later on.

Reviewers: virajbshah, ondrasej, orodley
Reviewed By: ondrasej
Pull Request: #344
1 parent 0832708 commit 2894e8c

File tree

1 file changed

+12
-13
lines changed

1 file changed

+12
-13
lines changed

gematria/model/python/model_base.py

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,6 @@
6060
INVALID_THROUGHPUT_VALUE = -1
6161

6262
_BASIC_BLOCK_INDEX_TF_DTYPE = tf.dtypes.int32
63-
_BASIC_BLOCK_INDEX_NUMPY_DTYPE = _BASIC_BLOCK_INDEX_TF_DTYPE.as_numpy_dtype()
6463

6564
# A type variable that is either a basic block or block with throughput. The
6665
# advantage over typing.Union is that in each context, the typevar represents
@@ -755,20 +754,20 @@ def _finalize_batch(self, include_expected_outputs: bool) -> FeedDict:
755754
"""
756755
schedule = self._make_batch_feed_dict()
757756
if self._create_delta_block_index:
758-
schedule['delta_block_index'] = np.array(
759-
self._batch_delta_block_index, dtype=_BASIC_BLOCK_INDEX_NUMPY_DTYPE
757+
schedule['delta_block_index'] = tf.constant(
758+
self._batch_delta_block_index, dtype=_BASIC_BLOCK_INDEX_TF_DTYPE
760759
)
761760
if include_expected_outputs:
762-
schedule['expected_outputs'] = np.reshape(
763-
np.array(self._batch_expected_outputs, dtype=self.numpy_dtype),
761+
schedule['expected_outputs'] = tf.reshape(
762+
tf.constant(self._batch_expected_outputs, dtype=self.dtype),
764763
[-1, self.num_tasks],
765764
)
766-
schedule['output_mask'] = np.array(self._batch_mask, dtype=bool)
765+
schedule['output_mask'] = tf.constant(
766+
self._batch_mask, dtype=tf.dtypes.bool
767+
)
767768
if self._use_deltas:
768-
schedule['expected_outputs_deltas'] = np.reshape(
769-
np.array(
770-
self._batch_expected_outputs_deltas, dtype=self.numpy_dtype
771-
),
769+
schedule['expected_outputs_deltas'] = tf.reshape(
770+
tf.constant(self._batch_expected_outputs_deltas, dtype=self.dtype),
772771
[-1, self.num_tasks],
773772
)
774773

@@ -1304,16 +1303,16 @@ def _compute_loss(self, schedule: FeedDict) -> loss_utils.LossComputation:
13041303
output = self(schedule, train=True)
13051304
loss = loss_utils.LossComputation(
13061305
output['output'],
1307-
tf.constant(schedule['expected_outputs']),
1308-
tf.constant(schedule['output_mask']),
1306+
schedule['expected_outputs'],
1307+
schedule['output_mask'],
13091308
percentile_ranks=self._collected_percentile_ranks,
13101309
dtype=self.dtype,
13111310
)
13121311

13131312
if self._use_deltas:
13141313
self._delta_loss = loss_utils.LossComputation(
13151314
output['output_deltas'],
1316-
tf.constant(schedule['expected_outputs_deltas']),
1315+
schedule['expected_outputs_deltas'],
13171316
output['output_mask_deltas'],
13181317
percentile_ranks=self._collected_percentile_ranks,
13191318
dtype=self.dtype,

0 commit comments

Comments (0)