Remove model_ref in token_graph_builder_model

boomanaiden154 · web-flow · commit 08327089000e · 2025-05-19T09:16:51.000-07:00
With the port to TF2 I added a hack to get access to instruction annotations and the node mask within the call function of TokenGraphBuilderModelNodeEmbed. This was necessary due to TF2 preferring eager mode so we could not just pass tensor references around. This patch makes everything more canonically TF2 by moving the data around through standard dataflow (with some slight complexity to ensure things are getting passed around to the right places) rather than passing references around. Either one should theoretically work with tf.function annotations depending upon scope, but this will definitely work. Reviewers: orodley, virajbshah, ondrasej Reviewed By: ondrasej Pull Request: #342
diff --git a/gematria/granite/python/gnn_model_base.py b/gematria/granite/python/gnn_model_base.py
@@ -61,6 +61,8 @@ class GraphNetworkLayer(tf.Module):
       features computed by the layer. When both a residual connection and layer
       normalization are used, the layer normalization op is inserted after the
       residual connection.
+    extra_node_inputs: Names of extra feed_dict members that should be passed
+      to the node model.
   """
 
   # NOTE(ondrasej): This should be one of the classes defined in
@@ -74,6 +76,7 @@ class GraphNetworkLayer(tf.Module):
   edges_output_size: Sequence[int] | None = None
   nodes_output_size: Sequence[int] | None = None
   globals_output_size: Sequence[int] | None = None
+  extra_node_inputs: Sequence[str] | None = None
 
 
 class GnnModelBase(model_base.ModelBase):
@@ -376,7 +379,15 @@ def _execute_graph_network(self, feed_dict) -> graph_nets.graphs.GraphsTuple:
       )
       for iteration in range(num_iterations):
         residual_input = graphs_tuple
-        graphs_tuple = layer.module(graphs_tuple)
+        extra_node_args = {}
+        if layer.extra_node_inputs is not None:
+          for extra_node_arg_name in layer.extra_node_inputs:
+            extra_node_args[extra_node_arg_name] = feed_dict[
+                extra_node_arg_name
+            ]
+        graphs_tuple = layer.module(
+            graphs_tuple, node_model_kwargs=extra_node_args
+        )
         if use_residual_connections:
           residual_op_name_base = (
               f'residual_connection_{layer_index}_{iteration}'
diff --git a/gematria/granite/python/token_graph_builder_model.py b/gematria/granite/python/token_graph_builder_model.py
@@ -345,7 +345,6 @@ def _create_graph_network_modules(
                     vocab_size=len(self._token_list),
                     common_embed_dim=self._common_node_embedding_size,
                     num_annotations=self._num_annotations,
-                    model_ref=self,
                     initializer=embedding_initializer,
                 ),
                 global_model_fn=functools.partial(
@@ -364,6 +363,10 @@ def _create_graph_network_modules(
             num_iterations=1,
             layer_normalization=options.EnableFeature.NEVER,
             residual_connection=options.EnableFeature.NEVER,
+            extra_node_inputs=(
+                'instruction_node_mask',
+                'instruction_annotations',
+            ),
         ),
         gnn_model_base.GraphNetworkLayer(
             module=graph_nets.modules.GraphNetwork(
@@ -410,7 +413,6 @@ def __init__(
       self,
       common_embed_dim,
       num_annotations,
-      model_ref,
       **kwargs,
   ) -> None:
     """Initializes node embeddings.
@@ -420,11 +422,8 @@ def __init__(
         embedding vectors. The remainder of the vector is filled with
         instruction annotation.
       num_annotations: The number of annotations per instruction.
-      model_ref: A reference to the model to get the instruction node mask and
-        instruction annotations.
       kwargs: Additional arguments to be passed to the internal `snt.Embed`s.
     """
-    self._model_ref = model_ref
 
     # The first `embed_dim - num_annotations` embedding values for all nodes.
     self._common_embed = snt.Embed(
@@ -446,6 +445,8 @@ def __init__(
   def __call__(
       self,
       inputs,
+      instruction_node_mask,
+      instruction_annotations,
   ):
     if not self._extra_embed:
       return self._common_embed(inputs)
@@ -458,10 +459,8 @@ def __call__(
             common_embeddings,
             tf.tensor_scatter_nd_update(
                 extra_embeddings,
-                indices=tf.where(
-                    self._model_ref._instruction_node_mask,
-                ),
-                updates=self._model_ref._instruction_annotations,
+                indices=tf.where(instruction_node_mask),
+                updates=instruction_annotations,
             ),
         ],
         axis=1,