Skip to content

Commit 8944d40

Browse files
committed
refactor: avoid using model_wrapper improperly
1 parent 1dd84c7 commit 8944d40

File tree

2 files changed: +19 additions, -19 deletions

optimum/neuron/models/inference/backend/modules/decoder/modeling_decoder.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -338,10 +338,10 @@ def __init__(
338338
config: PretrainedConfig,
339339
neuron_config: NxDNeuronConfig,
340340
traced_model: torch.jit.ScriptModule,
341-
model_wrappers: list[NxDGraphBuilder],
341+
graph_builders: list[NxDGraphBuilder],
342342
):
343343
super().__init__(
344-
config=config, neuron_config=neuron_config, traced_model=traced_model, model_wrappers=model_wrappers
344+
config=config, neuron_config=neuron_config, traced_model=traced_model, graph_builders=graph_builders
345345
)
346346
ctx_neuron_config = NxDModelForCausalLM._create_context_encoding_config(neuron_config)
347347
self.context_encoding_model = NxDDecoderWrapper(
@@ -617,14 +617,14 @@ def _from_pretrained(
617617
traced_model = torch.jit.load(os.path.join(tmpdir, cls.COMPILED_MODEL_FILE_NAME))
618618
else:
619619
traced_model = torch.jit.load(os.path.join(model_id, cls.COMPILED_MODEL_FILE_NAME))
620-
model_builders = NxDModelForCausalLM.create_graph_builders(
620+
graph_builders = NxDModelForCausalLM.create_graph_builders(
621621
cls._model_cls, config=config, neuron_config=neuron_config
622622
)
623623
model = cls(
624624
config=config,
625625
neuron_config=neuron_config,
626626
traced_model=traced_model,
627-
model_wrappers=model_builders,
627+
graph_builders=graph_builders,
628628
)
629629
model.load_weights(
630630
model_id,
@@ -674,7 +674,7 @@ def _export(
674674
# Evaluate head_dim if it is defined but set to null (like in Mixtral for transformers 4.54+)
675675
if hasattr(config, "head_dim") and config.head_dim is None:
676676
config.head_dim = config.hidden_size // config.num_attention_heads
677-
model_builders = cls.create_graph_builders(
677+
graph_builders = cls.create_graph_builders(
678678
model_cls=cls._model_cls,
679679
config=config,
680680
neuron_config=neuron_config,
@@ -689,14 +689,14 @@ def _export(
689689
with hub_neuronx_cache(entry=cache_entry):
690690
traced_model = NxDPreTrainedModel.compile(
691691
neuron_config=neuron_config,
692-
model_wrappers=model_builders,
692+
graph_builders=graph_builders,
693693
compiler_args=cls.get_compiler_args(neuron_config),
694694
)
695695
model = cls(
696696
config=config,
697697
neuron_config=neuron_config,
698698
traced_model=traced_model,
699-
model_wrappers=model_builders,
699+
graph_builders=graph_builders,
700700
)
701701
if load_weights:
702702
model.load_weights(

optimum/neuron/models/inference/backend/pretrained_model.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def get_shards_path(dest_path):
4949

5050
def get_builder(
5151
neuron_config: NxDNeuronConfig,
52-
model_wrappers: dict[str, NxDGraphBuilder],
52+
graph_builders: dict[str, NxDGraphBuilder],
5353
debug: bool = False,
5454
checkpoint_loader=None,
5555
compiler_args: str = None,
@@ -63,7 +63,7 @@ def get_builder(
6363
6464
Args:
6565
neuron_config (NxDNeuronConfig): The Neuron configuration.
66-
model_wrappers (list[NxDGraphBuilder]): The model graphs to be added to the builder.
66+
graph_builders (dict[str, NxDGraphBuilder]): The model graphs to be added to the builder, keyed by tag.
6767
debug (bool): Whether to enable debug mode.
6868
checkpoint_loader (callable): A function to load the model's state dictionary and weights.
6969
compiler_args (str): Compiler arguments to be passed to the builder.
@@ -86,13 +86,13 @@ def get_builder(
8686
logical_nc_config=neuron_config.logical_nc_config,
8787
weights_to_skip_layout_optimization=neuron_config.weights_to_skip_layout_optimization,
8888
)
89-
for tag, model in model_wrappers.items():
89+
for tag, graph_builder in graph_builders.items():
9090
builder.add(
9191
key=tag,
92-
model_instance=model.get_model_instance(),
93-
example_inputs=model.input_generator(),
92+
model_instance=graph_builder.get_model_instance(),
93+
example_inputs=graph_builder.input_generator(),
9494
compiler_args=compiler_args,
95-
priority_model_idx=model.priority_model_idx,
95+
priority_model_idx=graph_builder.priority_model_idx,
9696
)
9797
return builder
9898

@@ -109,14 +109,14 @@ def __init__(
109109
config: PretrainedConfig,
110110
neuron_config: NxDNeuronConfig,
111111
traced_model: torch.jit.ScriptModule,
112-
model_wrappers: dict[str, NxDGraphBuilder],
112+
graph_builders: dict[str, NxDGraphBuilder],
113113
):
114114
self.config = copy.deepcopy(config)
115115
self.neuron_config = copy.deepcopy(neuron_config)
116116
# Override torch_dtype in config as it is used by the neuronx_distributed code to cast weights to the correct type
117117
self.config.torch_dtype = self.neuron_config.torch_dtype
118118
self._traced_model = traced_model
119-
self.model_wrappers = model_wrappers # Required for loading weights
119+
self.graph_builders = graph_builders # Required for loading weights
120120

121121
# NxDPretrainedModel abstract API
122122
@abstractmethod
@@ -131,8 +131,8 @@ def get_compiler_args(cls, neuron_config) -> str | None:
131131
return None
132132

133133
@staticmethod
134-
def compile(neuron_config, model_wrappers: dict[str, NxDGraphBuilder], compiler_args: str, debug: bool = False):
135-
builder = get_builder(neuron_config, model_wrappers, debug=debug, compiler_args=compiler_args)
134+
def compile(neuron_config, graph_builders: dict[str, NxDGraphBuilder], compiler_args: str, debug: bool = False):
135+
builder = get_builder(neuron_config, graph_builders, debug=debug, compiler_args=compiler_args)
136136
return builder.trace(initialize_model_weights=False)
137137

138138
def save(self, dest_path, weight_path: str | None = None):
@@ -153,7 +153,7 @@ def shard_checkpoint(self, src_path, dest_path, debug: bool = False):
153153
checkpoint_loader = partial(self.checkpoint_loader_fn, src_path, self.config, self.neuron_config)
154154
sharder = get_builder(
155155
self.neuron_config,
156-
self.model_wrappers,
156+
self.graph_builders,
157157
debug=debug,
158158
checkpoint_loader=checkpoint_loader,
159159
compiler_args=self.get_compiler_args(self.neuron_config),
@@ -191,7 +191,7 @@ def get_shard_name(rank):
191191
checkpoint_loader = partial(self.checkpoint_loader_fn, weights_path, self.config, self.neuron_config)
192192
sharder = get_builder(
193193
self.neuron_config,
194-
self.model_wrappers,
194+
self.graph_builders,
195195
debug=False,
196196
checkpoint_loader=checkpoint_loader,
197197
compiler_args=self.get_compiler_args(self.neuron_config),

0 commit comments

Comments
 (0)