Commit c7c0df0

Add model_transform in create supervised trainer (#2848)
* Add model_transform in create supervised trainer
* autopep8 fix
* Made changes in the model_transform
* autopep8 fix
* Add test for Supervised trainer output transform
* autopep8 fix
* changed code formatting
* Add necessary changes to tests for model transform
* autopep8 fix
* Some code formatting changes
* autopep8 fix
* Made code formatting changes
* autopep8 fix
* Code formatting changes
* Added test for model_output_transform
* autopep8 fix
* Changed something in the test
* Updated tests
* Update tests/ignite/engine/test_create_supervised.py
* Made some changes in the tests
* autopep8 fix
* Some change
* Revert tests and add docstrings
* Changed the test version
* Some necessary changes

---------

Co-authored-by: guptaaryan16 <[email protected]>
Co-authored-by: vfdev-5 <[email protected]>
1 parent 34be221 commit c7c0df0
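For context, the new argument lets the supervised trainer work with models whose forward pass does not return a plain prediction tensor. The following is a minimal usage sketch, assuming ignite 0.4.11 or later (where `model_transform` is available) and a hypothetical toy model DictOutputNet whose forward returns a dict:

import torch
import torch.nn as nn
from ignite.engine import create_supervised_trainer

class DictOutputNet(nn.Module):  # hypothetical toy model, for illustration only
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        # returns a dict, which loss_fn cannot consume directly
        return {"logits": self.fc(x)}

model = DictOutputNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

trainer = create_supervised_trainer(
    model,
    optimizer,
    loss_fn,
    model_transform=lambda output: output["logits"],  # dict -> tensor before loss_fn
)

data = [(torch.randn(8, 4), torch.randint(0, 2, (8,))) for _ in range(5)]
trainer.run(data, max_epochs=1)

Before this commit, such a model required a fully custom update function; the transform hook keeps the standard trainer usable.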

File tree

2 files changed: +99, -49 lines


ignite/engine/__init__.py

Lines changed: 38 additions & 4 deletions
@@ -48,6 +48,7 @@ def supervised_training_step(
     device: Optional[Union[str, torch.device]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
+    model_transform: Callable[[Any], Any] = lambda output: output,
     output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(),
     gradient_accumulation_steps: int = 1,
 ) -> Callable:
@@ -64,6 +65,8 @@ def supervised_training_step(
             with respect to the host. For other cases, this argument has no effect.
         prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
             tuple of tensors `(batch_x, batch_y)`.
+        model_transform: function that receives the output from the model and convert it into the form as required
+            by the loss function
         output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
             to be assigned to engine's state.output after each iteration. Default is returning `loss.item()`.
         gradient_accumulation_steps: Number of steps the gradients should be accumulated across.
@@ -86,6 +89,8 @@ def supervised_training_step(
     .. versionadded:: 0.4.5
     .. versionchanged:: 0.4.7
         Added Gradient Accumulation.
+    .. versionchanged:: 0.4.11
+        Added `model_transform` to transform model's output
     """
 
     if gradient_accumulation_steps <= 0:
@@ -99,7 +104,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to
         optimizer.zero_grad()
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-        y_pred = model(x)
+        output = model(x)
+        y_pred = model_transform(output)
         loss = loss_fn(y_pred, y)
         if gradient_accumulation_steps > 1:
             loss = loss / gradient_accumulation_steps
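The hunk above is the heart of the change: the raw forward output is routed through model_transform before the loss is computed. As a plain-PyTorch sketch of what one generated update step now does (gradient accumulation, device placement and output_transform omitted; training_step is a hypothetical helper, not part of ignite):

def training_step(model, optimizer, loss_fn, batch, model_transform=lambda o: o):
    model.train()
    x, y = batch
    output = model(x)                 # raw forward output, e.g. a dict or tuple
    y_pred = model_transform(output)  # e.g. lambda out: out["logits"]
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()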
@@ -118,6 +124,7 @@ def supervised_training_step_amp(
     device: Optional[Union[str, torch.device]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
+    model_transform: Callable[[Any], Any] = lambda output: output,
     output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(),
     scaler: Optional["torch.cuda.amp.GradScaler"] = None,
     gradient_accumulation_steps: int = 1,
@@ -135,6 +142,8 @@ def supervised_training_step_amp(
             with respect to the host. For other cases, this argument has no effect.
         prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
             tuple of tensors `(batch_x, batch_y)`.
+        model_transform: function that receives the output from the model and convert it into the form as required
+            by the loss function
         output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
             to be assigned to engine's state.output after each iteration. Default is returning `loss.item()`.
         scaler: GradScaler instance for gradient scaling. (default: None)
@@ -160,6 +169,8 @@ def supervised_training_step_amp(
     .. versionadded:: 0.4.5
     .. versionchanged:: 0.4.7
         Added Gradient Accumulation.
+    .. versionchanged:: 0.4.11
+        Added `model_transform` to transform model's output
     """
 
     try:
@@ -179,7 +190,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
         with autocast(enabled=True):
-            y_pred = model(x)
+            output = model(x)
+            y_pred = model_transform(output)
             loss = loss_fn(y_pred, y)
             if gradient_accumulation_steps > 1:
                 loss = loss / gradient_accumulation_steps
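In the AMP variant the transform is applied inside the autocast block, so it runs under mixed precision as well. A usage sketch, reusing model, optimizer and loss_fn from the earlier example and assuming a CUDA device; amp_mode and scaler are the create_supervised_trainer arguments that select this code path (scaler=True asks ignite to create a GradScaler internally):

trainer = create_supervised_trainer(
    model,
    optimizer,
    loss_fn,
    device="cuda",
    amp_mode="amp",    # selects supervised_training_step_amp
    scaler=True,       # gradient scaling with torch.cuda.amp.GradScaler
    model_transform=lambda output: output["logits"],
)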
@@ -204,6 +216,7 @@ def supervised_training_step_apex(
     device: Optional[Union[str, torch.device]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
+    model_transform: Callable[[Any], Any] = lambda output: output,
     output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(),
     gradient_accumulation_steps: int = 1,
 ) -> Callable:
@@ -220,6 +233,8 @@ def supervised_training_step_apex(
             with respect to the host. For other cases, this argument has no effect.
         prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
             tuple of tensors `(batch_x, batch_y)`.
+        model_transform: function that receives the output from the model and convert it into the form as required
+            by the loss function
         output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
             to be assigned to engine's state.output after each iteration. Default is returning `loss.item()`.
         gradient_accumulation_steps: Number of steps the gradients should be accumulated across.
@@ -243,6 +258,8 @@ def supervised_training_step_apex(
     .. versionadded:: 0.4.5
     .. versionchanged:: 0.4.7
         Added Gradient Accumulation.
+    .. versionchanged:: 0.4.11
+        Added `model_transform` to transform model's output
     """
 
     try:
@@ -261,7 +278,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to
         optimizer.zero_grad()
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-        y_pred = model(x)
+        output = model(x)
+        y_pred = model_transform(output)
         loss = loss_fn(y_pred, y)
         if gradient_accumulation_steps > 1:
             loss = loss / gradient_accumulation_steps
@@ -281,6 +299,7 @@ def supervised_training_step_tpu(
     device: Optional[Union[str, torch.device]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
+    model_transform: Callable[[Any], Any] = lambda output: output,
     output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(),
     gradient_accumulation_steps: int = 1,
 ) -> Callable:
@@ -297,6 +316,8 @@ def supervised_training_step_tpu(
             with respect to the host. For other cases, this argument has no effect.
         prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
             tuple of tensors `(batch_x, batch_y)`.
+        model_transform: function that receives the output from the model and convert it into the form as required
+            by the loss function
         output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
             to be assigned to engine's state.output after each iteration. Default is returning `loss.item()`.
         gradient_accumulation_steps: Number of steps the gradients should be accumulated across.
@@ -320,6 +341,8 @@ def supervised_training_step_tpu(
     .. versionadded:: 0.4.5
     .. versionchanged:: 0.4.7
         Added Gradient Accumulation argument for all supervised training methods.
+    .. versionchanged:: 0.4.11
+        Added `model_transform` to transform model's output
     """
     try:
         import torch_xla.core.xla_model as xm
@@ -337,7 +360,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to
         optimizer.zero_grad()
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-        y_pred = model(x)
+        output = model(x)
+        y_pred = model_transform(output)
         loss = loss_fn(y_pred, y)
         if gradient_accumulation_steps > 1:
             loss = loss / gradient_accumulation_steps
@@ -384,6 +408,7 @@ def create_supervised_trainer(
     device: Optional[Union[str, torch.device]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
+    model_transform: Callable[[Any], Any] = lambda output: output,
     output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(),
     deterministic: bool = False,
     amp_mode: Optional[str] = None,
@@ -403,6 +428,8 @@ def create_supervised_trainer(
             with respect to the host. For other cases, this argument has no effect.
         prepare_batch: function that receives `batch`, `device`, `non_blocking` and outputs
             tuple of tensors `(batch_x, batch_y)`.
+        model_transform: function that receives the output from the model and convert it into the form as required
+            by the loss function
         output_transform: function that receives 'x', 'y', 'y_pred', 'loss' and returns value
             to be assigned to engine's state.output after each iteration. Default is returning `loss.item()`.
         deterministic: if True, returns deterministic engine of type
@@ -496,6 +523,8 @@ def output_transform_fn(x, y, y_pred, loss):
 
     .. versionchanged:: 0.4.7
         Added Gradient Accumulation argument for all supervised training methods.
+    .. versionchanged:: 0.4.11
+        Added `model_transform` to transform model's output
     """
 
     device_type = device.type if isinstance(device, torch.device) else device
@@ -510,6 +539,7 @@ def output_transform_fn(x, y, y_pred, loss):
             device,
             non_blocking,
             prepare_batch,
+            model_transform,
             output_transform,
             _scaler,
             gradient_accumulation_steps,
@@ -522,6 +552,7 @@ def output_transform_fn(x, y, y_pred, loss):
             device,
             non_blocking,
             prepare_batch,
+            model_transform,
             output_transform,
             gradient_accumulation_steps,
         )
@@ -533,6 +564,7 @@ def output_transform_fn(x, y, y_pred, loss):
             device,
             non_blocking,
             prepare_batch,
+            model_transform,
             output_transform,
             gradient_accumulation_steps,
         )
@@ -544,6 +576,7 @@ def output_transform_fn(x, y, y_pred, loss):
             device,
             non_blocking,
             prepare_batch,
+            model_transform,
             output_transform,
             gradient_accumulation_steps,
         )
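As the hunks above show, create_supervised_trainer only forwards model_transform to whichever step factory matches the requested device and amp mode. The step factories can also be used directly when building a custom Engine; a sketch, again assuming ignite 0.4.11+ and reusing model, optimizer and loss_fn from the first example:

from ignite.engine import Engine, supervised_training_step

update_fn = supervised_training_step(
    model,
    optimizer,
    loss_fn,
    model_transform=lambda output: output["logits"],
)
custom_trainer = Engine(update_fn)  # roughly what create_supervised_trainer builds on the default path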
@@ -662,6 +695,7 @@ def create_supervised_evaluator(
     device: Optional[Union[str, torch.device]] = None,
     non_blocking: bool = False,
     prepare_batch: Callable = _prepare_batch,
+    model_transform: Callable[[Any], Any] = lambda output: output,
     output_transform: Callable[[Any, Any, Any], Any] = lambda x, y, y_pred: (y_pred, y),
     amp_mode: Optional[str] = None,
 ) -> Engine:
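The same argument is added to create_supervised_evaluator, so the transform used during training can be reused at evaluation time before metrics see the predictions. A sketch, assuming ignite 0.4.11+ and reusing model and data from the first example:

from ignite.metrics import Accuracy
from ignite.engine import create_supervised_evaluator

evaluator = create_supervised_evaluator(
    model,
    metrics={"accuracy": Accuracy()},
    model_transform=lambda output: output["logits"],  # metrics then receive (y_pred, y)
)
evaluator.run(data)
print(evaluator.state.metrics["accuracy"])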
