
Commit 9ab75fc

Tialo and y.korobko authored
fix typo (#39936)
* fix typo
* fix modular instead
* fix

---------

Co-authored-by: y.korobko <[email protected]>
1 parent 43b3f58 commit 9ab75fc

File tree

2 files changed: +2 -2 lines changed


src/transformers/models/gpt_oss/modeling_gpt_oss.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ def __init__(self, config):
 
     def forward(self, hidden_states: torch.Tensor, router_indices=None, routing_weights=None) -> torch.Tensor:
         """
-        When training is is more efficient to just loop over the experts and compute the output for each expert
+        When training it is more efficient to just loop over the experts and compute the output for each expert
         as otherwise the memory would explode.
 
         For inference we can sacrifice some memory and compute the output for all experts at once. By repeating the inputs.
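
The corrected docstring describes two dispatch strategies in the experts' forward pass: during training, loop over the experts so memory stays bounded; at inference, repeat the inputs and run every expert at once. Below is a minimal sketch of that trade-off, assuming a toy TinyExperts module with one linear weight matrix per expert and dense routing weights over all experts; the class name, shapes, and parameterization are illustrative assumptions, not the actual GptOssExperts implementation.

import torch
import torch.nn as nn


class TinyExperts(nn.Module):
    """Toy expert bank illustrating the two dispatch strategies from the docstring."""

    def __init__(self, hidden_size: int, num_experts: int):
        super().__init__()
        self.num_experts = num_experts
        # One (hidden, hidden) matrix per expert, stacked so the inference path
        # can run all experts in a single batched matmul.
        self.weight = nn.Parameter(torch.randn(num_experts, hidden_size, hidden_size) * 0.02)

    def forward(self, hidden_states: torch.Tensor, routing_weights: torch.Tensor) -> torch.Tensor:
        # hidden_states: (num_tokens, hidden); routing_weights: (num_tokens, num_experts)
        if self.training:
            # Training path: loop over the experts so intermediate activations are
            # materialized one expert at a time, keeping peak memory bounded.
            output = torch.zeros_like(hidden_states)
            for expert_idx in range(self.num_experts):
                expert_out = hidden_states @ self.weight[expert_idx]                # (T, H)
                output += routing_weights[:, expert_idx : expert_idx + 1] * expert_out
            return output
        # Inference path: repeat the inputs once per expert and compute every
        # expert's output at once, trading memory for a single batched matmul.
        repeated = hidden_states.unsqueeze(0).expand(self.num_experts, -1, -1)      # (E, T, H)
        all_expert_out = torch.bmm(repeated, self.weight)                           # (E, T, H)
        return (routing_weights.t().unsqueeze(-1) * all_expert_out).sum(dim=0)      # (T, H)


# Both paths produce the same result up to floating-point error; only the
# memory/compute trade-off differs.
layer = TinyExperts(hidden_size=8, num_experts=4)
tokens = torch.randn(5, 8)
weights = torch.softmax(torch.randn(5, 4), dim=-1)
layer.train()
out_loop = layer(tokens, weights)
layer.eval()
with torch.no_grad():
    out_batched = layer(tokens, weights)
print(torch.allclose(out_loop, out_batched, atol=1e-5))

The batched path materializes the hidden states once per expert, so it costs roughly num_experts times the activation memory, but it replaces the Python loop with a single torch.bmm call; that is the memory-for-speed trade the docstring points at.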

src/transformers/models/gpt_oss/modular_gpt_oss.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ def __init__(self, config):
 
     def forward(self, hidden_states: torch.Tensor, router_indices=None, routing_weights=None) -> torch.Tensor:
         """
-        When training is is more efficient to just loop over the experts and compute the output for each expert
+        When training it is more efficient to just loop over the experts and compute the output for each expert
         as otherwise the memory would explode.
 
         For inference we can sacrifice some memory and compute the output for all experts at once. By repeating the inputs.

0 commit comments
