
Commit bfb8a8f

run formatter

Signed-off-by: Anh Uong <[email protected]>

1 parent 168f170 commit bfb8a8f

7 files changed: +92 -78 lines changed

plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/multipack_sampler.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-taken from https://github.com/imoneoi/multipack_sampler with some modifications
+taken from https://github.com/imoneoi/multipack_sampler with some modifications
 taken from https://github.com/instructlab/training/blob/main/src/instructlab/training/multipack_sampler.py
 """


plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/granite.py

Lines changed: 19 additions & 17 deletions
@@ -40,8 +40,8 @@
     KEY_QKV,
     build_lora_fused_ops,
     get_hidden_activation_fn_key,
-    trigger_fused_ops,
     get_transformers_version,
+    trigger_fused_ops,
 )


@@ -127,22 +127,24 @@ def get_mp_rules(base_type: str, config: PretrainedConfig = None):
             ),
         ),
         *[
-            ModelPatcherRule(
-                rule_id="granite-custom-loss",
-                trigger=ModelPatcherTrigger(
-                    check=replace_custom_loss_when_triggered(
-                        GraniteForCausalLM, custom_loss_type="granite-custom-loss"
-                    )
-                ),
-            )
-            if get_transformers_version() >= "4.46" else
-            ModelPatcherRule(
-                rule_id="granite-cross-ent",
-                import_and_maybe_reload=(
-                    "torch.nn.CrossEntropyLoss",
-                    FastCrossEntropyLoss,
-                    "transformers.models.granite.modeling_granite",
-                ),
+            (
+                ModelPatcherRule(
+                    rule_id="granite-custom-loss",
+                    trigger=ModelPatcherTrigger(
+                        check=replace_custom_loss_when_triggered(
+                            GraniteForCausalLM, custom_loss_type="granite-custom-loss"
+                        )
+                    ),
+                )
+                if get_transformers_version() >= "4.46"
+                else ModelPatcherRule(
+                    rule_id="granite-cross-ent",
+                    import_and_maybe_reload=(
+                        "torch.nn.CrossEntropyLoss",
+                        FastCrossEntropyLoss,
+                        "transformers.models.granite.modeling_granite",
+                    ),
+                )
             )
         ],
         ModelPatcherRule(
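
This hunk is formatting only: black wraps the conditional expression that picks one patch rule for the installed transformers version in parentheses and moves the if/else onto their own lines. A minimal sketch of the same pattern, with Rule and transformers_version as illustrative stand-ins for the plugin's ModelPatcherRule and get_transformers_version:

# Illustrative stand-ins; the real code uses ModelPatcherRule and
# get_transformers_version from fms_acceleration_foak.
from dataclasses import dataclass


@dataclass
class Rule:
    rule_id: str


def transformers_version() -> str:
    # assume an installed version string such as "4.46.1"
    return "4.46.1"


rules = [
    Rule(rule_id="granite-rope"),
    # *[...] keeps the list flat while choosing exactly one of two rules;
    # the wrapping parentheses are black's layout for a multi-line
    # conditional expression, as in the diff above
    *[
        (
            Rule(rule_id="granite-custom-loss")
            if transformers_version() >= "4.46"
            else Rule(rule_id="granite-cross-ent")
        )
    ],
]

The gate compares raw version strings, exactly as the original code does; if ordering across arbitrary versions mattered, packaging.version.parse would be the safer comparison.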

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/llama.py

Lines changed: 19 additions & 17 deletions
@@ -46,8 +46,8 @@
     KEY_QKV,
     build_lora_fused_ops,
     get_hidden_activation_fn_key,
-    trigger_fused_ops,
     get_transformers_version,
+    trigger_fused_ops,
 )


@@ -127,22 +127,24 @@ def get_mp_rules(base_type: str, config: PretrainedConfig = None):
             forward=lce_forward,
         ),
         *[
-            ModelPatcherRule(
-                rule_id="llama-custom-loss",
-                trigger=ModelPatcherTrigger(
-                    check=replace_custom_loss_when_triggered(
-                        LlamaForCausalLM, custom_loss_type="llama-custom-loss"
-                    )
-                ),
-            )
-            if get_transformers_version() >= "4.46" else
-            ModelPatcherRule(
-                rule_id="llama-cross-ent",
-                import_and_maybe_reload=(
-                    "torch.nn.CrossEntropyLoss",
-                    FastCrossEntropyLoss,
-                    "transformers.models.llama.modeling_llama",
-                ),
+            (
+                ModelPatcherRule(
+                    rule_id="llama-custom-loss",
+                    trigger=ModelPatcherTrigger(
+                        check=replace_custom_loss_when_triggered(
+                            LlamaForCausalLM, custom_loss_type="llama-custom-loss"
+                        )
+                    ),
+                )
+                if get_transformers_version() >= "4.46"
+                else ModelPatcherRule(
+                    rule_id="llama-cross-ent",
+                    import_and_maybe_reload=(
+                        "torch.nn.CrossEntropyLoss",
+                        FastCrossEntropyLoss,
+                        "transformers.models.llama.modeling_llama",
+                    ),
+                )
             )
         ],
         # TODO: have a generic version of this rule

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/mistral.py

Lines changed: 19 additions & 17 deletions
@@ -46,8 +46,8 @@
     KEY_QKV,
     build_lora_fused_ops,
     get_hidden_activation_fn_key,
-    trigger_fused_ops,
     get_transformers_version,
+    trigger_fused_ops,
 )


@@ -119,22 +119,24 @@ def get_mp_rules(base_type: str, config: PretrainedConfig = None):
             ),
         ),
         *[
-            ModelPatcherRule(
-                rule_id="mistral-custom-loss",
-                trigger=ModelPatcherTrigger(
-                    check=replace_custom_loss_when_triggered(
-                        MistralForCausalLM, custom_loss_type="mistral-custom-loss"
-                    )
-                ),
-            )
-            if get_transformers_version() >= "4.46" else
-            ModelPatcherRule(
-                rule_id="mistral-cross-ent",
-                import_and_maybe_reload=(
-                    "torch.nn.CrossEntropyLoss",
-                    FastCrossEntropyLoss,
-                    "transformers.models.mistral.modeling_mistral",
-                ),
+            (
+                ModelPatcherRule(
+                    rule_id="mistral-custom-loss",
+                    trigger=ModelPatcherTrigger(
+                        check=replace_custom_loss_when_triggered(
+                            MistralForCausalLM, custom_loss_type="mistral-custom-loss"
+                        )
+                    ),
+                )
+                if get_transformers_version() >= "4.46"
+                else ModelPatcherRule(
+                    rule_id="mistral-cross-ent",
+                    import_and_maybe_reload=(
+                        "torch.nn.CrossEntropyLoss",
+                        FastCrossEntropyLoss,
+                        "transformers.models.mistral.modeling_mistral",
+                    ),
+                )
             )
         ],
         ModelPatcherRule(

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/mixtral.py

Lines changed: 26 additions & 18 deletions
@@ -24,8 +24,8 @@
 )
 from transformers.models.mixtral.modeling_mixtral import (
     MixtralAttention,
-    MixtralRMSNorm,
     MixtralForCausalLM,
+    MixtralRMSNorm,
 )

 # Local
@@ -35,7 +35,13 @@
 )
 from ..kernels.unsloth.rms_layernorm import fast_rms_layernorm
 from ..kernels.unsloth.rope_embedding import fast_rope_embedding
-from .utils import KEY_O, KEY_QKV, build_lora_fused_ops, trigger_fused_ops, get_transformers_version
+from .utils import (
+    KEY_O,
+    KEY_QKV,
+    build_lora_fused_ops,
+    get_transformers_version,
+    trigger_fused_ops,
+)


 def get_mp_rules(base_type):
@@ -90,22 +96,24 @@ def get_mp_rules(base_type):
             ),
         ),
         *[
-            ModelPatcherRule(
-                rule_id="mixtral-custom-loss",
-                trigger=ModelPatcherTrigger(
-                    check=replace_custom_loss_when_triggered(
-                        MixtralForCausalLM, custom_loss_type="mixtral-custom-loss"
-                    )
-                ),
-            )
-            if get_transformers_version() >= "4.46" else
-            ModelPatcherRule(
-                rule_id="mixtral-cross-ent",
-                import_and_maybe_reload=(
-                    "torch.nn.CrossEntropyLoss",
-                    FastCrossEntropyLoss,
-                    "transformers.models.mixtral.modeling_mixtral",
-                ),
+            (
+                ModelPatcherRule(
+                    rule_id="mixtral-custom-loss",
+                    trigger=ModelPatcherTrigger(
+                        check=replace_custom_loss_when_triggered(
+                            MixtralForCausalLM, custom_loss_type="mixtral-custom-loss"
+                        )
+                    ),
+                )
+                if get_transformers_version() >= "4.46"
+                else ModelPatcherRule(
+                    rule_id="mixtral-cross-ent",
+                    import_and_maybe_reload=(
+                        "torch.nn.CrossEntropyLoss",
+                        FastCrossEntropyLoss,
+                        "transformers.models.mixtral.modeling_mixtral",
+                    ),
+                )
             )
         ],
         ModelPatcherRule(

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/utils.py

Lines changed: 5 additions & 2 deletions
@@ -216,6 +216,9 @@ def get_hidden_activation_fn_key(config: PretrainedConfig):
         f"architecture {config.architectures}."
     )

+
 def get_transformers_version():
-    _, _transformers_version = _is_package_available("transformers", return_version=True)
-    return _transformers_version
+    _, _transformers_version = _is_package_available(
+        "transformers", return_version=True
+    )
+    return _transformers_version
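
The reflowed helper wraps transformers' private _is_package_available, which with return_version=True reports whether the package is installed together with its version string. A rough standard-library sketch of the same behavior; importlib.metadata and the "N/A" sentinel here are stand-ins and assumptions, not what the plugin actually calls:

# Stand-in sketch using only the standard library; the plugin itself calls
# transformers' private _is_package_available("transformers", return_version=True).
from importlib.metadata import PackageNotFoundError, version


def get_transformers_version() -> str:
    try:
        # version string such as "4.46.1", used by the model rules for gating
        return version("transformers")
    except PackageNotFoundError:
        # assumption: return a "not installed" sentinel rather than raising
        return "N/A"

The model files above compare the returned string against "4.46" to choose between the custom-loss rule and the CrossEntropyLoss patch.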

scripts/benchmarks/benchmark.py

Lines changed: 3 additions & 6 deletions
@@ -171,9 +171,7 @@ def __init__(
     ) -> None:

         self.dataset_split = datasets.load_dataset(
-            dataset_name,
-            split=dataset_split,
-            **additional_dataset_kwargs
+            dataset_name, split=dataset_split, **additional_dataset_kwargs
         )

         self.kwargs = {
@@ -206,9 +204,8 @@ def prepare_dataset(
         )
         response_template = self.response_template

-        if (
-            self.kwargs['tokenize']
-            or (not self.kwargs['tokenize'] and self.kwargs['chat_template'])
+        if self.kwargs["tokenize"] or (
+            not self.kwargs["tokenize"] and self.kwargs["chat_template"]
         ):
             tokenizer = AutoTokenizer.from_pretrained(model_name)
             # for now, if pad_token_id is None, will just do a replacement
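
Only the layout of this gate changed, but note that tokenize or (not tokenize and chat_template) is logically equivalent to the simpler tokenize or chat_template. A quick exhaustive check, independent of the benchmark code:

# Exhaustive check that the gate above equals `tokenize or chat_template`.
from itertools import product

for tokenize, chat_template in product([False, True], repeat=2):
    gate = tokenize or (not tokenize and chat_template)
    assert gate == (tokenize or chat_template), (tokenize, chat_template)
print("gate <=> tokenize or chat_template")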
