Skip to content

Commit da7b8ce

Browse files
authored
[kernels] Kernel Config (#41232)
* first config * add kernel_config * add import logic * fixing style * compare class name * add comments * rm import * adding kernel md files * add to toctree * adding to main_classes * simplify required config * add to doc * style * store the mapping * remove nested func * add hub mixin * fix * imports * fix
1 parent 4763b8c commit da7b8ce

File tree

7 files changed

+266
-4
lines changed

7 files changed

+266
-4
lines changed

docs/source/en/_toctree.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,11 @@
216216
- local: quantization/contribute
217217
title: Contribute
218218
title: Quantization
219+
- isExpanded: false
220+
sections:
221+
- local: kernel_doc/overview
222+
title: Kernels in transformers
223+
title: Kernels
219224
- isExpanded: false
220225
sections:
221226
- local: serialization
@@ -368,6 +373,8 @@
368373
title: Image Processor
369374
- local: main_classes/video_processor
370375
title: Video Processor
376+
- local: main_classes/kernels
377+
title: Kernels
371378
title: Main Classes
372379
- sections:
373380
- sections:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Overview
2+
3+
Kernels in Transformers are used to optimize the performance of models by pulling custom layers from the Hub with very low effort.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
## Kernels
2+
3+
This page documents the kernels configuration utilities.
4+
5+
### KernelConfig
6+
7+
[[autodoc]] KernelConfig

src/transformers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@
265265
"VptqConfig",
266266
],
267267
"video_utils": [],
268+
"utils.kernel_config": ["KernelConfig"],
268269
}
269270

270271
# tokenizers-backed objects
@@ -754,6 +755,7 @@
754755
from .utils import is_torch_npu_available as is_torch_npu_available
755756
from .utils import is_torch_xla_available as is_torch_xla_available
756757
from .utils import is_torch_xpu_available as is_torch_xpu_available
758+
from .utils.kernel_config import KernelConfig as KernelConfig
757759

758760
# bitsandbytes config
759761
from .utils.quantization_config import AqlmConfig as AqlmConfig
@@ -775,7 +777,6 @@
775777
from .utils.quantization_config import TorchAoConfig as TorchAoConfig
776778
from .utils.quantization_config import VptqConfig as VptqConfig
777779
from .video_processing_utils import BaseVideoProcessor as BaseVideoProcessor
778-
779780
else:
780781
import sys
781782

src/transformers/modeling_utils.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
WEIGHTS_INDEX_NAME,
8585
WEIGHTS_NAME,
8686
ContextManagers,
87+
KernelConfig,
8788
PushToHubMixin,
8889
cached_file,
8990
check_torch_load_is_safe,
@@ -4503,6 +4504,7 @@ def from_pretrained(
45034504
device_mesh = kwargs.pop("device_mesh", None)
45044505
trust_remote_code = kwargs.pop("trust_remote_code", None)
45054506
use_kernels = kwargs.pop("use_kernels", False)
4507+
kernel_config = kwargs.pop("kernel_config", None)
45064508

45074509
key_mapping = kwargs.pop("key_mapping", None)
45084510
# Load models with hardcoded key mapping on class for VLMs only, to keep BC and standardize model
@@ -4895,7 +4897,26 @@ def _assign_original_dtype(module):
48954897

48964898
# check if using kernels
48974899
if use_kernels:
4898-
model.use_kernels = True
4900+
if not is_kernels_available():
4901+
raise ValueError(
4902+
"Kernels are not available. To use kernels, please install kernels using `pip install kernels`"
4903+
)
4904+
from kernels import use_kernel_mapping
4905+
4906+
if kernel_config is not None and isinstance(kernel_config, KernelConfig):
4907+
# This will make sure the mapping is valid, and the layers are registered in the model
4908+
kernel_config.sanitize_kernel_mapping(model)
4909+
4910+
# This will create a compatible mapping for the model with the kernels library
4911+
kernel_config.create_compatible_mapping(model)
4912+
4913+
# This is a context manager to override the default kernel mapping
4914+
# We are calling kernelize inside this context manager using the use_kernels setter
4915+
with use_kernel_mapping(kernel_config.kernel_mapping):
4916+
model.use_kernels = True
4917+
# We use the default kernel mapping in .integrations.hub_kernels
4918+
else:
4919+
model.use_kernels = True
48994920

49004921
# If it is a model with generation capabilities, attempt to load generation files (generation config,
49014922
# custom generate function)
@@ -5506,14 +5527,14 @@ def loss_function(self):
55065527
def loss_function(self, value):
55075528
self._loss_function = value
55085529

5509-
def kernelize(self):
5530+
def kernelize(self, mode=None):
    """
    Kernelize this model with the `kernels` library.

    Args:
        mode: An optional `kernels.Mode`. When `None`, the mode is inferred
            from the model's training state (`Mode.TRAINING` if `self.training`
            else `Mode.INFERENCE`).

    Raises:
        ValueError: If the `kernels` package is not installed.
    """
    if not is_kernels_available():
        raise ValueError(
            "Kernels are not available. To use kernels, please install kernels using `pip install kernels`"
        )
    from kernels import Device, Mode, kernelize

    # Bug fix: the original one-liner
    #   mode = Mode.INFERENCE if not self.training else Mode.TRAINING if mode is None else mode
    # parses as `Mode.INFERENCE if not self.training else (Mode.TRAINING if mode is None else mode)`,
    # so an explicitly passed `mode` was silently discarded whenever the model was in eval mode.
    # Only infer the mode when the caller did not provide one.
    if mode is None:
        mode = Mode.TRAINING if self.training else Mode.INFERENCE
    kernelize(self, device=Device(type=self.device.type), mode=mode)
    self._use_kernels = True
55195540

src/transformers/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@
252252
requires_backends,
253253
torch_only_method,
254254
)
255+
from .kernel_config import KernelConfig
255256
from .peft_utils import (
256257
ADAPTER_CONFIG_NAME,
257258
ADAPTER_SAFE_WEIGHTS_NAME,
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from ..utils import PushToHubMixin, is_kernels_available, is_torch_available
16+
17+
18+
if is_kernels_available():
19+
from kernels import LayerRepository, Mode
20+
21+
if is_torch_available():
22+
import torch
23+
24+
25+
def infer_device(model):
    """
    Infer the device type from the model parameters.

    Args:
        model: The model instance (anything exposing `.parameters()`).

    Returns:
        The device type string of the first parameter (e.g. `"cuda"`, `"xpu"`,
        `"cpu"`). A `"cuda"` device is refined to `"rocm"` when torch is a
        HIP/ROCm build, since torch reports ROCm devices as `"cuda"`.

    Raises:
        ValueError: If the model has no parameters to read a device from.
    """
    EXAMPLE_MAPPING = """
    {
        "RMSNorm": {
            "cuda":
                "kernels-community/layer_norm:LlamaRMSNorm",
            ...
        },
        ...
    }
    """
    try:
        param = next(model.parameters())
    except StopIteration:
        # `from None`: the StopIteration is an implementation detail of the
        # parameter iterator, not a cause callers need to see in the traceback.
        raise ValueError(
            f"Cannot determine model device, please provide a device to the mapping. Example: {EXAMPLE_MAPPING}"
        ) from None

    dev_type = param.device.type
    if dev_type == "cuda" and torch.version.hip is not None:
        # Refine based on the actual platform: ROCm builds report "cuda".
        return "rocm"
    return dev_type
58+
59+
60+
def add_to_mapping(layer_name, device, repo_name, mode, compatible_mapping):
    """
    Insert a `LayerRepository` entry for (`layer_name`, `device`) into `compatible_mapping`.

    Args:
        layer_name: The registered kernel layer name (e.g. "RMSNorm").
        device: Target device type; must be one of "cuda", "rocm", "xpu".
        repo_name: Hub reference in the form "org/repo:layer_name".
        mode: The `kernels.Mode` to register the repository under.
        compatible_mapping: The mapping being built; mutated in place.

    Raises:
        ValueError: If `device` is not one of the supported device types.
    """
    if device not in ["cuda", "rocm", "xpu"]:
        raise ValueError(f"Only cuda, rocm, and xpu devices supported, got: {device}")
    # Split "org/repo:layer_name" once, instead of calling split(":") twice.
    repo_id, _, repo_layer_name = repo_name.partition(":")
    # Bug fix: the original did `compatible_mapping[layer_name] = {device: ...}`,
    # which replaced the whole layer entry on every call — a kernel declared for
    # several devices kept only the last one. Merge into the existing entry instead.
    compatible_mapping.setdefault(layer_name, {})[device] = {
        mode: LayerRepository(
            repo_id=repo_id,
            layer_name=repo_layer_name,
        )
    }
73+
74+
75+
class KernelConfig(PushToHubMixin):
    """
    Kernel configuration class. This class is used to configure the kernel mapping for a model.
    """

    def __init__(self, kernel_mapping=None):
        # Bug fix: the original signature used a mutable default (`kernel_mapping={}`),
        # which is created once and shared across every instance constructed without
        # an explicit mapping — mutating one config mutated them all. A `None`
        # sentinel keeps the call signature backward-compatible while giving each
        # instance its own dict.
        self.kernel_mapping = {} if kernel_mapping is None else kernel_mapping
        # Maps module path -> kernel_layer_name for modules registered via
        # register_kernel_forward_from_hub; filled by `store_registered_layer_names`.
        self.registered_layer_names = {}

    def update_kernel(self, repo_id, registered_name, layer_name, device, mode, revision=None):
        """Register (or overwrite) the kernel used for `registered_name` on `device` in `mode`."""
        self.kernel_mapping[registered_name] = {
            device: {
                mode: LayerRepository(
                    repo_id=repo_id,
                    layer_name=layer_name,
                    revision=revision,
                )
            }
        }

    def store_registered_layer_names(self, model):
        """Collect the `kernel_layer_name` of every registered module in `model`."""
        for name, module in model.named_modules():
            if hasattr(module, "kernel_layer_name"):
                self.registered_layer_names[name] = module.kernel_layer_name

    def sanitize_kernel_mapping(self, model):
        """
        Validates the kernel_mapping to ensure that:
        1. Each layer_name in the mapping is registered in the model (i.e., the model contains a module with a matching kernel_layer_name).
        2. Each kernel value is either a string of the form 'org/repo:layer_name' or a dict mapping device types ("cuda", "rocm", "xpu") to such strings.
        3. Each device key in a dict is one of "cuda", "rocm", or "xpu".
        4. Each repo_name is a valid repository and layer name in the format 'org/repo:layer_name' (i.e., a string containing both a slash and a colon).

        Args:
            model: The model instance whose modules are checked for registered kernel_layer_name attributes.

        Raises:
            ValueError: If a layer_name is not registered in the model, if a device is not supported,
                or if a repo_name is not a valid 'org/repo:layer_name' string.
        """
        MAPPING_FORMAT = """
    {
        "RMSNorm":
            "kernels-community/layer_norm:LlamaRMSNorm",
        ...
    },

    or

    {
        "RMSNorm": {
            "cuda":
                "kernels-community/layer_norm:LlamaRMSNorm",
            "rocm":
                "kernels-community/layer_norm:LlamaRMSNorm",
            ...
        },
        ...
    }
    """
        self.store_registered_layer_names(model)
        # Validate that the kernel mapping is a dict
        if not isinstance(self.kernel_mapping, dict):
            raise ValueError(
                f"Kernel mapping must be a dict of the following format: {MAPPING_FORMAT}, got: {type(self.kernel_mapping)}"
            )

        for layer_name, kernel in self.kernel_mapping.items():
            if layer_name not in self.registered_layer_names.values():
                raise ValueError(
                    f"Layer {layer_name} is not registered in the model, please register it first using register_kernel_forward_from_hub"
                )

            if isinstance(kernel, str):
                if "/" not in kernel or ":" not in kernel:
                    raise ValueError(
                        f"Kernel mapping for '{layer_name}' must be a valid repo name with a layer name (e.g., 'org/repo:layer_name'), got: {kernel}"
                    )

            elif isinstance(kernel, dict):
                for device, repo_name in kernel.items():
                    if device not in ["cuda", "rocm", "xpu"]:
                        raise ValueError(f"Only cuda, rocm, and xpu devices supported, got: {device}")

                    if not isinstance(repo_name, str) or "/" not in repo_name or ":" not in repo_name:
                        raise ValueError(
                            f"Kernel mapping for '{layer_name}' must be a valid repo name with a layer name (e.g., 'org/repo:layer_name'), got: {repo_name}"
                        )

            else:
                raise ValueError(f"Kernel mapping must follow the format: {MAPPING_FORMAT}, got: {kernel}")

    def create_compatible_mapping(self, model, compile=False):
        """
        Transforms a simple kernel_mapping of the form:
        {
            "RMSNorm":
                "kernels-community/layer_norm:LlamaRMSNorm",
            ...
        },

        or

        {
            "RMSNorm": {
                "cuda":
                    "kernels-community/layer_norm:LlamaRMSNorm",
                "rocm":
                    "kernels-community/layer_norm:LlamaRMSNorm",
                ...
            },
            ...
        }

        into a nested mapping:

        {
            "RMSNorm": {
                "cuda": {
                    Mode.INFERENCE: LayerRepository(
                        repo_id="kernels-community/layer_norm",
                        layer_name="LlamaRMSNorm",
                    )
                }
            }
        }

        that's compatible with the kernels library.

        The device is inferred from the model's parameters if not provided.
        The Mode is inferred from the model's training state.
        """
        # The mode is the same for every layer — it depends only on the model's
        # training state and the `compile` flag — so compute it once instead of
        # per loop iteration as the original did.
        mode = Mode.TRAINING if model.training else Mode.INFERENCE
        if compile:
            mode = mode | Mode.TORCH_COMPILE

        compatible_mapping = {}
        for layer_name, kernel in self.kernel_mapping.items():
            if isinstance(kernel, str):
                # Single repo string: infer the target device from the model parameters.
                add_to_mapping(layer_name, infer_device(model), kernel, mode, compatible_mapping)
            elif isinstance(kernel, dict):
                for device, repo_name in kernel.items():
                    add_to_mapping(layer_name, device, repo_name, mode, compatible_mapping)

        self.kernel_mapping = compatible_mapping

0 commit comments

Comments
 (0)