keras-team
diff --git a/keras_nlp/models/__init__.py b/keras_nlp/models/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/keras_nlp/models/deberta/deberta_backbone.py b/keras_nlp/models/deberta/deberta_backbone.py
Lines changed: 62 additions & 9 deletions
diff --git a/keras_nlp/models/deberta/deberta_classifier.py b/keras_nlp/models/deberta/deberta_classifier.py
Lines changed: 270 additions & 0 deletions
@@ -17,6 +17,7 @@
 from keras_nlp.models.bert.bert_preprocessor import BertPreprocessor
 from keras_nlp.models.bert.bert_tokenizer import BertTokenizer
 from keras_nlp.models.deberta.deberta_backbone import DebertaBackbone
+from keras_nlp.models.deberta.deberta_classifier import DebertaClassifier
 from keras_nlp.models.deberta.deberta_preprocessor import DebertaPreprocessor
 from keras_nlp.models.deberta.deberta_tokenizer import DebertaTokenizer
 from keras_nlp.models.distil_bert.distil_bert_backbone import DistilBertBackbone
 
@@ -14,14 +14,19 @@
 
 """DeBERTa backbone model."""
 
+import copy
+import os
+
 import tensorflow as tf
 from tensorflow import keras
 
+from keras_nlp.models.deberta.deberta_presets import backbone_presets
 from keras_nlp.models.deberta.disentangled_attention_encoder import (
     DisentangledAttentionEncoder,
 )
 from keras_nlp.models.deberta.relative_embedding import RelativeEmbedding
 from keras_nlp.utils.python_utils import classproperty
+from keras_nlp.utils.python_utils import format_docstring
 
 
 def deberta_kernel_initializer(stddev=0.02):
@@ -54,13 +59,12 @@ class DebertaBackbone(keras.Model):
         hidden_dim: int. The size of the transformer encoding layer.
         intermediate_dim: int. The output dimension of the first Dense layer in
             a two-layer feedforward network for each transformer.
-        dropout: float, defaults to 0.1. Dropout probability for the
-            DeBERTa model.
-        max_sequence_length: int, defaults to 512. The maximum sequence length
-            this encoder can consume. The sequence length of the input must be
-            less than `max_sequence_length`.
-        bucket_size: int, defaults to 256. The size of the relative position
-            buckets. Generally equal to `max_sequence_length // 2`.
+        dropout: float. Dropout probability for the DeBERTa model.
+        max_sequence_length: int. The maximum sequence length this encoder can
+            consume. The sequence length of the input must be less than
+            `max_sequence_length`.
+        bucket_size: int. The size of the relative position buckets. Generally
+            equal to `max_sequence_length // 2`.
 
     Example usage:
     ```python
@@ -172,6 +176,7 @@ def __init__(
         self.dropout = dropout
         self.max_sequence_length = max_sequence_length
         self.bucket_size = bucket_size
+        self.start_token_index = 0
 
     def get_config(self):
         return {
@@ -193,13 +198,61 @@ def from_config(cls, config):
 
     @classproperty
     def presets(cls):
-        return {}
+        return copy.deepcopy(backbone_presets)
 
     @classmethod
+    @format_docstring(names=", ".join(backbone_presets))
     def from_preset(
         cls,
         preset,
         load_weights=True,
         **kwargs,
     ):
-        raise NotImplementedError
+        """Instantiate DeBERTa model from preset architecture and weights.
+
+        Args:
+            preset: string. Must be one of {{names}}.
+            load_weights: Whether to load pre-trained weights into model.
+                Defaults to `True`.
+
+        Examples:
+        ```python
+        input_data = {
+            "token_ids": tf.ones(shape=(1, 12), dtype=tf.int64),
+            "padding_mask": tf.constant(
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], shape=(1, 12)
+            ),
+        }
+
+        # Load architecture and weights from preset
+        model = keras_nlp.models.DebertaBackbone.from_preset("deberta_base")
+        output = model(input_data)
+
+        # Load randomly initialized model from preset architecture
+        model = keras_nlp.models.DebertaBackbone.from_preset(
+            "deberta_base", load_weights=False
+        )
+        output = model(input_data)
+        ```
+        """
+        if preset not in cls.presets:
+            raise ValueError(
+                "`preset` must be one of "
+                f"""{", ".join(cls.presets)}. Received: {preset}."""
+            )
+        metadata = cls.presets[preset]
+        config = metadata["config"]
+        model = cls.from_config({**config, **kwargs})
+
+        if not load_weights:
+            return model
+
+        weights = keras.utils.get_file(
+            "model.h5",
+            metadata["weights_url"],
+            cache_subdir=os.path.join("models", preset),
+            file_hash=metadata["weights_hash"],
+        )
+
+        model.load_weights(weights)
+        return model
@@ -0,0 +1,270 @@
+# Copyright 2022 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""DeBERTa classification model."""
+
+import copy
+
+from tensorflow import keras
+
+from keras_nlp.models.deberta.deberta_backbone import DebertaBackbone
+from keras_nlp.models.deberta.deberta_backbone import deberta_kernel_initializer
+from keras_nlp.models.deberta.deberta_preprocessor import DebertaPreprocessor
+from keras_nlp.models.deberta.deberta_presets import backbone_presets
+from keras_nlp.utils.pipeline_model import PipelineModel
+from keras_nlp.utils.python_utils import classproperty
+from keras_nlp.utils.python_utils import format_docstring
+
+
+@keras.utils.register_keras_serializable(package="keras_nlp")
+class DebertaClassifier(PipelineModel):
+    """An end-to-end DeBERTa model for classification tasks.
+
+    This model attaches a classification head to a
+    `keras_nlp.models.DebertaBackbone` model, mapping from the backbone
+    outputs to logit output suitable for a classification task. For usage of
+    this model with pre-trained weights, see the `from_preset()` method.
+
+    This model can optionally be configured with a `preprocessor` layer, in
+    which case it will automatically apply preprocessing to raw inputs during
+    `fit()`, `predict()`, and `evaluate()`. This is done by default when
+    creating the model with `from_preset()`.
+
+    Disclaimer: Pre-trained models are provided on an "as is" basis, without
+    warranties or conditions of any kind.
+
+    Args:
+        backbone: A `keras_nlp.models.DebertaBackbone` instance.
+        num_classes: int. Number of classes to predict.
+        hidden_dim: int. The size of the pooler layer.
+        dropout: float. Dropout probability applied to the pooled output. For
+            the second dropout layer, `backbone.dropout` is used.
+        preprocessor: A `keras_nlp.models.DebertaPreprocessor` or `None`. If
+            `None`, this model will not apply preprocessing, and inputs should
+            be preprocessed before calling the model.
+
+    Example usage:
+    ```python
+    preprocessed_features = {
+        "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
+        "padding_mask": tf.constant(
+            [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)),
+    }
+    labels = [0, 3]
+
+    # Randomly initialized DeBERTa encoder
+    backbone = keras_nlp.models.DebertaBackbone(
+        vocabulary_size=128100,
+        num_layers=12,
+        num_heads=12,
+        hidden_dim=768,
+        intermediate_dim=3072,
+        max_sequence_length=12,
+        bucket_size=6,
+    )
+
+    # Create a DeBERTa classifier and fit your data.
+    classifier = keras_nlp.models.DebertaClassifier(
+        backbone,
+        num_classes=4,
+        preprocessor=None,
+    )
+    classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
+
+    # Access backbone programmatically (e.g., to change `trainable`)
+    classifier.backbone.trainable = False
+    ```
+    """
+
+    def __init__(
+        self,
+        backbone,
+        num_classes=2,
+        hidden_dim=None,
+        dropout=0.0,
+        preprocessor=None,
+        **kwargs,
+    ):
+        inputs = backbone.input
+        if hidden_dim is None:
+            hidden_dim = backbone.hidden_dim
+
+        x = backbone(inputs)[:, backbone.start_token_index, :]
+        x = keras.layers.Dropout(dropout, name="pooled_dropout")(x)
+        x = keras.layers.Dense(
+            hidden_dim,
+            activation=lambda x: keras.activations.gelu(x, approximate=False),
+            name="pooled_dense",
+        )(x)
+        x = keras.layers.Dropout(backbone.dropout, name="classifier_dropout")(x)
+        outputs = keras.layers.Dense(
+            num_classes,
+            kernel_initializer=deberta_kernel_initializer(),
+            name="logits",
+        )(x)
+
+        # Instantiate using Functional API Model constructor
+        super().__init__(
+            inputs=inputs,
+            outputs=outputs,
+            include_preprocessing=preprocessor is not None,
+            **kwargs,
+        )
+        # All references to `self` below this line
+        self._backbone = backbone
+        self._preprocessor = preprocessor
+        self.num_classes = num_classes
+        self.hidden_dim = hidden_dim
+        self.dropout = dropout
+
+    def preprocess_samples(self, x, y=None, sample_weight=None):
+        return self.preprocessor(x, y=y, sample_weight=sample_weight)
+
+    @property
+    def backbone(self):
+        """A `keras_nlp.models.DebertaBackbone` submodel."""
+        return self._backbone
+
+    @property
+    def preprocessor(self):
+        """A `keras_nlp.models.DebertaPreprocessor` preprocessing layer."""
+        return self._preprocessor
+
+    def get_config(self):
+        return {
+            "backbone": keras.layers.serialize(self.backbone),
+            "preprocessor": keras.layers.serialize(self.preprocessor),
+            "num_classes": self.num_classes,
+            "hidden_dim": self.hidden_dim,
+            "dropout": self.dropout,
+            "name": self.name,
+            "trainable": self.trainable,
+        }
+
+    @classmethod
+    def from_config(cls, config):
+        if "backbone" in config and isinstance(config["backbone"], dict):
+            config["backbone"] = keras.layers.deserialize(config["backbone"])
+        if "preprocessor" in config and isinstance(
+            config["preprocessor"], dict
+        ):
+            config["preprocessor"] = keras.layers.deserialize(
+                config["preprocessor"]
+            )
+        return cls(**config)
+
+    @classproperty
+    def presets(cls):
+        return copy.deepcopy(backbone_presets)
+
+    @classmethod
+    @format_docstring(names=", ".join(backbone_presets))
+    def from_preset(
+        cls,
+        preset,
+        load_weights=True,
+        **kwargs,
+    ):
+        """Create a classification model from a preset architecture and weights.
+
+        By default, this method will automatically create a `preprocessor`
+        layer to preprocess raw inputs during `fit()`, `predict()`, and
+        `evaluate()`. If you would like to disable this behavior, pass
+        `preprocessor=None`.
+
+        Args:
+            preset: string. Must be one of {{names}}.
+            load_weights: Whether to load pre-trained weights into model.
+                Defaults to `True`.
+
+        Examples:
+
+        Raw string inputs.
+        ```python
+        # Create a dataset with raw string features in an `(x, y)` format.
+        features = ["The quick brown fox jumped.", "I forgot my homework."]
+        labels = [0, 3]
+
+        # Create a DebertaClassifier and fit your data.
+        classifier = keras_nlp.models.DebertaClassifier.from_preset(
+            "deberta_base",
+            num_classes=4,
+        )
+        classifier.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        )
+        classifier.fit(x=features, y=labels, batch_size=2)
+        ```
+
+        Raw string inputs with customized preprocessing.
+        ```python
+        # Create a dataset with raw string features in an `(x, y)` format.
+        features = ["The quick brown fox jumped.", "I forgot my homework."]
+        labels = [0, 3]
+
+        # Use a shorter sequence length.
+        preprocessor = keras_nlp.models.DebertaPreprocessor.from_preset(
+            "deberta_base",
+            sequence_length=128,
+        )
+
+        # Create a DebertaClassifier and fit your data.
+        classifier = keras_nlp.models.DebertaClassifier.from_preset(
+            "deberta_base",
+            num_classes=4,
+            preprocessor=preprocessor,
+        )
+        classifier.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        )
+        classifier.fit(x=features, y=labels, batch_size=2)
+        ```
+
+        Preprocessed inputs.
+        ```python
+        # Create a dataset with preprocessed features in an `(x, y)` format.
+        preprocessed_features = {
+            "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64),
+            "padding_mask": tf.constant(
+                [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12)
+            ),
+        }
+        labels = [0, 3]
+
+        # Create a DebertaClassifier and fit your data.
+        classifier = keras_nlp.models.DebertaClassifier.from_preset(
+            "deberta_base",
+            num_classes=4,
+            preprocessor=None,
+        )
+        classifier.compile(
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        )
+        classifier.fit(x=preprocessed_features, y=labels, batch_size=2)
+        ```
+        """
+        if "preprocessor" not in kwargs:
+            kwargs["preprocessor"] = DebertaPreprocessor.from_preset(preset)
+
+        # Check if preset is backbone-only model
+        if preset in DebertaBackbone.presets:
+            backbone = DebertaBackbone.from_preset(preset, load_weights)
+            return cls(backbone, **kwargs)
+
+        # Otherwise must be one of class presets
+        # Currently no classifier-level presets, so must throw.
+        if preset not in cls.presets:
+            raise ValueError(
+                "`preset` must be one of "
+                f"""{", ".join(cls.presets)}. Received: {preset}."""
+            )