Commit 30cb703

Adding an AlbertMaskedLM task model and preprocessor (#725)

* albert lm init commit
* fixing preprocessor tests
* fixing the main model test + formatting + docstrings
* fixing bug in masked lm head
* fixing none condition in masked_lm_head_test
* fixing formatting
* fixing test_valid_call_with_embedding_weights
* minor docstring changes
* Minor fixes
* addressing some comments
* working on fixing unit tests for masking
* adding mask to preprocessor + fixing tests
* code format
* fixing classifier test failures
* fixing formatting

Co-authored-by: Matt Watson <[email protected]>
1 parent 6b1e37d commit 30cb703

11 files changed (+701, -20 lines)

keras_nlp/layers/masked_lm_head.py

Lines changed: 4 additions & 1 deletion

@@ -140,7 +140,10 @@ def __init__(
         self.vocabulary_size = shape[0]

     def build(self, input_shapes):
-        feature_size = input_shapes[-1]
+        if self.embedding_weights is not None:
+            feature_size = self.embedding_weights.shape[-1]
+        else:
+            feature_size = input_shapes[-1]

         self._dense = keras.layers.Dense(
             feature_size,
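With tied `embedding_weights`, the head's internal dense projection now targets the embedding dimension rather than the incoming feature dimension, so an encoder whose hidden size differs from its embedding size works. A minimal sketch of the newly supported case (the dimensions are illustrative, not from the commit):

```python
import tensorflow as tf
from tensorflow import keras
import keras_nlp

# Build an embedding with "embedding dim" 16 so `embedding.embeddings` exists.
embedding = keras.layers.Embedding(100, 16)
embedding(tf.zeros((1, 1), dtype=tf.int32))

head = keras_nlp.layers.MaskedLMHead(
    embedding_weights=embedding.embeddings,
    activation="softmax",
)

# Encoder output with "hidden dim" 32 != embedding dim 16. Before this fix
# the internal dense layer projected to 32 and the tied-weight output
# projection could not be applied; now it projects down to 16 first.
sequence = tf.random.uniform(shape=(4, 10, 32))
positions = tf.random.uniform(shape=(4, 5), maxval=10, dtype=tf.int32)
outputs = head(sequence, mask_positions=positions)
print(outputs.shape)  # (4, 5, 100): one distribution over the vocab per mask
```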

keras_nlp/layers/masked_lm_head_test.py

Lines changed: 7 additions & 5 deletions

@@ -46,15 +46,17 @@ def test_valid_call_with_embedding_weights(self):
             embedding_weights=embedding.embeddings,
             activation="softmax",
         )
-        encoded_tokens = keras.Input(shape=(10, 16))
+        # Use a different "hidden dim" for the model than "embedding dim"; we
+        # need to support this in the layer.
+        sequence = keras.Input(shape=(10, 32))
         positions = keras.Input(shape=(5,), dtype="int32")
-        outputs = head(encoded_tokens, mask_positions=positions)
-        model = keras.Model((encoded_tokens, positions), outputs)
-        token_data = tf.random.uniform(shape=(4, 10, 16))
+        outputs = head(sequence, mask_positions=positions)
+        model = keras.Model((sequence, positions), outputs)
+        sequence_data = tf.random.uniform(shape=(4, 10, 32))
         position_data = tf.random.uniform(
             shape=(4, 5), maxval=10, dtype="int32"
         )
-        model((token_data, position_data))
+        model((sequence_data, position_data))

     def test_get_config_and_from_config(self):
         head = masked_lm_head.MaskedLMHead(

keras_nlp/models/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -13,6 +13,10 @@
 # limitations under the License.

 from keras_nlp.models.albert.albert_backbone import AlbertBackbone
+from keras_nlp.models.albert.albert_masked_lm import AlbertMaskedLM
+from keras_nlp.models.albert.albert_masked_lm_preprocessor import (
+    AlbertMaskedLMPreprocessor,
+)
 from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
 from keras_nlp.models.albert.albert_tokenizer import AlbertTokenizer
 from keras_nlp.models.bart.bart_backbone import BartBackbone
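With these exports in place, both new classes resolve from the public `keras_nlp.models` namespace, and per the task docstring, `from_preset()` attaches a matching preprocessor by default. A quick sketch:

```python
import keras_nlp

# Both new classes are importable from the top-level models namespace.
from keras_nlp.models import AlbertMaskedLM, AlbertMaskedLMPreprocessor

# `from_preset()` wires up an AlbertMaskedLMPreprocessor automatically,
# so raw strings can be passed straight to `fit()`.
masked_lm = AlbertMaskedLM.from_preset("albert_base_en_uncased")
assert isinstance(masked_lm.preprocessor, AlbertMaskedLMPreprocessor)
```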

keras_nlp/models/albert/albert_classifier_test.py

Lines changed: 2 additions & 1 deletion

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for BERT classification model."""
+"""Tests for ALBERT classification model."""

 import io
 import os

@@ -57,6 +57,7 @@ def setUp(self):
             unk_piece="<unk>",
             bos_piece="[CLS]",
             eos_piece="[SEP]",
+            user_defined_symbols="[MASK]",
         )
         self.proto = bytes_io.getvalue()
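The `[MASK]` piece must exist in the test vocabulary so the new masked LM tooling can look up its id; `user_defined_symbols` registers it with the SentencePiece trainer. The rest of the trainer call is outside this hunk, so the sketch below reconstructs it from the visible keyword arguments; the training sentences and `vocab_size` are placeholders:

```python
import io

import sentencepiece

bytes_io = io.BytesIO()
sentencepiece.SentencePieceTrainer.train(
    sentence_iterator=iter(["the quick brown fox", "the earth is round"]),
    model_writer=bytes_io,
    vocab_size=12,
    model_type="WORD",
    pad_id=0,
    unk_id=1,
    bos_id=2,
    eos_id=3,
    pad_piece="<pad>",
    unk_piece="<unk>",
    bos_piece="[CLS]",
    eos_piece="[SEP]",
    # The added line: reserve a "[MASK]" piece in the vocabulary.
    user_defined_symbols="[MASK]",
)
proto = bytes_io.getvalue()  # serialized model, as in `self.proto` above
```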

keras_nlp/models/albert/albert_masked_lm.py

Lines changed: 154 additions & 0 deletions

@@ -0,0 +1,154 @@
+# Copyright 2023 The KerasNLP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ALBERT masked LM model."""
+
+import copy
+
+from tensorflow import keras
+
+from keras_nlp.layers.masked_lm_head import MaskedLMHead
+from keras_nlp.models.albert.albert_backbone import AlbertBackbone
+from keras_nlp.models.albert.albert_backbone import albert_kernel_initializer
+from keras_nlp.models.albert.albert_masked_lm_preprocessor import (
+    AlbertMaskedLMPreprocessor,
+)
+from keras_nlp.models.albert.albert_presets import backbone_presets
+from keras_nlp.models.task import Task
+from keras_nlp.utils.python_utils import classproperty
+
+
+@keras.utils.register_keras_serializable(package="keras_nlp")
+class AlbertMaskedLM(Task):
+    """An end-to-end ALBERT model for the masked language modeling task.
+
+    This model will train ALBERT on a masked language modeling task.
+    The model will predict labels for a number of masked tokens in the
+    input data. For usage of this model with pre-trained weights, see the
+    `from_preset()` method.
+
+    This model can optionally be configured with a `preprocessor` layer, in
+    which case inputs can be raw string features during `fit()`, `predict()`,
+    and `evaluate()`. Inputs will be tokenized and dynamically masked during
+    training and evaluation. This is done by default when creating the model
+    with `from_preset()`.
+
+    Disclaimer: Pre-trained models are provided on an "as is" basis, without
+    warranties or conditions of any kind.
+
+    Args:
+        backbone: A `keras_nlp.models.AlbertBackbone` instance.
+        preprocessor: A `keras_nlp.models.AlbertMaskedLMPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
+
+    Example usage:
+
+    Raw string inputs and pretrained backbone.
+    ```python
+    # Create a dataset with raw string features. Labels are inferred.
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+
+    # Create an AlbertMaskedLM with a pretrained backbone and further train
+    # on an MLM task.
+    masked_lm = keras_nlp.models.AlbertMaskedLM.from_preset(
+        "albert_base_en_uncased",
+    )
+    masked_lm.compile(
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    )
+    masked_lm.fit(x=features, batch_size=2)
+    ```
+
+    Preprocessed inputs and custom backbone.
+    ```python
+    # Create a preprocessed dataset where 0 is the mask token.
+    preprocessed_features = {
+        "segment_ids": tf.constant(
+            [[1, 0, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8)
+        ),
+        "token_ids": tf.constant(
+            [[1, 2, 0, 4, 0, 6, 7, 8]] * 2, shape=(2, 8)
+        ),
+        "padding_mask": tf.constant(
+            [[1, 1, 1, 1, 1, 1, 1, 1]] * 2, shape=(2, 8)
+        ),
+        "mask_positions": tf.constant([[2, 4]] * 2, shape=(2, 2)),
+    }
+    # Labels are the original masked values.
+    labels = [[3, 5]] * 2
+
+    # Randomly initialize an ALBERT encoder.
+    backbone = keras_nlp.models.AlbertBackbone(
+        vocabulary_size=1000,
+        num_layers=2,
+        num_heads=2,
+        embedding_dim=64,
+        hidden_dim=64,
+        intermediate_dim=128,
+        max_sequence_length=128,
+    )
+
+    # Create an ALBERT masked LM and fit the data.
+    masked_lm = keras_nlp.models.AlbertMaskedLM(
+        backbone,
+        preprocessor=None,
+    )
+    masked_lm.compile(
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        jit_compile=True,
+    )
+    masked_lm.fit(x=preprocessed_features, y=labels, batch_size=2)
+    ```
+    """
+
+    def __init__(self, backbone, preprocessor=None, **kwargs):
+        inputs = {
+            **backbone.input,
+            "mask_positions": keras.Input(
+                shape=(None,), dtype="int32", name="mask_positions"
+            ),
+        }
+
+        backbone_outputs = backbone(backbone.input)
+        outputs = MaskedLMHead(
+            vocabulary_size=backbone.vocabulary_size,
+            embedding_weights=backbone.token_embedding.embeddings,
+            intermediate_activation=lambda x: keras.activations.gelu(
+                x, approximate=True
+            ),
+            kernel_initializer=albert_kernel_initializer(),
+            name="mlm_head",
+        )(backbone_outputs["sequence_output"], inputs["mask_positions"])
+
+        super().__init__(
+            inputs=inputs,
+            outputs=outputs,
+            include_preprocessing=preprocessor is not None,
+            **kwargs,
+        )
+
+        self.backbone = backbone
+        self.preprocessor = preprocessor
+
+    @classproperty
+    def backbone_cls(cls):
+        return AlbertBackbone
+
+    @classproperty
+    def preprocessor_cls(cls):
+        return AlbertMaskedLMPreprocessor
+
+    @classproperty
+    def presets(cls):
+        return copy.deepcopy(backbone_presets)
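For completeness, here is how the detached workflow connects: the standalone preprocessor produces exactly the feature dictionary the functional graph in `__init__` expects. A hedged sketch, assuming `AlbertMaskedLMPreprocessor` follows the `(x, y, sample_weight)` call convention of KerasNLP's other masked LM preprocessors:

```python
import keras_nlp

preprocessor = keras_nlp.models.AlbertMaskedLMPreprocessor.from_preset(
    "albert_base_en_uncased"
)

# One call tokenizes, packs with [CLS]/[SEP], and randomly masks tokens.
# `y` holds the original ids of the masked tokens; `sample_weight` marks
# which mask slots are real rather than padding.
x, y, sample_weight = preprocessor(["The quick brown fox jumped."])

# `x` matches the model inputs assembled in `__init__` above.
print(sorted(x.keys()))
# ['mask_positions', 'padding_mask', 'segment_ids', 'token_ids']
```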
