Skip to content

Commit c7f11c7

Browse files
authored
KerasNLP 0.4 API documentation (#1157)
* KerasNLP 0.4 API documentation * Address comments * Fixes * Remove deberta_v3 for now * Go back to pypi keras-nlp * Preset renames
1 parent 2481e80 commit c7f11c7

File tree

8 files changed

+389
-36
lines changed

8 files changed

+389
-36
lines changed

scripts/autogen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
"keras": "https://github.com/keras-team/keras/tree/v2.11.0/",
4343
"keras_tuner": "https://github.com/keras-team/keras-tuner/tree/1.1.3/",
4444
"keras_cv": "https://github.com/keras-team/keras-cv/tree/v0.3.4/",
45-
"keras_nlp": "https://github.com/keras-team/keras-nlp/tree/v0.3.1/",
45+
"keras_nlp": "https://github.com/keras-team/keras-nlp/tree/v0.4.0/",
4646
}
4747

4848

scripts/nlp_api_master.py

Lines changed: 248 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,179 @@
1+
MODELS_MASTER = {
2+
"path": "models/",
3+
"title": "Models",
4+
"toc": True,
5+
"children": [
6+
{
7+
"path": "bert/",
8+
"title": "Bert",
9+
"toc": True,
10+
"children": [
11+
{
12+
"path": "bert_tokenizer",
13+
"title": "BertTokenizer",
14+
"generate": [
15+
"keras_nlp.models.BertTokenizer",
16+
"keras_nlp.models.BertTokenizer.from_preset",
17+
],
18+
},
19+
{
20+
"path": "bert_preprocessor",
21+
"title": "BertPreprocessor layer",
22+
"generate": [
23+
"keras_nlp.models.BertPreprocessor",
24+
"keras_nlp.models.BertPreprocessor.from_preset",
25+
"keras_nlp.models.BertPreprocessor.tokenizer",
26+
],
27+
},
28+
{
29+
"path": "bert_backbone",
30+
"title": "BertBackbone model",
31+
"generate": [
32+
"keras_nlp.models.BertBackbone",
33+
"keras_nlp.models.BertBackbone.from_preset",
34+
],
35+
},
36+
{
37+
"path": "bert_classifier",
38+
"title": "BertClassifier model",
39+
"generate": [
40+
"keras_nlp.models.BertClassifier",
41+
"keras_nlp.models.BertClassifier.from_preset",
42+
"keras_nlp.models.BertClassifier.backbone",
43+
"keras_nlp.models.BertClassifier.preprocessor",
44+
],
45+
},
46+
],
47+
},
48+
{
49+
"path": "distil_bert/",
50+
"title": "DistilBert",
51+
"toc": True,
52+
"children": [
53+
{
54+
"path": "distil_bert_tokenizer",
55+
"title": "DistilBertTokenizer",
56+
"generate": [
57+
"keras_nlp.models.DistilBertTokenizer",
58+
"keras_nlp.models.DistilBertTokenizer.from_preset",
59+
],
60+
},
61+
{
62+
"path": "distil_bert_preprocessor",
63+
"title": "DistilBertPreprocessor layer",
64+
"generate": [
65+
"keras_nlp.models.DistilBertPreprocessor",
66+
"keras_nlp.models.DistilBertPreprocessor.from_preset",
67+
"keras_nlp.models.DistilBertPreprocessor.tokenizer",
68+
],
69+
},
70+
{
71+
"path": "distil_bert_backbone",
72+
"title": "DistilBertBackbone model",
73+
"generate": [
74+
"keras_nlp.models.DistilBertBackbone",
75+
"keras_nlp.models.DistilBertBackbone.from_preset",
76+
],
77+
},
78+
{
79+
"path": "distil_bert_classifier",
80+
"title": "DistilBertClassifier model",
81+
"generate": [
82+
"keras_nlp.models.DistilBertClassifier",
83+
"keras_nlp.models.DistilBertClassifier.from_preset",
84+
"keras_nlp.models.DistilBertClassifier.backbone",
85+
"keras_nlp.models.DistilBertClassifier.preprocessor",
86+
],
87+
},
88+
],
89+
},
90+
{
91+
"path": "roberta/",
92+
"title": "Roberta",
93+
"toc": True,
94+
"children": [
95+
{
96+
"path": "roberta_tokenizer",
97+
"title": "RobertaTokenizer",
98+
"generate": [
99+
"keras_nlp.models.RobertaTokenizer",
100+
"keras_nlp.models.RobertaTokenizer.from_preset",
101+
],
102+
},
103+
{
104+
"path": "roberta_preprocessor",
105+
"title": "RobertaPreprocessor layer",
106+
"generate": [
107+
"keras_nlp.models.RobertaPreprocessor",
108+
"keras_nlp.models.RobertaPreprocessor.from_preset",
109+
"keras_nlp.models.RobertaPreprocessor.tokenizer",
110+
],
111+
},
112+
{
113+
"path": "roberta_backbone",
114+
"title": "RobertaBackbone model",
115+
"generate": [
116+
"keras_nlp.models.RobertaBackbone",
117+
"keras_nlp.models.RobertaBackbone.from_preset",
118+
],
119+
},
120+
{
121+
"path": "roberta_classifier",
122+
"title": "RobertaClassifier model",
123+
"generate": [
124+
"keras_nlp.models.RobertaClassifier",
125+
"keras_nlp.models.RobertaClassifier.from_preset",
126+
"keras_nlp.models.RobertaClassifier.backbone",
127+
"keras_nlp.models.RobertaClassifier.preprocessor",
128+
],
129+
},
130+
],
131+
},
132+
{
133+
"path": "xlm_roberta/",
134+
"title": "XLMRoberta",
135+
"toc": True,
136+
"children": [
137+
{
138+
"path": "xlm_roberta_tokenizer",
139+
"title": "XLMRobertaTokenizer",
140+
"generate": [
141+
"keras_nlp.models.XLMRobertaTokenizer",
142+
"keras_nlp.models.XLMRobertaTokenizer.from_preset",
143+
],
144+
},
145+
{
146+
"path": "xlm_roberta_preprocessor",
147+
"title": "XLMRobertaPreprocessor layer",
148+
"generate": [
149+
"keras_nlp.models.XLMRobertaPreprocessor",
150+
"keras_nlp.models.XLMRobertaPreprocessor.from_preset",
151+
"keras_nlp.models.XLMRobertaPreprocessor.tokenizer",
152+
],
153+
},
154+
{
155+
"path": "xlm_roberta_backbone",
156+
"title": "XLMRobertaBackbone model",
157+
"generate": [
158+
"keras_nlp.models.XLMRobertaBackbone",
159+
"keras_nlp.models.XLMRobertaBackbone.from_preset",
160+
],
161+
},
162+
{
163+
"path": "xlm_roberta_classifier",
164+
"title": "XLMRobertaClassifier model",
165+
"generate": [
166+
"keras_nlp.models.XLMRobertaClassifier",
167+
"keras_nlp.models.XLMRobertaClassifier.from_preset",
168+
"keras_nlp.models.XLMRobertaClassifier.backbone",
169+
"keras_nlp.models.XLMRobertaClassifier.preprocessor",
170+
],
171+
},
172+
],
173+
},
174+
],
175+
}
176+
1177
TOKENIZERS_MASTER = {
2178
"path": "tokenizers/",
3179
"title": "Tokenizers",
@@ -69,24 +245,67 @@
69245
],
70246
},
71247
{
72-
"path": "unicode_character_tokenizer",
73-
"title": "UnicodeCharacterTokenizer",
248+
"path": "unicode_codepoint_tokenizer",
249+
"title": "UnicodeCodepointTokenizer",
74250
"generate": [
75-
"keras_nlp.tokenizers.UnicodeCharacterTokenizer",
76-
"keras_nlp.tokenizers.UnicodeCharacterTokenizer.tokenize",
77-
"keras_nlp.tokenizers.UnicodeCharacterTokenizer.detokenize",
78-
"keras_nlp.tokenizers.UnicodeCharacterTokenizer.get_vocabulary",
79-
"keras_nlp.tokenizers.UnicodeCharacterTokenizer.vocabulary_size",
80-
"keras_nlp.tokenizers.UnicodeCharacterTokenizer.token_to_id",
81-
"keras_nlp.tokenizers.UnicodeCharacterTokenizer.id_to_token",
251+
"keras_nlp.tokenizers.UnicodeCodepointTokenizer",
252+
"keras_nlp.tokenizers.UnicodeCodepointTokenizer.tokenize",
253+
"keras_nlp.tokenizers.UnicodeCodepointTokenizer.detokenize",
254+
"keras_nlp.tokenizers.UnicodeCodepointTokenizer.get_vocabulary",
255+
"keras_nlp.tokenizers.UnicodeCodepointTokenizer.vocabulary_size",
256+
"keras_nlp.tokenizers.UnicodeCodepointTokenizer.token_to_id",
257+
"keras_nlp.tokenizers.UnicodeCodepointTokenizer.id_to_token",
82258
],
83259
},
260+
{
261+
"path": "compute_word_piece_vocabulary",
262+
"title": "compute_word_piece_vocabulary function",
263+
"generate": ["keras_nlp.tokenizers.compute_word_piece_vocabulary"],
264+
},
265+
{
266+
"path": "compute_sentence_piece_proto",
267+
"title": "compute_sentence_piece_proto function",
268+
"generate": ["keras_nlp.tokenizers.compute_sentence_piece_proto"],
269+
},
84270
],
85271
}
86272

87-
LAYERS_MASTER = {
88-
"path": "layers/",
89-
"title": "Layers",
273+
PREPROCESSING_LAYERS_MASTER = {
274+
"path": "preprocessing_layers/",
275+
"title": "Preprocessing Layers",
276+
"toc": True,
277+
"children": [
278+
{
279+
"path": "start_end_packer",
280+
"title": "StartEndPacker layer",
281+
"generate": ["keras_nlp.layers.StartEndPacker"],
282+
},
283+
{
284+
"path": "multi_segment_packer",
285+
"title": "MultiSegmentPacker layer",
286+
"generate": ["keras_nlp.layers.MultiSegmentPacker"],
287+
},
288+
{
289+
"path": "random_swap",
290+
"title": "RandomSwap layer",
291+
"generate": ["keras_nlp.layers.RandomSwap"],
292+
},
293+
{
294+
"path": "random_deletion",
295+
"title": "RandomDeletion layer",
296+
"generate": ["keras_nlp.layers.RandomDeletion"],
297+
},
298+
{
299+
"path": "masked_lm_mask_generator",
300+
"title": "MaskedLMMaskGenerator layer",
301+
"generate": ["keras_nlp.layers.MaskedLMMaskGenerator"],
302+
},
303+
],
304+
}
305+
306+
MODELING_LAYERS_MASTER = {
307+
"path": "modeling_layers/",
308+
"title": "Modeling Layers",
90309
"toc": True,
91310
"children": [
92311
{
@@ -126,24 +345,9 @@
126345
"generate": ["keras_nlp.layers.TokenAndPositionEmbedding"],
127346
},
128347
{
129-
"path": "mlm_mask_generator",
130-
"title": "MLMMaskGenerator layer",
131-
"generate": ["keras_nlp.layers.MLMMaskGenerator"],
132-
},
133-
{
134-
"path": "mlm_head",
135-
"title": "MLMHead layer",
136-
"generate": ["keras_nlp.layers.MLMHead"],
137-
},
138-
{
139-
"path": "start_end_packer",
140-
"title": "StartEndPacker layer",
141-
"generate": ["keras_nlp.layers.StartEndPacker"],
142-
},
143-
{
144-
"path": "multi_segment_packer",
145-
"title": "MultiSegmentPacker layer",
146-
"generate": ["keras_nlp.layers.MultiSegmentPacker"],
348+
"path": "masked_lm_head",
349+
"title": "MaskedLMHead layer",
350+
"generate": ["keras_nlp.layers.MaskedLMHead"],
147351
},
148352
],
149353
}
@@ -169,6 +373,17 @@
169373
"title": "RougeN metric",
170374
"generate": ["keras_nlp.metrics.RougeN"],
171375
},
376+
{
377+
"path": "bleu",
378+
"title": "Bleu metric",
379+
"generate": ["keras_nlp.metrics.Bleu"],
380+
},
381+
{
382+
"path": "edit_distance",
383+
"title": "EditDistance metric",
384+
"generate": ["keras_nlp.metrics.EditDistance"],
385+
},
386+
172387
],
173388
}
174389

@@ -210,8 +425,10 @@
210425
"title": "KerasNLP",
211426
"toc": True,
212427
"children": [
428+
MODELS_MASTER,
213429
TOKENIZERS_MASTER,
214-
LAYERS_MASTER,
430+
PREPROCESSING_LAYERS_MASTER,
431+
MODELING_LAYERS_MASTER,
215432
METRICS_MASTER,
216433
UTILS_MASTER,
217434
],

templates/api/keras_nlp/index.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# KerasNLP
22

3-
KerasNLP is a toolbox of modular building blocks (layers, metrics, etc.) that
4-
NLP engineers can leverage to develop
5-
production-grade, state-of-the-art training and inference pipelines for common NLP workflows.
3+
KerasNLP is a toolbox of modular building blocks ranging from pretrained
4+
state-of-the-art models to low-level Transformer Encoder layers. For an
5+
introduction to the library, see the [KerasNLP home page](/keras_nlp). For a
6+
high-level introduction to the API, see our
7+
[getting started guide](/guides/keras_nlp/getting_started/).
68

79
{{toc}}
8-
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# BERT
2+
3+
Models, tokenizers, and preprocessing layers for BERT,
4+
as described in ["BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding"](https://arxiv.org/abs/1810.04805).
5+
6+
For a full list of available **presets**, see the
7+
[models page](/api/keras_nlp/models).
8+
9+
{{toc}}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# DistilBERT
2+
3+
Models, tokenizers, and preprocessing layers for DistilBERT,
4+
as described in ["DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter"](https://arxiv.org/abs/1910.01108).
5+
6+
For a full list of available **presets**, see the
7+
[models page](/api/keras_nlp/models).
8+
9+
{{toc}}

0 commit comments

Comments
 (0)