Commit 1396724

committed: pre-processing layers vignette tweaks

1 parent 97779e9


vignettes/new-guides/preprocessing_layers.Rmd

Lines changed: 16 additions & 67 deletions
@@ -111,10 +111,10 @@ data <- rbind(c(0.1, 0.2, 0.3),
               c(1.5, 1.6, 1.7))
 layer <- layer_normalization()
 adapt(layer, data)
-normalized_data <- layer(data)
+normalized_data <- as.array(layer(data))
 
-sprintf("Features mean: %.2f", mean(as.array(normalized_data)))
-sprintf("Features std: %.2f", sd(as.array(normalized_data)))
+sprintf("Features mean: %.2f", mean(normalized_data))
+sprintf("Features std: %.2f", sd(normalized_data))
 ```
 
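Read on its own, this hunk is only a fragment. Below is a hedged, standalone sketch of the code as it stands after the change; the middle row of `data` is not visible in the hunk and is filled in from the upstream keras.io guide, so treat it as an assumption.

```r
library(keras)  # assumed: the package this vignette documents

data <- rbind(c(0.1, 0.2, 0.3),
              c(0.8, 0.9, 1.0),   # assumed middle row, not shown in the hunk
              c(1.5, 1.6, 1.7))

layer <- layer_normalization()
adapt(layer, data)                        # learn per-feature mean and variance
normalized_data <- as.array(layer(data))  # convert the tensor to a plain R array

sprintf("Features mean: %.2f", mean(normalized_data))  # close to 0 after normalization
sprintf("Features std: %.2f", sd(normalized_data))     # close to 1 after normalization
```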

@@ -186,7 +186,7 @@ augmentation layers.
 **Option 2:** apply it to your `tf.data.Dataset`, so as to obtain a dataset that yields
 batches of preprocessed data, like this:
 
-```{r}
+```{r, eval = FALSE}
 library(tfdatasets)
 dataset <- ... # define dataset
 dataset <- dataset %>%
@@ -198,7 +198,7 @@ will be buffered before going into the model. In addition, if you call
 `tfdatasets::dataset_prefetch()` on your dataset, the preprocessing will happen
 efficiently in parallel with training:
 
-```{r}
+```{r, eval = FALSE}
 dataset <- dataset %>%
   dataset_map(function(x, y) list(preprocessing_layer(x), y)) %>%
   dataset_prefetch()
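Both of these snippets are now marked `eval = FALSE` in the vignette and rely on objects defined elsewhere. As a point of reference, here is a hedged, self-contained sketch of the same pattern; the toy data and the normalization layer standing in for `preprocessing_layer` are assumptions made only for illustration.

```r
library(keras)       # assumed
library(tfdatasets)

# A stand-in for any adapt()-ed preprocessing layer (assumption).
preprocessing_layer <- layer_normalization()
adapt(preprocessing_layer, matrix(rnorm(300), ncol = 3))

# A toy labelled dataset; the vignette's real dataset is elided ("...") above.
x <- matrix(rnorm(30), ncol = 3)
y <- sample(0:1, 10, replace = TRUE)

dataset <- tensor_slices_dataset(list(x, y)) %>%
  dataset_batch(5) %>%
  dataset_map(function(x, y) list(preprocessing_layer(x), y)) %>%
  dataset_prefetch()   # preprocessing now overlaps with training
```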
@@ -236,7 +236,7 @@ you can export an inference model that packages the preprocessing.
 Simply instantiate a new model that chains
 your preprocessing layers and your training model:
 
-```{r}
+```{r, eval = FALSE}
 input <- layer_input(shape = input_shape)
 output <- input %>%
   preprocessing_layer(input) %>%
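The hunk stops mid-pipeline, so here is a hedged sketch of the complete pattern it refers to. The stand-in objects (`input_shape`, `preprocessing_layer`, `training_model`) are assumptions added so the sketch runs on its own, and the pipe form is an idiomatic paraphrase rather than the vignette's exact code.

```r
library(keras)  # assumed

# Stand-ins, assumed for illustration: a preprocessing layer and a trained model.
input_shape <- c(3)
preprocessing_layer <- layer_normalization()
adapt(preprocessing_layer, matrix(rnorm(300), ncol = 3))

training_model <- keras_model_sequential() %>%
  layer_dense(units = 1, input_shape = input_shape)

# Chain preprocessing and the trained model into one exportable inference model.
input <- layer_input(shape = input_shape)
output <- input %>%
  preprocessing_layer() %>%   # preprocessing baked into the exported graph
  training_model()
inference_model <- keras_model(input, output)
```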
@@ -451,7 +451,7 @@ train_dataset <- train_dataset %>%
   dataset_map(~list(text_vectorizer(.x), .y))
 
 # Train the model on the int sequences
-cat("\nTraining model...\n")
+cat("Training model...\n")
 model %>%
   compile(optimizer = "rmsprop", loss = "mse") %>%
   fit(train_dataset)
@@ -465,10 +465,10 @@ output <- input %>%
 end_to_end_model <- keras_model(input, output)
 
 # Call the end-to-end model on test data (which includes unknown tokens)
-cat("\nCalling end-to-end model on test string...\n")
+cat("Calling end-to-end model on test string...\n")
 test_data <- tf$constant(matrix("The one the other will absorb"))
 test_output <- end_to_end_model(test_data)
-cat("Model output:", format(test_output), "\n")
+cat("Model output:", as.array(test_output), "\n")
 ```
 
 You can see the `layer_text_vectorization()` layer in action, combined with an `Embedding` mode,
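Several hunks in this commit make the same kind of change to the `cat()` calls, so a brief hedged illustration of the reasoning may help; the example tensor value below is an assumption.

```r
library(tensorflow)

# cat() cannot display a tensor object directly, so the value is converted first.
test_output <- tf$constant(matrix(c(0.12, 0.34), nrow = 1))  # assumed example value

cat("Model output:", as.array(test_output), "\n")  # convert, then cat the numbers
print(test_output)                                 # or print the tf.Tensor itself
```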
@@ -524,7 +524,7 @@ train_dataset <- train_dataset %>%
   dataset_map(~list(text_vectorizer(.x), .y))
 
 # Train the model on the int sequences
-cat("\nTraining model...\n")
+cat("Training model...\n")
 model %>%
   compile(optimizer="rmsprop", loss="mse") %>%
   fit(train_dataset)
@@ -539,68 +539,17 @@ output <- input %>%
 end_to_end_model = keras_model(input, output)
 
 # Call the end-to-end model on test data (which includes unknown tokens)
-cat("\nCalling end-to-end model on test string...\n")
+cat("Calling end-to-end model on test string...\n")
 test_data <- tf$constant(matrix("The one the other will absorb"))
 test_output <- end_to_end_model(test_data)
-cat("Model output:", format(test_output), "\n")
+cat("Model output: "); print(test_output); cat("\n")
 ```
 
 
 
 ### Encoding text as a dense matrix of ngrams with TF-IDF weighting
 
-This is an alternative way of preprocessing text before passing it to a `Dense` layer.
-
-```python
-# Define some text data to adapt the layer
-adapt_data = tf.constant(
-    [
-        "The Brain is wider than the Sky",
-        "For put them side by side",
-        "The one the other will contain",
-        "With ease and You beside",
-    ]
-)
-# Instantiate layer_text_vectorization() with "tf-idf" output_mode
-# (multi-hot with TF-IDF weighting) and ngrams=2 (index all bigrams)
-text_vectorizer = layers.layer_text_vectorization()(output_mode="tf-idf", ngrams=2)
-# Index the bigrams and learn the TF-IDF weights via `adapt()`
-text_vectorizer.adapt(adapt_data)
-
-# Try out the layer
-print(
-    "Encoded text:\n", text_vectorizer(["The Brain is deeper than the sea"]).numpy(),
-)
-
-# Create a simple model
-inputs = keras.Input(shape=(text_vectorizer.vocabulary_size(),))
-outputs = layers.Dense(1)(inputs)
-model = keras.Model(inputs, outputs)
-
-# Create a labeled dataset (which includes unknown tokens)
-train_dataset = tf.data.Dataset.from_tensor_slices(
-    (["The Brain is deeper than the sea", "for if they are held Blue to Blue"], [1, 0])
-)
-
-# Preprocess the string inputs, turning them into int sequences
-train_dataset = train_dataset.batch(2).map(lambda x, y: (text_vectorizer(x), y))
-# Train the model on the int sequences
-print("\nTraining model...")
-model.compile(optimizer="rmsprop", loss="mse")
-model.fit(train_dataset)
-
-# For inference, you can export a model that accepts strings as input
-inputs = keras.Input(shape=(1,), dtype="string")
-x = text_vectorizer(inputs)
-outputs = model(x)
-end_to_end_model = keras.Model(inputs, outputs)
-
-# Call the end-to-end model on test data (which includes unknown tokens)
-print("\nCalling end-to-end model on test string...")
-test_data = tf.constant(["The one the other will absorb"])
-test_output = end_to_end_model(test_data)
-print("Model output:", test_output)
-```
+This is an alternative way of preprocessing text before passing it to a `layer_dense` layer.
 
 ```{r}
 # Define some text data to adapt the layer
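The hunk ends just as the replacement R code block begins, so most of the new version is not visible here. As a hedged sketch of how the removed Python example translates to R (the call pattern follows the surrounding vignette, not the commit's actual code):

```r
library(keras)       # assumed
library(tensorflow)

# Define some text data to adapt the layer
adapt_data <- tf$constant(c("The Brain is wider than the Sky",
                            "For put them side by side",
                            "The one the other will contain",
                            "With ease and You beside"))

# "tf-idf" output mode (multi-hot with TF-IDF weighting); ngrams = 2 indexes all bigrams
text_vectorizer <- layer_text_vectorization(output_mode = "tf-idf", ngrams = 2L)

# Index the bigrams and learn the TF-IDF weights via adapt()
adapt(text_vectorizer, adapt_data)

# Try out the layer on text containing tokens it has not seen
print(text_vectorizer(tf$constant(matrix("The Brain is deeper than the sea"))))
```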
@@ -640,7 +589,7 @@ train_dataset <- train_dataset %>%
 
 
 # Train the model on the int sequences
-cat("\nTraining model...")
+cat("Training model...")
 model %>%
   compile(optimizer="rmsprop", loss="mse") %>%
   fit(train_dataset)
@@ -655,10 +604,10 @@ output <- input %>%
 end_to_end_model = keras_model(input, output)
 
 # Call the end-to-end model on test data (which includes unknown tokens)
-cat("\nCalling end-to-end model on test string...\n")
+cat("Calling end-to-end model on test string...\n")
 test_data <- tf$constant(matrix("The one the other will absorb"))
 test_output <- end_to_end_model(test_data)
-cat("Model output:", format(test_output))
+cat("Model output: "); print(test_output)
 ```
 