
Commit 43e7094

minor vignette fixes

1 parent 1906e09

3 files changed: 33 additions, 35 deletions

vignettes-src/distribution.Rmd

Lines changed: 2 additions & 2 deletions

@@ -240,8 +240,8 @@ d2 <- get_layer(model, "d2")
 d2$kernel$value |> jax$debug$visualize_array_sharding()
 d2$bias$value |> jax$debug$visualize_array_sharding()

-x_batch <- dataset |>
-  as_iterator() |> iter_next() |>
+x_batch <- dataset |>
+  as_iterator() |> iter_next() |>
   _[[1]] |> op_convert_to_tensor()

 output_array <- model(x_batch)
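Worth flagging for anyone running the chunk above: using `_[[1]]` as a pipe stage requires R >= 4.3, where the native-pipe placeholder may head an extraction call. A minimal sketch with a plain list (the names here are illustrative, not from the vignette):

```r
# From R 4.3.0, `_` may head an extraction call on the right-hand side of |>,
# which is what `_[[1]]` in the chunk above relies on.
batch <- list(x = matrix(rnorm(6), nrow = 2), y = c(1, 0))
first_element <- batch |> _[[1]]   # equivalent to batch[[1]]
str(first_element)
```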

vignettes-src/examples/nlp/neural_machine_translation_with_transformer.Rmd

Lines changed: 30 additions & 32 deletions

@@ -50,13 +50,11 @@ We'll be working with an English-to-Spanish translation dataset
 provided by [Anki](https://www.manythings.org/anki/). Let's download it:

 ```{r}
-zipfile <- get_file("spa-eng.zip", origin =
-  "http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip")
+zip_path <-
+  "http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip" |>
+  get_file(origin = _, extract = TRUE)

-zip::zip_list(zipfile) # See what's in the zipfile
-zip::unzip(zipfile, exdir = ".") # unzip into the current directory
-
-text_file <- fs::path("./spa-eng/spa.txt")
+text_path <- fs::path(zip_path, "spa-eng/spa.txt")
 ```

 ## Parsing the data
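The rewritten download step assumes `get_file(extract = TRUE)` returns a path that can be treated as the directory the archive was extracted into (newer Keras releases behave this way), which is what `fs::path(zip_path, "spa-eng/spa.txt")` relies on. A quick, hedged sanity check of that assumption:

```r
library(keras3)

# If get_file() with extract = TRUE returns the extraction directory,
# the parsed text file should exist directly beneath it.
zip_path <- get_file(
  origin  = "http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip",
  extract = TRUE
)
fs::file_exists(fs::path(zip_path, "spa-eng", "spa.txt"))  # expected: TRUE
```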
@@ -209,36 +207,31 @@ it provides the next words in the target sentence -- what the model will try to

 ```{r}
 format_pair <- function(pair) {
-  # the vectorization layers requrie batched inputs,
-  # reshape scalar string tensor to add a batch dim
-  pair %<>% lapply(op_expand_dims, 1)
-
-  # vectorize
-  eng <- eng_vectorization(pair$english)
-  spa <- spa_vectorization(pair$spanish)
-
-  # drop the batch dim
-  eng %<>% tf$ensure_shape(shape(1, sequence_length)) %>% op_squeeze(1)
-  spa %<>% tf$ensure_shape(shape(1, sequence_length+1)) %>% op_squeeze(1)
-
-  inputs <- list(encoder_inputs = eng,
-                 decoder_inputs = spa[NA:-2])
-  targets <- spa[2:NA]
-  list(inputs, targets)
-}
+  eng <- pair$english |> eng_vectorization()
+  spa <- pair$spanish |> spa_vectorization()
+
+  spa_feature <- spa@r[NA:-2] # <1>
+  spa_target <- spa@r[2:NA] # <2>

+  features <- list(encoder_inputs = eng, decoder_inputs = spa_feature)
+  labels <- spa_target
+  sample_weight <- labels != 0
+
+  tuple(features, labels, sample_weight)
+}

 batch_size <- 64

 library(tfdatasets, exclude = "shape")
 make_dataset <- function(pairs) {
-  tensor_slices_dataset(pairs) %>%
-    dataset_map(format_pair, num_parallel_calls = 4) %>%
-    dataset_cache() %>%
-    dataset_shuffle(2048) %>%
-    dataset_batch(batch_size) %>%
-    dataset_prefetch(2)
+  tensor_slices_dataset(pairs) |>
+    dataset_map(format_pair, num_parallel_calls = 4) |>
+    dataset_cache() |>
+    dataset_shuffle(2048) |>
+    dataset_batch(batch_size) |>
+    dataset_prefetch(16)
 }
+
 train_ds <- make_dataset(train_pairs)
 val_ds <- make_dataset(val_pairs)
 ```
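The new `format_pair()` returns a three-element `(features, labels, sample_weight)` structure; when a TF dataset yields three elements, Keras `fit()` treats the third as sample weights, so `labels != 0` zeroes out padded positions (token id 0) in the loss. A toy, plain-R illustration of how the weights line up with the labels (values made up):

```r
# Plain R vectors, not tensors: padded positions (token id 0) get weight FALSE/0,
# so they contribute nothing to the weighted loss.
labels <- c(12L, 7L, 93L, 2L, 0L, 0L)   # 0 is the padding token id
sample_weight <- labels != 0
rbind(labels, sample_weight)
```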
@@ -248,7 +241,7 @@ Let's take a quick look at the sequence shapes
 (we have batches of 64 pairs, and all sequences are 20 steps long):

 ```{r}
-c(inputs, targets) %<-% iter_next(as_iterator(train_ds))
+c(inputs, targets, weights) %<-% iter_next(as_iterator(train_ds))
 str(inputs)
 str(targets)
 ```
@@ -346,7 +339,7 @@ layer_transformer_decoder <- Layer(
   get_causal_attention_mask = function(inputs) {
     c(batch_size, sequence_length, encoding_length) %<-% op_shape(inputs)

-    x <- op_arange(sequence_length)
+    x <- op_arange(0L, sequence_length, include_end = FALSE)
     i <- x[, NULL]
     j <- x[NULL, ]
     mask <- op_cast(i >= j, "int32")
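The mask logic is easy to sanity check outside of Keras: query position i may attend to key position j only when i >= j. A base-R sketch of the same computation for sequence_length = 4 (reproducing the logic only, not the Keras ops):

```r
# Base-R version of the logic in get_causal_attention_mask():
# rows are query positions, columns are key positions, 1 means "may attend".
sequence_length <- 4
x <- 0:(sequence_length - 1)          # the values op_arange() is asked to produce
mask <- 1L * outer(x, x, FUN = ">=")  # lower-triangular, diagonal included
mask
```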
@@ -398,7 +391,7 @@ layer_positional_embedding <- Layer(

   call = function(inputs) {
     c(., len) %<-% op_shape(inputs) # (batch_size, seq_len)
-    positions <- op_arange(0, len, dtype = "int32")
+    positions <- op_arange(0, len, dtype = "int32", include_end = FALSE)
     embedded_tokens <- self$token_embeddings(inputs)
     embedded_positions <- self$position_embeddings(positions)
     embedded_tokens + embedded_positions
@@ -476,6 +469,7 @@ transformer |> compile(
   loss = "sparse_categorical_crossentropy",
   metrics = "accuracy"
 )
+
 transformer |> fit(train_ds, epochs = epochs,
                    validation_data = val_ds)
 ```
@@ -544,3 +538,7 @@ English: I'm sure everything will be fine.
 Correct Translation: [start] estoy segura de que todo irá bien. [end]
 Model Translation: [start] estoy seguro de que todo va bien [end]
 ```
+```{r}
+
+```
+

vignettes-src/transfer_learning.Rmd

Lines changed: 1 addition & 1 deletion

@@ -331,7 +331,7 @@ dataset small, we will use 40% of the original training data (25,000 images) for
 training, 10% for validation, and 10% for testing.

 ```{r}
-# reticulate::py_install("tensorflow-datasets")
+reticulate::py_require("tensorflow-datasets")
 tfds <- reticulate::import("tensorflow_datasets")

 c(train_ds, validation_ds, test_ds) %<-% tfds$load(
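`reticulate::py_require()` (available in recent reticulate releases) declares a Python package requirement that reticulate resolves when it initializes Python, rather than installing into a pre-existing environment the way the commented-out `py_install()` call did. A small, hedged sketch of the intended usage:

```r
# Declare the Python dependency up front; reticulate resolves it when the
# Python interpreter is initialized.
reticulate::py_require("tensorflow-datasets")

tfds <- reticulate::import("tensorflow_datasets")
tfds$`__version__`  # confirm the module resolved and is importable
```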
