Commit bdf93b0

reknit all guides and examples

1 parent: 43e7094

35 files changed (+882, -812 lines)

vignettes/custom_train_step_in_tensorflow.Rmd

Lines changed: 6 additions & 6 deletions
@@ -127,7 +127,7 @@ model |> fit(x, y, epochs = 3)
 
 ```
 ## Epoch 1/3
-## 32/32 - 1s - 29ms/step - mae: 1.4339 - loss: 3.2271
+## 32/32 - 1s - 23ms/step - mae: 1.4339 - loss: 3.2271
 ## Epoch 2/3
 ## 32/32 - 0s - 2ms/step - mae: 1.3605 - loss: 2.9034
 ## Epoch 3/3
@@ -282,11 +282,11 @@ model |> fit(x, y, sample_weight = sw, epochs = 3)
 
 ```
 ## Epoch 1/3
-## 32/32 - 1s - 28ms/step - mae: 1.3434 - loss: 0.1681
+## 32/32 - 1s - 23ms/step - mae: 1.3434 - loss: 0.1681
 ## Epoch 2/3
-## 32/32 - 0s - 3ms/step - mae: 1.3364 - loss: 0.1394
+## 32/32 - 0s - 2ms/step - mae: 1.3364 - loss: 0.1394
 ## Epoch 3/3
-## 32/32 - 0s - 3ms/step - mae: 1.3286 - loss: 0.1148
+## 32/32 - 0s - 4ms/step - mae: 1.3286 - loss: 0.1148
 ```
 
 ## Providing your own evaluation step
@@ -332,7 +332,7 @@ model |> evaluate(x, y)
 ```
 
 ```
-## 32/32 - 0s - 9ms/step - mae: 1.3871 - loss: 0.0000e+00
+## 32/32 - 0s - 10ms/step - mae: 1.3871 - loss: 0.0000e+00
 ```
 
 ```
@@ -508,7 +508,7 @@ gan |> fit(
 ```
 
 ```
-## 100/100 - 6s - 57ms/step - d_loss: 0.0000e+00 - g_loss: 0.0000e+00
+## 100/100 - 5s - 53ms/step - d_loss: 0.0000e+00 - g_loss: 0.0000e+00
 ```
 
 The ideas behind deep learning are simple, so why should their implementation be painful?

vignettes/distributed_training_with_tensorflow.Rmd

Lines changed: 17 additions & 14 deletions
@@ -124,20 +124,16 @@ get_compiled_model <- function() {
   model |> compile(
     optimizer = optimizer_adam(),
     loss = loss_sparse_categorical_crossentropy(from_logits = TRUE),
-    metrics = list(metric_sparse_categorical_accuracy()),
-
-    # XLA compilation is temporarily disabled due to a bug
-    # https://github.com/keras-team/keras/issues/19005
-    jit_compile = FALSE
+    metrics = list(metric_sparse_categorical_accuracy())
   )
   model
 }
 
 get_dataset <- function(batch_size = 64) {
 
   c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
-  x_train <- array_reshape(x_train, c(-1, 784))
-  x_test <- array_reshape(x_test, c(-1, 784))
+  x_train <- array_reshape(x_train, c(-1, 784)) / 255
+  x_test <- array_reshape(x_test, c(-1, 784)) / 255
 
   # Reserve 10,000 samples for validation.
   val_i <- sample.int(nrow(x_train), 10000)
@@ -146,18 +142,25 @@ get_dataset <- function(batch_size = 64) {
   x_train = x_train[-val_i,]
   y_train = y_train[-val_i]
 
+  y_train <- array_reshape(y_train, c(-1, 1))
+  y_val <- array_reshape(y_val, c(-1, 1))
+  y_test <- array_reshape(y_test, c(-1, 1))
+
   # Prepare the training dataset.
   train_dataset <- list(x_train, y_train) |>
+    lapply(np_array, "float32") |>
     tensor_slices_dataset() |>
     dataset_batch(batch_size)
 
   # Prepare the validation dataset.
   val_dataset <- list(x_val, y_val) |>
+    lapply(np_array, "float32") |>
     tensor_slices_dataset() |>
     dataset_batch(batch_size)
 
   # Prepare the test dataset.
   test_dataset <- list(x_test, y_test) |>
+    lapply(np_array, "float32") |>
     tensor_slices_dataset() |>
     dataset_batch(batch_size)
 
@@ -193,18 +196,18 @@ with(strategy$scope(), {
 
 ```
 ## Epoch 1/2
-## 782/782 - 4s - 6ms/step - loss: 2.1409 - sparse_categorical_accuracy: 0.8896 - val_loss: 0.7223 - val_sparse_categorical_accuracy: 0.9216
+## 782/782 - 7s - 9ms/step - loss: nan - sparse_categorical_accuracy: nan - val_loss: nan - val_sparse_categorical_accuracy: nan
 ## Epoch 2/2
-## 782/782 - 3s - 4ms/step - loss: 0.4292 - sparse_categorical_accuracy: 0.9387 - val_loss: 0.3693 - val_sparse_categorical_accuracy: 0.9404
-## 157/157 - 0s - 2ms/step - loss: 0.3976 - sparse_categorical_accuracy: 0.9386
+## 782/782 - 5s - 7ms/step - loss: nan - sparse_categorical_accuracy: nan - val_loss: nan - val_sparse_categorical_accuracy: nan
+## 157/157 - 1s - 5ms/step - loss: nan - sparse_categorical_accuracy: nan
 ```
 
 ```
 ## $loss
-## [1] 0.3976028
+## [1] NaN
 ##
 ## $sparse_categorical_accuracy
-## [1] 0.9386
+## [1] NaN
 ```
 
 ## Using callbacks to ensure fault tolerance
@@ -274,7 +277,7 @@ run_training(epochs = 1)
 ```
 
 ```
-## 782/782 - 4s - 5ms/step - loss: 0.1485 - sparse_categorical_accuracy: 0.9627 - val_loss: 0.2062 - val_sparse_categorical_accuracy: 0.9560
+## 782/782 - 5s - 7ms/step - loss: nan - sparse_categorical_accuracy: nan - val_loss: nan - val_sparse_categorical_accuracy: nan
 ```
 
 ``` r
@@ -283,7 +286,7 @@ run_training(epochs = 1)
 ```
 
 ```
-## 782/782 - 4s - 5ms/step - loss: 0.1227 - sparse_categorical_accuracy: 0.9673 - val_loss: 0.2007 - val_sparse_categorical_accuracy: 0.9602
+## 782/782 - 6s - 7ms/step - loss: nan - sparse_categorical_accuracy: nan - val_loss: nan - val_sparse_categorical_accuracy: nan
 ```
 
 ## `tf$data` performance tips

vignettes/distribution.Rmd

Lines changed: 91 additions & 11 deletions
@@ -56,7 +56,7 @@ Sys.setenv("XLA_FLAGS" = "--xla_force_host_platform_device_count=8")
 library(keras3)
 
 # The distribution API is only implemented for the JAX backend for now.
-use_backend("jax")
+use_backend("jax", FALSE)
 jax <- reticulate::import("jax")
 
 library(tfdatasets, exclude = "shape") # For dataset input.
@@ -184,24 +184,24 @@ model |> fit(dataset, epochs = 3)
 
 ```
 ## Epoch 1/3
-## 8/8 - 0s - 38ms/step - loss: 1.1533
+## 8/8 - 0s - 40ms/step - loss: 1.1536
 ## Epoch 2/3
-## 8/8 - 0s - 5ms/step - loss: 1.0621
+## 8/8 - 0s - 5ms/step - loss: 1.0540
 ## Epoch 3/3
-## 8/8 - 0s - 7ms/step - loss: 1.0163
+## 8/8 - 0s - 6ms/step - loss: 1.0072
 ```
 
 ``` r
 model |> evaluate(dataset)
 ```
 
 ```
-## 8/8 - 0s - 7ms/step - loss: 0.9673
+## 8/8 - 0s - 9ms/step - loss: 0.9620
 ```
 
 ```
 ## $loss
-## [1] 0.9673058
+## [1] 0.9620273
 ```
 
 
@@ -269,7 +269,85 @@ outputs <- inputs |>
               name = "d2")
 
 model <- keras_model(inputs = inputs, outputs = outputs)
+```
+
+We can visualize how individual weights will be sharded:
+
+``` r
+d1 <- get_layer(model, "d1")
+d1$kernel$value |> jax$debug$visualize_array_sharding()
+```
+
+```
+## ┌───────┬───────┬───────┬───────┐
+## │       │       │       │       │
+## │       │       │       │       │
+## │       │       │       │       │
+## │       │       │       │       │
+## │CPU 0,4│CPU 1,5│CPU 2,6│CPU 3,7│
+## │       │       │       │       │
+## │       │       │       │       │
+## │       │       │       │       │
+## │       │       │       │       │
+## └───────┴───────┴───────┴───────┘
+```
+
+``` r
+d2 <- get_layer(model, "d2")
+d2$kernel$value |> jax$debug$visualize_array_sharding()
+```
+
+```
+## ┌───────────────────┐
+## │                   │
+## │                   │
+## │                   │
+## │                   │
+## │CPU 0,1,2,3,4,5,6,7│
+## │                   │
+## │                   │
+## │                   │
+## │                   │
+## └───────────────────┘
+```
+
+``` r
+d2$bias$value |> jax$debug$visualize_array_sharding()
+```
 
+```
+## ┌───────────────────┐
+## │CPU 0,1,2,3,4,5,6,7│
+## └───────────────────┘
+```
+
+``` r
+x_batch <- dataset |>
+  as_iterator() |> iter_next() |>
+  _[[1]] |> op_convert_to_tensor()
+
+output_array <- model(x_batch)
+output_array |> jax$debug$visualize_array_sharding()
+```
+
+```
+## ┌─────────────┐
+## │             │
+## │ CPU 0,1,2,3 │
+## │             │
+## │             │
+## ├─────────────┤
+## │             │
+## │ CPU 4,5,6,7 │
+## │             │
+## │             │
+## └─────────────┘
+```
+
+
+
+
+``` r
 # The data will be sharded across the "data" dimension of the method, which
 # has 2 devices.
 model |> compile(loss = "mse")
@@ -278,24 +356,24 @@ model |> fit(dataset, epochs = 3)
 
 ```
 ## Epoch 1/3
-## 8/8 - 0s - 42ms/step - loss: 1.1424
+## 8/8 - 0s - 46ms/step - loss: 1.1676
 ## Epoch 2/3
-## 8/8 - 0s - 7ms/step - loss: 1.0528
+## 8/8 - 0s - 4ms/step - loss: 1.1134
 ## Epoch 3/3
-## 8/8 - 0s - 7ms/step - loss: 1.0393
+## 8/8 - 0s - 5ms/step - loss: 1.1034
 ```
 
 ``` r
 model |> evaluate(dataset)
 ```
 
 ```
-## 8/8 - 0s - 9ms/step - loss: 1.0088
+## 8/8 - 0s - 8ms/step - loss: 1.0676
 ```
 
 ```
 ## $loss
-## [1] 1.008847
+## [1] 1.067567
 ```
 
 
@@ -335,3 +413,5 @@ full_model_parallel_mesh <- keras$distribution$DeviceMesh(
 3. [TensorFlow Distributed training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial)
 4. [TensorFlow DTensor concepts](https://www.tensorflow.org/guide/dtensor_overview)
 5. [Using DTensors with tf.keras](https://www.tensorflow.org/tutorials/distribute/dtensor_keras_tutorial)
+
+
vignettes/examples/index.Rmd

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 ---
 title: Keras examples
 output: rmarkdown::html_vignette
-date: 'Last Modified: 2023-11-30; Last Rendered: 2025-01-23'
+date: 'Last Modified: 2023-11-30; Last Rendered: 2025-05-02'
 vignette: >
   %\VignetteIndexEntry{Keras examples}
   %\VignetteEngine{knitr::rmarkdown}
