Skip to content

Commit 45ca0fc

Browse files
committed
use return_dict=FALSE in evaluate() and related test functions
1 parent 2c99a4f commit 45ca0fc

File tree

3 files changed

+25
-11
lines changed

3 files changed

+25
-11
lines changed

R/model-training.R

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -269,13 +269,17 @@ function (object, x = NULL, y = NULL, ..., batch_size = NULL,
269269
verbose = as_model_verbose_arg),
270270
ignore = "object",
271271
force = "verbose")
272-
args[["return_dict"]] <- TRUE
272+
args[["return_dict"]] <- FALSE
273273

274274
if(inherits(args$x, "tensorflow.python.data.ops.dataset_ops.DatasetV2") &&
275275
!is.null(args$batch_size))
276276
stop("batch_size can not be specified with a TF Dataset")
277277

278278
result <- do.call(object$evaluate, args)
279+
if (length(result) > 1L) {
280+
result <- as.list(result)
281+
names(result) <- object$metrics_names
282+
}
279283

280284
tfruns::write_run_metadata("evaluation", unlist(result))
281285

@@ -756,8 +760,15 @@ function (object, x, y = NULL, sample_weight = NULL, ...)
756760
{
757761
result <- object$test_on_batch(as_array(x),
758762
as_array(y),
759-
as_array(sample_weight), ..., return_dict = TRUE)
760-
if (is_scalar(result)) result[[1L]] else result
763+
as_array(sample_weight), ...,
764+
return_dict = FALSE)
765+
if (length(result) > 1L) {
766+
result <- as.list(result)
767+
names(result) <- object$metrics_names
768+
} else if (is_scalar(result)) {
769+
result <- result[[1L]]
770+
}
771+
result
761772
}
762773

763774
# ---- test_on_batch ----
@@ -813,8 +824,15 @@ function (object, x, y = NULL, sample_weight = NULL, class_weight = NULL)
813824
as_array(y),
814825
as_array(sample_weight),
815826
class_weight = as_class_weight(class_weight),
816-
return_dict = TRUE)
817-
if(is_scalar(result)) result[[1L]] else result
827+
return_dict = FALSE)
828+
if (length(result) > 1L) {
829+
result <- as.list(result)
830+
names(result) <- object$metrics_names
831+
} else if (is_scalar(result)) {
832+
result <- result[[1L]]
833+
}
834+
835+
result
818836
}
819837

820838

tools/archive/make.R

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,6 @@ if(!"source:tools/utils.R" %in% search()) envir::attach_source("tools/utils.R")
111111
# }
112112
#
113113
# TODO: layer_category_encoding()(count_weights) call arg example not working
114-
# TODO: backout usage of `return_dict=TRUE` in evaluate() and friends - the output order is not stable.
115-
# use `setNames(as.list())`
116-
# ## Deferred until upstream bug fixed,
117-
# ## model.metrics_names returns wrong result
118114

119115
## Docs ----
120116

vignettes-src/training_with_built_in_methods.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ when using built-in APIs for training & validation (such as `fit()`,
2323
`evaluate()` and `predict()`).
2424

2525
If you are interested in leveraging `fit()` while specifying your
26-
own training step function, see the
26+
own training step function, see the
2727
[Customizing what happens in `fit()` guide](custom_train_step_in_tensorflow.html).
2828

2929
<!-- guides on customizing what happens in `fit()`: -->
@@ -134,7 +134,7 @@ We evaluate the model on the test data via `evaluate()`:
134134
```{r}
135135
# Evaluate the model on the test data using `evaluate`
136136
results <- model |> evaluate(x_test, y_test, batch_size=128)
137-
results
137+
str(results)
138138
139139
# Generate predictions (probabilities -- the output of the last layer)
140140
# on new data using `predict`

0 commit comments

Comments (0)