add test for get_vocabulary()

t-kalinowski · t-kalinowski · commit d669bfba9ee0 · 2024-05-17T12:28:35.000-04:00
diff --git a/R/layers-preprocessing.R b/R/layers-preprocessing.R
@@ -2337,11 +2337,6 @@ function (object, max_tokens = NULL, standardize = "lower_and_strip_punctuation"
 }
 
 
-
-# TODO: add tests/ confirm that `get_vocabulary()` returns an R character
-# vector. In older TF versions it used to return python byte objects, which
-# needed `x.decode("UTF-8") for x in vocab]`
-
 #' @param include_special_tokens If TRUE, the returned vocabulary will include
 #'   the padding and OOV tokens, and a term's index in the vocabulary will equal
 #'   the term's index when calling the layer. If FALSE, the returned vocabulary
diff --git a/tests/testthat/test-layer-text_vectorization.R b/tests/testthat/test-layer-text_vectorization.R
@@ -114,3 +114,18 @@ test_call_succeeds("can create a tf-idf layer", {
   expect_s3_class(x, "tensorflow.tensor")
 
 })
+
+
+
+test_call_succeeds("get_vocabulary() returns R character vector", {
+  # Adapt a default text-vectorization layer on a two-sentence corpus.
+  text_vectorization <- layer_text_vectorization()
+  with(tf$device("/cpu:0"), {
+    text_vectorization %>% adapt(c("hello world", "hello"))
+  })
+  vocab <- get_vocabulary(text_vectorization)
+  # Must be an R character vector that includes both adapted tokens.
+  expect_type(vocab, "character")
+  expect_contains(vocab, c("hello", "world"))
+
+})