Improve handling for nested objects (#774)

hadley · web-flow · commit 1556345e9248 · 2025-10-09T11:47:00.000-05:00
* Let `convert_from_type()` create packed data frames
* Document that `type_object()` supports dynamic dots
* Add some advice
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -33,7 +33,8 @@ Imports:
     promises (>= 1.3.1),
     R6,
     rlang (>= 1.1.0),
-    S7 (>= 0.2.0)
+    S7 (>= 0.2.0),
+    vctrs
 Suggests:
     connectcreds,
     curl (>= 6.0.1),
diff --git a/R/chat-structured.R b/R/chat-structured.R
@@ -51,7 +51,7 @@ convert_from_type <- function(x, type) {
           convert_from_type(vals, type_array(type@items@properties[[name]]))
         })
         names(cols) <- names(type@items@properties)
-        list2DF(cols)
+        vctrs::new_data_frame(cols)
       }
     } else {
       x
diff --git a/R/types.R b/R/types.R
@@ -180,8 +180,8 @@ type_array <- function(items, description = NULL, required = TRUE) {
   TypeArray(items = items, description = description, required = required)
 }
 
-#' @param ... Name-type pairs defining the components that the object must
-#'   possess.
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Name-type pairs defining
+#'   the components that the object must possess.
 #' @param .additional_properties Can the object have arbitrary additional
 #'   properties that are not explicitly listed? Only supported by Claude.
 #' @export
diff --git a/man/type_boolean.Rd b/man/type_boolean.Rd
diff --git a/tests/testthat/test-chat-structured.R b/tests/testthat/test-chat-structured.R
@@ -197,6 +197,26 @@ test_that("can convert arrays of objects to data frames", {
   )
 })
 
+test_that("array of object with nested objects becomes packed data frame", {
+  type <- type_array(
+    type_object(
+      x = type_object(a = type_integer()),
+      y = type_object(a = type_integer())
+    )
+  )
+
+  data <- list(
+    list(x = list(a = 1), y = list(a = 3)),
+    list(x = list(a = 5), y = list(a = 7))
+  )
+
+  out <- convert_from_type(data, type)
+  expect_equal(nrow(out), 2)
+  expect_named(out, c("x", "y"))
+  expect_equal(out$x, data.frame(a = c(1, 5)))
+  expect_equal(out$y, data.frame(a = c(3, 7)))
+})
+
 test_that("can recursively convert objects contents", {
   expect_equal(
     convert_from_type(
diff --git a/vignettes/structured-data.Rmd b/vignettes/structured-data.Rmd
@@ -93,28 +93,28 @@ To define your desired type specification (also known as a schema), you use the
 * **Arrays** represent a vector of values of the same type. They are created with `type_array()` and require the `items` argument which specifies the type of each element. Arrays of scalars are very similar to R's atomic vectors:
 
   ```{r}
-  type_logical_vector <- type_array(type_boolean())
-  type_integer_vector <- type_array(type_integer())
-  type_double_vector <- type_array(type_number())
-  type_character_vector <- type_array(type_string())
+type_logical_vector <- type_array(type_boolean())
+type_integer_vector <- type_array(type_integer())
+type_double_vector <- type_array(type_number())
+type_character_vector <- type_array(type_string())
   ```
 
   You can also have arrays of arrays, which resemble lists with well-defined structures:
 
   ```{r}
-  list_of_integers <- type_array(type_integer_vector)
+list_of_integers <- type_array(type_integer_vector)
   ```
 
   Arrays of objects (described next) are equivalent to data frames.
 
 * **Objects** represent a collection of named values. They are created with `type_object()`. Objects can contain any number of scalars, arrays, and other objects. They are similar to named lists in R.
 
   ```{r}
-  type_person2 <- type_object(
-    name = type_string(),
-    age = type_integer(),
-    hobbies = type_array(type_string())
-  )
+type_person2 <- type_object(
+  name = type_string(),
+  age = type_integer(),
+  hobbies = type_array(type_string())
+)
   ```
 
 Under the hood, these type specifications ensure that the LLM returns correctly structured JSON. But ellmer goes one step further and converts the JSON to the closest R analog. This means:
@@ -223,7 +223,10 @@ chat$chat_structured(prompt, type = type_people)
 
 Now ellmer knows what you want and gives you a data frame.
 
-If you're familiar with the terms row-oriented and column-oriented data frames, this is the same idea. Since most languages don't possess vectorisation like R, row-oriented data frames are more common.
+If you're familiar with the terms row-oriented and column-oriented data frames, this is the same idea. Since most languages don't possess
+vectorisation like R, row-oriented data frames are more common.
+
+Note that you'll generally want to avoid nesting objects inside of objects, as this will generate a data frame where each column is itself a data frame. This is a valid data structure in R, but you'll find it easier to understand if you convert it to a tibble, since the tibble print method makes it more clear what's going on. You can also use `tidyr::unpack()` to unpack these df-columns back into a regular flat data frame.
 
 ## Examples
 

Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ convert_from_type <- function(x, type) {`
`51`	`51`	`convert_from_type(vals, type_array(type@items@properties[[name]]))`
`52`	`52`	`})`
`53`	`53`	`names(cols) <- names(type@items@properties)`
`54`		`- list2DF(cols)`
	`54`	`+ vctrs::new_data_frame(cols)`
`55`	`55`	`}`
`56`	`56`	`} else {`
`57`	`57`	`x`