diff --git a/DESCRIPTION b/DESCRIPTION index 2273f3f..1a43195 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,7 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Imports: - nanoarrow (>= 0.3.0), + nanoarrow (>= 0.5.0), wk (>= 0.6.0) LinkingTo: wk diff --git a/NAMESPACE b/NAMESPACE index 3ff17b0..0a28610 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method("[",geoarrow_vctr) -S3method("[<-",geoarrow_vctr) -S3method("[[<-",geoarrow_vctr) S3method(as.character,geoarrow_vctr) S3method(as_geoarrow_array,character) S3method(as_geoarrow_array,default) @@ -18,8 +15,6 @@ S3method(as_geoarrow_array_stream,geoarrow_vctr) S3method(as_geoarrow_array_stream,nanoarrow_array_stream) S3method(as_nanoarrow_array,sfc) S3method(as_nanoarrow_array_extension,geoarrow_extension_spec) -S3method(as_nanoarrow_array_stream,geoarrow_vctr) -S3method(as_nanoarrow_schema,geoarrow_vctr) S3method(convert_array,geoarrow_vctr) S3method(convert_array,sfc) S3method(convert_array,wk_wkb) @@ -31,7 +26,6 @@ S3method(infer_geoarrow_schema,default) S3method(infer_geoarrow_schema,nanoarrow_array) S3method(infer_geoarrow_schema,nanoarrow_array_stream) S3method(infer_nanoarrow_ptype_extension,geoarrow_extension_spec) -S3method(infer_nanoarrow_schema,geoarrow_vctr) S3method(infer_nanoarrow_schema,sfc) S3method(infer_nanoarrow_schema,wk_wkb) S3method(infer_nanoarrow_schema,wk_wkt) @@ -53,7 +47,6 @@ export(na_extension_wkb) export(na_extension_wkt) importFrom(nanoarrow,as_nanoarrow_array) importFrom(nanoarrow,as_nanoarrow_array_extension) -importFrom(nanoarrow,as_nanoarrow_array_stream) importFrom(nanoarrow,as_nanoarrow_schema) importFrom(nanoarrow,convert_array) importFrom(nanoarrow,convert_array_extension) diff --git a/R/pkg-arrow.R b/R/pkg-arrow.R index 9874e16..905f3cb 100644 --- a/R/pkg-arrow.R +++ b/R/pkg-arrow.R @@ -8,6 +8,7 @@ as_arrow_array.geoarrow_vctr <- function(x, ..., type = NULL) { } } +#' @importFrom nanoarrow as_nanoarrow_schema as_chunked_array.geoarrow_vctr <- function(x, ..., type = NULL) { if (is.null(type)) { schema <- NULL @@ -18,7 +19,7 @@ as_chunked_array.geoarrow_vctr <- function(x, ..., type = NULL) { } # as_nanoarrow_array_stream() applies the indices if vctr is sliced - stream <- as_nanoarrow_array_stream(x, schema = schema) + stream <- as_geoarrow_array_stream(x, schema = schema) chunks <- nanoarrow::collect_array_stream(stream, validate = FALSE) type <- arrow::as_data_type(type) diff --git a/R/pkg-nanoarrow.R b/R/pkg-nanoarrow.R index 9b004e5..f340631 100644 --- a/R/pkg-nanoarrow.R +++ b/R/pkg-nanoarrow.R @@ -20,7 +20,7 @@ register_geoarrow_extension <- function() { #' @importFrom nanoarrow infer_nanoarrow_ptype_extension #' @export infer_nanoarrow_ptype_extension.geoarrow_extension_spec <- function(extension_spec, x, ...) { - new_geoarrow_vctr(list(), x, integer()) + nanoarrow::nanoarrow_vctr(schema = x, subclass = "geoarrow_vctr") } #' @importFrom nanoarrow convert_array_extension diff --git a/R/vctr.R b/R/vctr.R index 63f9c4e..94e31d4 100644 --- a/R/vctr.R +++ b/R/vctr.R @@ -18,48 +18,7 @@ as_geoarrow_vctr <- function(x, ..., schema = NULL) { } stream <- as_geoarrow_array_stream(x, ..., schema = schema) - chunks <- nanoarrow::collect_array_stream(stream, validate = FALSE) - new_geoarrow_vctr(chunks, stream$get_schema()) -} - -new_geoarrow_vctr <- function(chunks, schema, indices = NULL) { - offsets <- .Call(geoarrow_c_vctr_chunk_offsets, chunks) - if (is.null(indices)) { - indices <- seq_len(offsets[length(offsets)]) - } - - structure( - indices, - schema = schema, - chunks = chunks, - offsets = offsets, - class = c("geoarrow_vctr", "wk_vctr") - ) -} - -#' @export -`[.geoarrow_vctr` <- function(x, i) { - attrs <- attributes(x) - x <- NextMethod() - - if (is.null(vctr_as_slice(x))) { - stop( - "Can't subset geoarrow_vctr with non-slice (e.g., only i:j indexing is supported)" - ) - } - - attributes(x) <- attrs - x -} - -#' @export -`[<-.geoarrow_vctr` <- function(x, i, value) { - stop("subset assignment for geoarrow_vctr is not supported") -} - -#' @export -`[[<-.geoarrow_vctr` <- function(x, i, value) { - stop("subset assignment for geoarrow_vctr is not supported") + nanoarrow::as_nanoarrow_vctr(stream, subclass = "geoarrow_vctr") } #' @export @@ -102,129 +61,8 @@ as.character.geoarrow_vctr <- function(x, ...) { format(x, ...) } -#' @export -infer_nanoarrow_schema.geoarrow_vctr <- function(x, ...) { - attr(x, "schema", exact = TRUE) -} - -# Because zero-length vctrs are R's way of communicating "type", implement -# as_nanoarrow_schema() here so that it works in places that expect a type -#' @importFrom nanoarrow as_nanoarrow_schema -#' @export -as_nanoarrow_schema.geoarrow_vctr <- function(x, ...) { - attr(x, "schema", exact = TRUE) -} - #' @export as_geoarrow_array_stream.geoarrow_vctr <- function(x, ..., schema = NULL) { - as_nanoarrow_array_stream.geoarrow_vctr(x, ..., schema = schema) -} - -#' @importFrom nanoarrow as_nanoarrow_array_stream -#' @export -as_nanoarrow_array_stream.geoarrow_vctr <- function(x, ..., schema = NULL) { - if (!is.null(schema)) { - stream <- as_nanoarrow_array_stream(x, schema = NULL) - return(as_geoarrow_array_stream(stream, schema = schema)) - } - - slice <- vctr_as_slice(x) - if (is.null(slice)) { - stop("Can't resolve non-slice geoarrow_vctr to nanoarrow_array_stream") - } - - x_schema <- attr(x, "schema", exact = TRUE) - - # Zero-size slice can be an array stream with zero batches - if (slice[2] == 0) { - return(nanoarrow::basic_array_stream(list(), schema = x_schema)) - } - - # Full slice doesn't need slicing logic - offsets <- attr(x, "offsets", exact = TRUE) - batches <- attr(x, "chunks", exact = TRUE) - if (slice[1] == 1 && slice[2] == max(offsets)) { - return( - nanoarrow::basic_array_stream( - batches, - schema = x_schema, - validate = FALSE - ) - ) - } - - # Calculate first and last slice information - first_index <- slice[1] - 1L - end_index <- first_index + slice[2] - last_index <- end_index - 1L - first_chunk_index <- vctr_resolve_chunk(first_index, offsets) - last_chunk_index <- vctr_resolve_chunk(last_index, offsets) - - first_chunk_offset <- first_index - offsets[first_chunk_index + 1L] - first_chunk_length <- offsets[first_chunk_index + 2L] - first_index - last_chunk_offset <- 0L - last_chunk_length <- end_index - offsets[last_chunk_index + 1L] - - # Calculate first and last slices - if (first_chunk_index == last_chunk_index) { - batch <- vctr_array_slice( - batches[[first_chunk_index + 1L]], - first_chunk_offset, - last_chunk_length - first_chunk_offset - ) - - return( - nanoarrow::basic_array_stream( - list(batch), - schema = x_schema, - validate = FALSE - ) - ) - } - - batch1 <- vctr_array_slice( - batches[[first_chunk_index + 1L]], - first_chunk_offset, - first_chunk_length - ) - - batchn <- vctr_array_slice( - batches[[last_chunk_index + 1L]], - last_chunk_offset, - last_chunk_length - ) - - seq_mid <- seq_len(last_chunk_index - first_chunk_index - 1) - batch_mid <- batches[first_chunk_index + seq_mid] - - nanoarrow::basic_array_stream( - c( - list(batch1), - batch_mid, - list(batchn) - ), - schema = x_schema, - validate = FALSE - ) -} - - -# Utilities for vctr methods - -vctr_resolve_chunk <- function(x, offsets) { - .Call(geoarrow_c_vctr_chunk_resolve, x, offsets) -} - -vctr_as_slice <- function(x) { - .Call(geoarrow_c_vctr_as_slice, x) -} - -vctr_array_slice <- function(x, offset, length) { - new_offset <- x$offset + offset - new_length <- length - nanoarrow::nanoarrow_array_modify( - x, - list(offset = new_offset, length = new_length), - validate = FALSE - ) + stream <- nanoarrow::as_nanoarrow_array_stream(x) + as_geoarrow_array_stream(stream, schema = schema) } diff --git a/src/r-init.c b/src/r-init.c index c84efda..957a656 100644 --- a/src/r-init.c +++ b/src/r-init.c @@ -14,9 +14,6 @@ extern SEXP geoarrow_c_make_type(SEXP geometry_type_sexp, SEXP dimensions_sexp, SEXP coord_type_sexp); extern SEXP geoarrow_c_schema_init_extension(SEXP schema_xptr, SEXP type_sexp); extern SEXP geoarrow_c_schema_parse(SEXP schema_xptr, SEXP extension_name_sexp); -extern SEXP geoarrow_c_vctr_chunk_offsets(SEXP array_list); -extern SEXP geoarrow_c_vctr_chunk_resolve(SEXP indices_sexp, SEXP offsets_sexp); -extern SEXP geoarrow_c_vctr_as_slice(SEXP indices_sexp); extern SEXP geoarrow_c_handle_stream(SEXP data, SEXP handler_xptr); extern SEXP geoarrow_c_writer_new(SEXP schema_xptr, SEXP array_out_xptr); @@ -28,9 +25,6 @@ static const R_CallMethodDef CallEntries[] = { {"geoarrow_c_make_type", (DL_FUNC)&geoarrow_c_make_type, 3}, {"geoarrow_c_schema_init_extension", (DL_FUNC)&geoarrow_c_schema_init_extension, 2}, {"geoarrow_c_schema_parse", (DL_FUNC)&geoarrow_c_schema_parse, 2}, - {"geoarrow_c_vctr_chunk_offsets", (DL_FUNC)&geoarrow_c_vctr_chunk_offsets, 1}, - {"geoarrow_c_vctr_chunk_resolve", (DL_FUNC)&geoarrow_c_vctr_chunk_resolve, 2}, - {"geoarrow_c_vctr_as_slice", (DL_FUNC)&geoarrow_c_vctr_as_slice, 1}, {"geoarrow_c_handle_stream", (DL_FUNC)&geoarrow_c_handle_stream, 2}, {"geoarrow_c_writer_new", (DL_FUNC)&geoarrow_c_writer_new, 2}, {NULL, NULL, 0}}; diff --git a/src/r-vctr.c b/src/r-vctr.c deleted file mode 100644 index 26c38f4..0000000 --- a/src/r-vctr.c +++ /dev/null @@ -1,115 +0,0 @@ - -#define R_NO_REMAP -#include -#include - -#include "geoarrow.h" - -SEXP geoarrow_c_vctr_chunk_offsets(SEXP array_list) { - int num_chunks = Rf_length(array_list); - SEXP offsets_sexp = PROTECT(Rf_allocVector(INTSXP, num_chunks + 1)); - int* offsets = INTEGER(offsets_sexp); - offsets[0] = 0; - int64_t cumulative_offset = 0; - - struct ArrowArray* array; - for (int i = 0; i < num_chunks; i++) { - array = (struct ArrowArray*)R_ExternalPtrAddr(VECTOR_ELT(array_list, i)); - cumulative_offset += array->length; - if (cumulative_offset > INT_MAX) { - Rf_error("Can't build geoarrow_vctr with length > INT_MAX"); // # nocov - } - - offsets[i + 1] = cumulative_offset; - } - - UNPROTECT(1); - return offsets_sexp; -} - -static int resolve_chunk(int* sorted_offsets, int index, int start_offset_i, - int end_offset_i) { - if (start_offset_i >= (end_offset_i - 1)) { - return start_offset_i; - } - - int mid_offset_i = start_offset_i + (end_offset_i - start_offset_i) / 2; - int mid_index = sorted_offsets[mid_offset_i]; - if (index < mid_index) { - return resolve_chunk(sorted_offsets, index, start_offset_i, mid_offset_i); - } else { - return resolve_chunk(sorted_offsets, index, mid_offset_i, end_offset_i); - } -} - -SEXP geoarrow_c_vctr_chunk_resolve(SEXP indices_sexp, SEXP offsets_sexp) { - int* offsets = INTEGER(offsets_sexp); - int end_offset_i = Rf_length(offsets_sexp) - 1; - int last_offset = offsets[end_offset_i]; - - int n = Rf_length(indices_sexp); - SEXP chunk_indices_sexp = PROTECT(Rf_allocVector(INTSXP, n)); - int* chunk_indices = INTEGER(chunk_indices_sexp); - - int buf[1024]; - for (int i = 0; i < n; i++) { - if (i % 1024 == 0) { - INTEGER_GET_REGION(indices_sexp, i, 1024, buf); - } - int index0 = buf[i % 1024]; - - if (index0 < 0 || index0 > last_offset) { - chunk_indices[i] = NA_INTEGER; - } else { - chunk_indices[i] = resolve_chunk(offsets, index0, 0, end_offset_i); - } - } - - UNPROTECT(1); - return chunk_indices_sexp; -} - -SEXP geoarrow_c_vctr_as_slice(SEXP indices_sexp) { - if (TYPEOF(indices_sexp) != INTSXP) { - return R_NilValue; - } - SEXP slice_sexp = PROTECT(Rf_allocVector(INTSXP, 2)); - int* slice = INTEGER(slice_sexp); - - int n = Rf_length(indices_sexp); - slice[1] = n; - - if (n == 1) { - slice[0] = INTEGER_ELT(indices_sexp, 0); - UNPROTECT(1); - return slice_sexp; - } else if (n == 0) { - slice[0] = NA_INTEGER; - UNPROTECT(1); - return slice_sexp; - } - - int buf[1024]; - INTEGER_GET_REGION(indices_sexp, 0, 1024, buf); - slice[0] = buf[0]; - - int last_value = buf[0]; - int this_value = 0; - - for (int i = 1; i < n; i++) { - if (i % 1024 == 0) { - INTEGER_GET_REGION(indices_sexp, i, 1024, buf); - } - - this_value = buf[i % 1024]; - if ((this_value - last_value) != 1) { - UNPROTECT(1); - return R_NilValue; - } - - last_value = this_value; - } - - UNPROTECT(1); - return slice_sexp; -} diff --git a/tests/testthat/test-pkg-arrow.R b/tests/testthat/test-pkg-arrow.R index 2229e57..24835db 100644 --- a/tests/testthat/test-pkg-arrow.R +++ b/tests/testthat/test-pkg-arrow.R @@ -80,12 +80,14 @@ test_that("as_arrow_array() works for geoarrow_vctr", { expect_s3_class(array, "Array") expect_equal(array$length(), 1) - vctr2 <- new_geoarrow_vctr( - list( - as_geoarrow_array("POINT (0 1)"), - as_geoarrow_array("POINT (1 2)") - ), - schema = na_extension_wkt() + vctr2 <- as_geoarrow_vctr( + nanoarrow::basic_array_stream( + list( + as_geoarrow_array("POINT (0 1)"), + as_geoarrow_array("POINT (1 2)") + ), + schema = na_extension_wkt() + ) ) array <- arrow::as_arrow_array(vctr2) diff --git a/tests/testthat/test-pkg-sf.R b/tests/testthat/test-pkg-sf.R index 6953a3e..f74cfde 100644 --- a/tests/testthat/test-pkg-sf.R +++ b/tests/testthat/test-pkg-sf.R @@ -15,6 +15,7 @@ test_that("st_as_sfc() works for geoarrow_vctr()", { test_that("arrow package objects can be converted to and from sf objects", { skip_if_not_installed("sf") skip_if_not_installed("arrow") + skip_if_not(arrow::arrow_info()$capabilities["dataset"]) sfc <- sf::st_sfc(sf::st_point(c(0, 1))) sf <- sf::st_as_sf(data.frame(geometry = sfc)) diff --git a/tests/testthat/test-vctr.R b/tests/testthat/test-vctr.R index 7dd4ad7..79530aa 100644 --- a/tests/testthat/test-vctr.R +++ b/tests/testthat/test-vctr.R @@ -41,144 +41,20 @@ test_that("wk crs/edge getters are implemented for geoarrow_vctr", { expect_true(wk::wk_is_geodesic(x)) }) -test_that("geoarrow_vctr to stream generates an empty stream for empty slice", { - vctr <- new_geoarrow_vctr(list(), na_extension_wkt()) - stream <- nanoarrow::as_nanoarrow_array_stream(vctr) - schema_out <- stream$get_schema() - expect_identical(schema_out$format, "u") - expect_identical(nanoarrow::collect_array_stream(stream), list()) -}) - -test_that("geoarrow_vctr to stream generates identical stream for identity slice", { - array <- as_geoarrow_array("POINT (0 1)") - vctr <- new_geoarrow_vctr(list(array), infer_nanoarrow_schema(array)) - - stream <- nanoarrow::as_nanoarrow_array_stream(vctr) - schema_out <- stream$get_schema() - expect_identical(schema_out$format, "u") - - collected <- nanoarrow::collect_array_stream(stream) - expect_length(collected, 1) - expect_identical( - nanoarrow::convert_buffer(array$buffers[[3]]), - "POINT (0 1)" - ) -}) - -test_that("geoarrow_vctr to stream works for arbitrary slices", { - array1 <- as_geoarrow_array(c("POINT (0 1)", "POINT (1 2)", "POINT (2 3)")) - array2 <- as_geoarrow_array( - c("POINT (4 5)", "POINT (5 6)", "POINT (6 7)", "POINT (7 8") - ) - vctr <- new_geoarrow_vctr(list(array1, array2), infer_nanoarrow_schema(array1)) - - chunks16 <- nanoarrow::collect_array_stream( - nanoarrow::as_nanoarrow_array_stream(vctr[1:6]) - ) - expect_length(chunks16, 2) - expect_identical(chunks16[[1]]$offset, 0L) - expect_identical(chunks16[[1]]$length, 3L) - expect_identical(chunks16[[2]]$offset, 0L) - expect_identical(chunks16[[2]]$length, 3L) - - chunks34 <- nanoarrow::collect_array_stream( - nanoarrow::as_nanoarrow_array_stream(vctr[3:4]) - ) - expect_length(chunks34, 2) - expect_identical(chunks34[[1]]$offset, 2L) - expect_identical(chunks34[[1]]$length, 1L) - expect_identical(chunks34[[2]]$offset, 0L) - expect_identical(chunks34[[2]]$length, 1L) - - chunks13 <- nanoarrow::collect_array_stream( - nanoarrow::as_nanoarrow_array_stream(vctr[1:3]) - ) - expect_length(chunks13, 1) - expect_identical(chunks13[[1]]$offset, 0L) - expect_identical(chunks13[[1]]$length, 3L) - - chunks46 <- nanoarrow::collect_array_stream( - nanoarrow::as_nanoarrow_array_stream(vctr[4:6]) - ) - expect_length(chunks46, 1) - expect_identical(chunks46[[1]]$offset, 0L) - expect_identical(chunks46[[1]]$length, 3L) - - chunks56 <- nanoarrow::collect_array_stream( - nanoarrow::as_nanoarrow_array_stream(vctr[5:6]) - ) - expect_length(chunks56, 1) - expect_identical(chunks56[[1]]$offset, 1L) - expect_identical(chunks56[[1]]$length, 2L) - - chunks57 <- nanoarrow::collect_array_stream( - nanoarrow::as_nanoarrow_array_stream(vctr[5:7]) - ) - expect_length(chunks57, 1) - expect_identical(chunks57[[1]]$offset, 1L) - expect_identical(chunks57[[1]]$length, 3L) -}) - test_that("Errors occur for unsupported subset operations", { vctr <- as_geoarrow_vctr("POINT (0 1)") expect_error( vctr[5:1], - "Can't subset geoarrow_vctr with non-slice" + "Can't subset nanoarrow_vctr with non-slice" ) expect_error( vctr[1] <- "something", - "subset assignment for geoarrow_vctr is not supported" + "subset assignment for nanoarrow_vctr is not supported" ) expect_error( vctr[[1]] <- "something", - "subset assignment for geoarrow_vctr is not supported" - ) -}) - -test_that("slice detector works", { - expect_identical( - vctr_as_slice(logical()), - NULL - ) - - expect_identical( - vctr_as_slice(2:1), - NULL - ) - - expect_identical( - vctr_as_slice(integer()), - c(NA_integer_, 0L) - ) - - expect_identical( - vctr_as_slice(2L), - c(2L, 1L) - ) - - expect_identical( - vctr_as_slice(1:10), - c(1L, 10L) - ) - - expect_identical( - vctr_as_slice(10:2048), - c(10L, (2048L - 10L + 1L)) - ) -}) - -test_that("chunk resolver works", { - chunk_offset1 <- 0:10 - - expect_identical( - vctr_resolve_chunk(c(-1L, 11L), chunk_offset1), - c(NA_integer_, NA_integer_) - ) - - expect_identical( - vctr_resolve_chunk(9:0, chunk_offset1), - 9:0 + "subset assignment for nanoarrow_vctr is not supported" ) })