diff --git a/NEWS.md b/NEWS.md index a9dfad30..7b8e4f7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ # bcdata (development version) +* Fix bugs where `bcdc_search_facets()`, `bcdc_list_group_records()` and + `bcdc_list_organization_records()` were not returning all records after + the upgrade to CKAN 2.9 (#353). Includes adding tests and bcdata-specific + options for these queries, see `bcdc_options()`. # bcdata 0.5.0 diff --git a/R/bcdc_options.R b/R/bcdc_options.R index ebaa7b6e..1a456fa9 100644 --- a/R/bcdc_options.R +++ b/R/bcdc_options.R @@ -34,6 +34,21 @@ #' using this option you can set the size of the chunk requested. On slower #' connections, or when having problems, it may help to lower the chunk limit. #' +#' `bcdata.max_package_search_limit` is an option for setting the maximum number of +#' datasets returned when querying by organization with the package_search API endpoint. The +#' default limit (1000) is purposely set high to return all datasets for a +#' given organization. +#' +#' `bcdata.max_package_search_facet_limit` is an option for setting the maximum number of +#' values returned when querying facet fields with the package_search API endpoint. The +#' default limit (1000) is purposely set high to return all values for each facet field +#' ("license_id", "download_audience", "res_format", "publish_state", "organization", "groups"). +#' +#' `bcdata.max_group_package_show_limit` is an option for setting the maximum number of +#' datasets returned when querying by group with the group_package_show API endpoint. The +#' default limit (1000) is purposely set high to return all datasets for a +#' given group. +#' #' `bcdata.single_download_limit` *Deprecated*. This is the maximum number of #' records an object can be before forcing a paginated download; it is set by #' querying the server capabilities. This option is deprecated and will be @@ -81,7 +96,10 @@ bcdc_options <- function() { "bcdata.max_geom_pred_size", null_to_na(getOption("bcdata.max_geom_pred_size")), 5E5, "bcdata.chunk_limit", null_to_na(getOption("bcdata.chunk_limit")), server_single_download_limit, "bcdata.single_download_limit", - null_to_na(deprecate_single_download_limit_option()), server_single_download_limit + null_to_na(deprecate_single_download_limit_option()), server_single_download_limit, + "bcdata.max_package_search_limit", null_to_na(getOption("bcdata.max_package_search_limit")), 1000, + "bcdata.max_package_search_facet_limit", null_to_na(getOption("bcdata.max_package_search_facet_limit")), 1000, + "bcdata.max_group_package_show_limit", null_to_na(getOption("bcdata.max_group_package_show_limit")), 1000 ) } diff --git a/R/bcdc_search.R b/R/bcdc_search.R index aaee28f2..263ec52d 100644 --- a/R/bcdc_search.R +++ b/R/bcdc_search.R @@ -40,7 +40,9 @@ bcdc_search_facets <- function(facet = c("license_id", "download_audience", cli <- bcdc_catalogue_client("action/package_search") - r <- cli$get(query = list(facet.field = query, rows = 0)) + option_facet_limit <- getOption("bcdata.max_package_search_facet_limit", 1000) + + r <- cli$get(query = list(facet.field = query, rows = 0, facet.limit = option_facet_limit)) r$raise_for_status() res <- jsonlite::fromJSON(r$parse("UTF-8")) @@ -80,9 +82,11 @@ bcdc_list_groups <- function() bcdc_search_facets("groups") bcdc_list_group_records <- function(group) { if(!has_internet()) stop("No access to internet", call. = FALSE) # nocov - cli <- bcdc_catalogue_client("action/group_show") + cli <- bcdc_catalogue_client("action/group_package_show") + + option_group_limit <- getOption("bcdata.max_group_package_show_limit", 1000) - r <- cli$get(query = list(id = group, include_datasets = 'true')) + r <- cli$get(query = list(id = group, limit = option_group_limit)) if (r$status_code == 404){ stop("404: URL not found - you may have specified an invalid group?", call. = FALSE) @@ -93,7 +97,7 @@ bcdc_list_group_records <- function(group) { res <- jsonlite::fromJSON(r$parse("UTF-8")) stopifnot(res$success) - d <- tibble::as_tibble(res$result$packages) + d <- tibble::as_tibble(res$result) as.bcdc_group(d, description = res$result$description) } @@ -120,9 +124,14 @@ bcdc_list_organizations <- function() bcdc_search_facets("organization") bcdc_list_organization_records <- function(organization) { if(!has_internet()) stop("No access to internet", call. = FALSE) # nocov - cli <- bcdc_catalogue_client("action/organization_show") + option_package_limit <- getOption("bcdata.max_package_search_limit", 1000) + + cli <- bcdc_catalogue_client("action/package_search") - r <- cli$get(query = list(id = organization, include_datasets = 'true')) + r <- cli$get(query = list( + fq = paste0("organization:", organization), # filter query for the organization + rows = option_package_limit + )) if (r$status_code == 404){ stop("404: URL not found - you may have specified an invalid organization?", call. = FALSE) @@ -133,7 +142,7 @@ bcdc_list_organization_records <- function(organization) { res <- jsonlite::fromJSON(r$parse("UTF-8")) stopifnot(res$success) - d <- tibble::as_tibble(res$result$packages) + d <- tibble::as_tibble(res$result$results) as.bcdc_organization(d, description = res$result$description) } diff --git a/man/bcdc_options.Rd b/man/bcdc_options.Rd index 2ef8aef1..85156e32 100644 --- a/man/bcdc_options.Rd +++ b/man/bcdc_options.Rd @@ -29,6 +29,21 @@ downloaded. This is called "pagination". bcdata does this all for you, however b using this option you can set the size of the chunk requested. On slower connections, or when having problems, it may help to lower the chunk limit. +\code{bcdata.max_package_search_limit} is an option for setting the maximum number of +datasets returned when querying by organization with the package_search API endpoint. The +default limit (1000) is purposely set high to return all datasets for a +given organization. + +\code{bcdata.max_package_search_facet_limit} is an option for setting the maximum number of +values returned when querying facet fields with the package_search API endpoint. The +default limit (1000) is purposely set high to return all values for each facet field +("license_id", "download_audience", "res_format", "publish_state", "organization", "groups"). + +\code{bcdata.max_group_package_show_limit} is an option for setting the maximum number of +datasets returned when querying by group with the group_package_show API endpoint. The +default limit (1000) is purposely set high to return all datasets for a +given group. + \code{bcdata.single_download_limit} \emph{Deprecated}. This is the maximum number of records an object can be before forcing a paginated download; it is set by querying the server capabilities. This option is deprecated and will be diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index a4982bfd..72007c1b 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -17,7 +17,6 @@ test_that("bcdc_options() returns a tibble",{ expect_s3_class(opts, "tbl_df") }) - test_that("bcdata.chunk_limit",{ skip_if_net_down() skip_on_cran() @@ -30,6 +29,33 @@ test_that("bcdata.chunk_limit",{ }) }) +test_that("bcdata.max_package_search_limit works",{ + skip_if_net_down() + skip_on_cran() + withr::with_options(list(bcdata.max_package_search_limit = 10), { + bcstats <- bcdc_list_organization_records("bc-stats") + expect_lte(nrow(bcstats), 10) + }) +}) + +test_that("bcdata.max_package_search_facet_limit works",{ + skip_if_net_down() + skip_on_cran() + withr::with_options(list(bcdata.max_package_search_facet_limit = 10), { + orgs <- bcdc_search_facets("organization") + expect_lte(nrow(orgs), 10) + }) +}) + +test_that("bcdata.max_group_package_show_limit works",{ + skip_if_net_down() + skip_on_cran() + withr::with_options(list(bcdata.max_group_package_show_limit = 10), { + census <- bcdc_list_group_records("census-profiles") + expect_lte(nrow(census), 10) + }) +}) + test_that("bcdata.single_download_limit is deprecated but works", { # This can be removed when bcdata.single_download_limit is removed skip_if_net_down() @@ -37,7 +63,7 @@ test_that("bcdata.single_download_limit is deprecated but works", { withr::local_options(list(bcdata.single_download_limit = 1)) withr::local_envvar(list(BCDC_KEY = NULL)) # so snapshot not affected by message expect_s3_class( - bcdc_query_geodata(record = '76b1b7a3-2112-4444-857a-afccf7b20da8'), + bcdc_query_geodata(record = '76b1b7a3-2112-4444-857a-afccf7b20da8'), "bcdc_promise" ) }) diff --git a/tests/testthat/test-search.R b/tests/testthat/test-search.R index 9f9ee3c4..86191e4e 100644 --- a/tests/testthat/test-search.R +++ b/tests/testthat/test-search.R @@ -37,3 +37,39 @@ test_that("bcdc_search works with zero results", { expect_output(print(res), "returned no results") }) + +test_that('bcdc_search_facets works', { + skip_on_cran() + skip_if_net_down() + orgs <- bcdc_search_facets("organization") + grps <- bcdc_search_facets("groups") + licences <- bcdc_search_facets("license_id") + expect_s3_class(orgs, "data.frame") + expect_s3_class(grps, "data.frame") + expect_s3_class(licences, "data.frame") + expect_gte(nrow(orgs), 1) + expect_gte(nrow(grps), 1) + expect_gte(nrow(licences), 1) +}) + +test_that('bcdc_list_group_records works', { + skip_on_cran() + skip_if_net_down() + census <- bcdc_list_group_records("census-profiles") + grps <- bcdc_search_facets("groups") + census_count <- grps |> dplyr::filter(name == "census-profiles") |> dplyr::pull(count) + expect_s3_class(census, "data.frame") + expect_gte(nrow(census), 1) + expect_equal(nrow(census), census_count) +}) + +test_that('bcdc_list_organization_records works', { + skip_on_cran() + skip_if_net_down() + bcstats <- bcdc_list_organization_records("bc-stats") + orgs <- bcdc_search_facets("organization") + bcstats_count <- orgs |> dplyr::filter(name == "bc-stats") |> dplyr::pull(count) + expect_s3_class(bcstats, "data.frame") + expect_gte(nrow(bcstats), 1) + expect_equal(nrow(bcstats), bcstats_count) +})