Skip to content

Commit dc7b28d

Browse files
authored
More graceful errors (#4)
* Update fetch_ucirepo() to have a softer error if the internet resource cannot be reached. * Update list_available_datasets() to have a softer error if no data sets are found. * Add two tests that mimic a bad connection/failure of httr2::request() using mocking. * Add NEWS file * Bump version
1 parent 18df44e commit dc7b28d

File tree

9 files changed

+215
-23
lines changed

9 files changed

+215
-23
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: ucimlrepo
22
Title: Explore UCI ML Repository Datasets
3-
Version: 0.0.1
3+
Version: 0.0.2
44
Authors@R: c(
55
person("James Joseph", "Balamuta", email = "[email protected]",
66
role = c("aut", "cre", "cph"),
@@ -23,7 +23,7 @@ Imports:
2323
utils
2424
Encoding: UTF-8
2525
Roxygen: list(markdown = TRUE)
26-
RoxygenNote: 7.3.1
26+
RoxygenNote: 7.3.2
2727
Collate:
2828
'constants.R'
2929
'fetch-ucirepo.R'

NEWS.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# ucimlrepo 0.0.2
2+
3+
## Features
4+
5+
- More graceful error handling in `fetch_ucirepo()` and `list_available_datasets()`
6+
when resources are not found/available. ([#3](https://github.com/coatless-rpkg/ucimlrepo/issues/3),
7+
thanks Prof. Ripley!)
8+
9+
# ucimlrepo 0.0.1
10+
11+
## Features
12+
13+
- `fetch_ucirepo()`: Download a dataset from the UCI ML Repository.
14+
- `list_available_datasets()`: List all available datasets from the UCI ML Repository.
15+

R/fetch-ucirepo.R

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,78 @@
1+
#' Empty Fetch Response
2+
#'
3+
#' Internal function to create an empty fetch response.
4+
#'
5+
#' @return
6+
#' A list with `data`, `metadata`, and `variables` elements whose contents are all empty (zero-row data frames and zero-length or `NULL` fields).
7+
#'
8+
#' @seealso
9+
#' [`fetch_ucirepo()`]
10+
#'
11+
#' @keywords internal
12+
empty_fetch_response <- function() {
13+
list(
14+
data = list(
15+
ids = data.frame(),
16+
features = data.frame(),
17+
targets = data.frame(),
18+
original = data.frame(),
19+
headers = character()
20+
),
21+
metadata = list(
22+
uci_id = integer(),
23+
name = character(),
24+
repository_url = character(),
25+
data_url = character(),
26+
abstract = character(),
27+
area = character(),
28+
tasks = list(),
29+
characteristics = list(),
30+
num_instances = integer(),
31+
num_features = integer(),
32+
feature_types = list(),
33+
demographics = list(),
34+
target_col = list(),
35+
index_col = NULL,
36+
has_missing_values = character(),
37+
missing_values_symbol = character(),
38+
year_of_dataset_creation = integer(),
39+
last_updated = character(),
40+
dataset_doi = character(),
41+
creators = list(),
42+
intro_paper = list(
43+
title = character(),
44+
authors = character(),
45+
published_in = character(),
46+
year = integer(),
47+
url = character(),
48+
doi = NULL
49+
),
50+
additional_info = list(
51+
summary = character(),
52+
purpose = NULL,
53+
funded_by = NULL,
54+
instances_represent = NULL,
55+
recommended_data_splits = NULL,
56+
sensitive_data = NULL,
57+
preprocessing_description = NULL,
58+
variable_info = character(),
59+
citation = NULL
60+
)
61+
),
62+
variables = data.frame(
63+
name = character(),
64+
role = character(),
65+
type = character(),
66+
demographic = character(),
67+
description = character(),
68+
units = character(),
69+
missing_values = character(),
70+
stringsAsFactors = FALSE
71+
)
72+
)
73+
}
74+
75+
176
#' Fetch UCI ML Repository Dataset
277
#'
378
#' Loads a dataset from the UCI ML Repository, including the dataframes and
@@ -103,18 +178,20 @@ fetch_ucirepo <- function(name, id) {
103178
}
104179

105180
# Fetch metadata from API
106-
response <- tryCatch({
181+
response <- try({
107182
httr2::request(API_BASE_URL) |>
108183
httr2::req_url_query(!!!query_params) |>
109184
httr2::req_perform()
110-
}, error = function(e) {
111-
message('Error connecting to server')
112-
return()
113185
})
114186

187+
if (inherits(response, "try-error")) {
188+
message('Error connecting to server')
189+
return(invisible(empty_fetch_response()))
190+
}
191+
115192
if (response$status_code != 200) {
116193
message('Dataset not found in repository')
117-
return()
194+
return(invisible(empty_fetch_response()))
118195
}
119196

120197
data <- response |> httr2::resp_body_json(check_type = FALSE)
@@ -132,20 +209,20 @@ fetch_ucirepo <- function(name, id) {
132209
# No data URL means that the dataset cannot be imported into R
133210
if (is.null(data_url)) {
134211
message(paste0('"', name, '" dataset (id=', id, ') exists in the repository, but is not available for import. Please select a dataset from this list: https://archive.ics.uci.edu/datasets?skip=0&take=10&sort=desc&orderBy=NumHits&search=&Python=true'))
135-
return()
212+
return(invisible(empty_fetch_response()))
136213
}
137214

138215
# Parse into dataframe using read.csv
139216
df <- tryCatch({
140217
utils::read.csv(data_url, check.names = FALSE)
141218
}, error = function(e) {
142219
message(paste0('Error reading data csv file for "', name, '" dataset (id=', id, ').'))
143-
return()
220+
# The handler's value is assigned to `df` (a return() here only leaves the
# handler, not fetch_ucirepo()), so yield an empty data frame and let the
# `nrow(df) == 0` check below return the empty fetch response.
data.frame()
144221
})
145222

146223
if (nrow(df) == 0) {
147224
message(paste0('Error reading data csv file for "', name, '" dataset (id=', id, ').'))
148-
return()
225+
return(invisible(empty_fetch_response()))
149226
}
150227

151228
# Header line should be variable names

R/list-available-datasets.R

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
1+
#' Empty Search Response
2+
#'
3+
#' Internal function to create an empty search response.
4+
#'
5+
#' @return
6+
#' A data frame with three empty columns of 'id', 'name', and 'url'.
7+
#'
8+
#' @seealso
9+
#' [`list_available_datasets()`]
10+
#'
11+
#' @keywords internal
12+
empty_search_response <- function() {
13+
search <- data.frame(
14+
id = integer(),
15+
name = character(),
16+
url = character(),
17+
stringsAsFactors = FALSE
18+
)
19+
20+
# Nullify attributes
21+
attr(search, 'filter') <- NULL
22+
attr(search, 'search') <- NULL
23+
attr(search, 'area') <- NULL
24+
25+
search
26+
}
27+
28+
29+
130
#' List Available Datasets from UCI ML Repository
231
#'
332
#' Prints a list of datasets that can be imported via the \code{fetch_ucirepo} function.
@@ -7,15 +36,15 @@
736
#' @param area Character. Optional query to filter available datasets based on subject area.
837
#'
938
#' @return
10-
#' Prints the list of available datasets.
11-
#'
12-
#' Invisibly returns a data frame containing the list of available datasets
39+
#' A data frame containing the list of available datasets
1340
#' with columns of:
1441
#'
1542
#' - **id**: Integer ID for the data set.
1643
#' - **name**: Name of Dataset
1744
#' - **url**: Download location of the data set
1845
#'
46+
#' In the event the search fails, the data frame returned will be empty.
47+
#'
1948
#' @include constants.R
2049
#' @export
2150
#' @examples
@@ -56,28 +85,29 @@ list_available_datasets <- function(filter, search, area) {
5685
}
5786

5887
# Fetch list of datasets from API
59-
response <- tryCatch({
88+
response <- try({
6089
httr2::request(API_LIST_URL) |>
6190
httr2::req_url_query(!!!query_params) |>
6291
httr2::req_perform()
63-
}, error = function(e) {
64-
message('Error connecting to server')
65-
message(e)
66-
return()
6792
})
6893

94+
if (inherits(response, "try-error")) {
95+
message('Error connecting to server')
96+
return(invisible(empty_search_response()))
97+
}
98+
6999
# Convert body to JSON
70100
# Avoid enforcing the application/json format response
71101
data <- response |> httr2::resp_body_json(check_type = FALSE) |> {\(x) x$data}()
72102

73103
if (response$status_code != 200) {
74104
message('Error fetching datasets with status code: ', response$status_code)
75-
return()
105+
return(invisible(empty_search_response()))
76106
}
77107

78108
if (length(data) == 0) {
79109
message('No datasets found')
80-
return()
110+
return(invisible(empty_search_response()))
81111
}
82112

83113
# Create and return a table of data

man/empty_fetch_response.Rd

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/empty_search_response.Rd

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/list_available_datasets.Rd

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-fetch-ucirepo.R

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
test_that("fetch_ucirepo(): Graceful errors", {
2+
skip_on_cran()
3+
4+
with_mocked_bindings(
5+
code = {
6+
# Check we emit a diagnostic message instead of a hard error
7+
expect_message(result <- fetch_ucirepo(id = -5))
8+
9+
# Verify empty variables data frame
10+
# (This indicates an empty/failed response)
11+
expect_equal(nrow(result$variables), 0)
12+
},
13+
request = function(...) { stop("Error!") },
14+
.package = "httr2"
15+
)
16+
})
17+
118
test_that("fetch_ucirepo(): Nonexistent dataset", {
219
skip_on_cran()
320
expect_message(fetch_ucirepo(id = 2000))

tests/testthat/test-list-available-datasets.R

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,23 @@ test_that("list_available_datasets(): graceful errors", {
1010

1111
})
1212

13+
test_that("list_available_datasets(): Graceful errors with bad connection", {
14+
skip_on_cran()
15+
16+
with_mocked_bindings(
17+
code = {
18+
# Check we emit a diagnostic message instead of a hard error
19+
expect_message(result <- list_available_datasets(search = "toad"))
20+
21+
# Verify the returned data frame of datasets is empty
22+
# (This indicates an empty/failed response)
23+
expect_equal(nrow(result), 0)
24+
},
25+
request = function(...) { stop("Error!") },
26+
.package = "httr2"
27+
)
28+
})
29+
1330
test_that("list_available_datasets(): search", {
1431
skip_on_cran()
1532

0 commit comments

Comments
 (0)