Skip to content

Commit 41a0b89

Browse files
authored
Merge pull request #19 from mlr-org/ames
feat: add ames data set
2 parents d2cd24c + ad7a243 commit 41a0b89

24 files changed

+98
-15
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Suggests:
2828
mlr3 (>= 0.13.3)
2929
Encoding: UTF-8
3030
LazyData: true
31+
LazyDataCompression: xz
3132
NeedsCompilation: no
3233
Roxygen: list(markdown = TRUE)
33-
RoxygenNote: 7.2.1
34+
RoxygenNote: 7.2.3

R/ames_housing.R

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#' @title House Sales in Ames, Iowa
2+
#'
3+
#' @name ames_housing
4+
#' @aliases mlr_tasks_ames_housing
5+
#'
6+
#' @description
7+
#' Regression task to predict house sale prices for Ames, Iowa.
8+
#'
9+
#' Contains 80 features and 2930 observations.
10+
#' Target column is `"Sale_Price"`.
11+
#'
12+
#' @docType data
13+
#' @keywords data
14+
#' @examples
15+
#' data("ames_housing", package = "mlr3data")
16+
#' str(ames_housing)
17+
NULL
18+
19+
# Constructs the "ames_housing" regression task with target column "Sale_Price".
# Takes no arguments; returns the `mlr3::TaskRegr` object created below.
get_ames_housing_task = function() {
20+
# as_backend() is defined outside this diff; presumably it wraps the packaged
# `ames_housing` data set in a data backend -- TODO confirm.
b = as_backend("ames_housing")
21+
task = mlr3::TaskRegr$new("ames_housing", b, target = "Sale_Price", label = "Ames House Sales")
22+
# The same string is assigned to both the backend's `hash` and the task's `man` field.
# NOTE(review): presumably a fixed identifier so the hash is not derived from the data -- confirm.
b$hash = task$man = "mlr3data::mlr_tasks_ames_housing"
23+
task
24+
}

R/energy_usage.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#' @title Power Consumption of Kitchen Appliances in Ames, Iowa
2+
#'
3+
#' @name energy_usage
4+
#'
5+
#' @description
6+
#' Data for power consumption of kitchen appliances in Ames, Iowa.
7+
#' Extends the `ames_housing` data set.
8+
#'
9+
#' Contains 720 features and 2930 observations.
10+
#'
11+
#' @docType data
12+
#' @keywords data
13+
#' @examples
14+
#' data("energy_usage", package = "mlr3data")
15+
#' str(energy_usage)
16+
NULL

R/zzz.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
register_tasks = function() {
55
x = getFromNamespace("mlr_tasks", ns = "mlr3")
66

7+
x$add("ames_housing", get_ames_housing_task)
78
x$add("bike_sharing", get_bike_sharing_task)
89
x$add("ilpd", get_ilpd_task)
910
x$add("kc_housing", get_kc_housing_task)

data-raw/ames_housing.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Data-preparation script: reads the raw `ames_dirty.csv` file and saves it as
# the `ames_housing` data object.
url = "https://raw.githubusercontent.com/ja-thomas/extend_ames_housing/main/data/ames_dirty.csv"
2+
# Read the CSV directly from the URL; stringsAsFactors = TRUE asks fread to
# convert character columns to factors.
# NOTE(review): the result is not converted with data.table::setDF() as in the
# ilpd / kc_housing / moneyball scripts, so it is saved as a data.table -- confirm this is intended.
ames_housing = data.table::fread(url, stringsAsFactors = TRUE)
3+
# Save the object through usethis, overwriting any existing file and using xz compression.
usethis::use_data(ames_housing, overwrite = TRUE, compress = "xz")

data-raw/bike_sharing.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ download.file("https://archive.ics.uci.edu/ml/machine-learning-databases/00275/B
55
bike_sharing = setDT(read.csv(unz(tmp, filename = "hour.csv"), as.is = TRUE))
66
bike_sharing = setnames(bike_sharing, c("instant", "date", "season", "year", "month", "hour", "holiday", "weekday",
77
"working_day", "weather", "temperature", "apparent_temperature", "humidity", "windspeed", "casual", "registered",
8-
"count"))
8+
"count"))
99

1010
bike_sharing[, holiday := as.logical(holiday)]
1111
bike_sharing[, working_day := as.logical(working_day)]
1212
bike_sharing[, season := factor(season, labels = c("winter", "spring", "summer", "fall"))]
13-
bike_sharing[, weather := as.factor(bike_sharing$weather)]
13+
bike_sharing[, weather := as.factor(bike_sharing$weather)]
1414

1515
bike_sharing = bike_sharing[, .SD, .SDcols = !c("instant", "casual", "registered")]
1616

17-
usethis::use_data(bike_sharing, overwrite = TRUE)
17+
usethis::use_data(bike_sharing, overwrite = TRUE, compress = "xz")

data-raw/energy_usage.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Data-preparation script: reads the raw `energy_usage.csv` file and saves it as
# the `energy_usage` data object.
url = "https://raw.githubusercontent.com/ja-thomas/extend_ames_housing/main/data/energy_usage.csv"
2+
# Read the CSV directly from the URL; stringsAsFactors = TRUE asks fread to
# convert character columns to factors.
# NOTE(review): the result is not converted with data.table::setDF() as in the
# ilpd / kc_housing / moneyball scripts, so it is saved as a data.table -- confirm this is intended.
energy_usage = data.table::fread(url, stringsAsFactors = TRUE)
3+
# Save the object through usethis, overwriting any existing file and using xz compression.
usethis::use_data(energy_usage, overwrite = TRUE, compress = "xz")

data-raw/ilpd.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ levels(ilpd$diseased) = c("yes", "no")
99

1010
data.table::setDF(ilpd)
1111

12-
usethis::use_data(ilpd, overwrite = TRUE)
12+
usethis::use_data(ilpd, overwrite = TRUE, compress = "xz")

data-raw/kc_housing.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ kc_housing[, date := anytime::anytime(date)]
1010
kc_housing[yr_renovated == 0, yr_renovated := NA_integer_]
1111
kc_housing[sqft_basement == 0, sqft_basement := NA_integer_]
1212
kc_housing[, waterfront := as.logical(waterfront)]
13-
setDF(kc_housing)
13+
data.table::setDF(kc_housing)
1414

15-
usethis::use_data(kc_housing, overwrite = TRUE)
15+
usethis::use_data(kc_housing, overwrite = TRUE, compress = "xz")

data-raw/moneyball.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ odata = OMLData$new(41021)
44
moneyball = odata$data
55
to_integer = c("Year", "RS", "RA", "W")
66
moneyball[, (to_integer) := lapply(.SD, as.integer), .SDcols = to_integer]
7-
setnames(moneyball, new = tolower(colnames(moneyball)))
7+
data.table::setnames(moneyball, new = tolower(colnames(moneyball)))
88

9-
setDF(moneyball)
9+
data.table::setDF(moneyball)
1010

11-
usethis::use_data(moneyball, overwrite = TRUE)
11+
usethis::use_data(moneyball, overwrite = TRUE, compress = "xz")

0 commit comments

Comments
 (0)