
Commit de7a736

Made xgboost tests, examples, and vignettes conditional on xgboost being installed
1 parent ece31cc

File tree: 3 files changed, +115 −109 lines
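
The pattern this commit applies throughout: xgboost is a suggested rather than required dependency, so any code that touches it has to degrade gracefully on machines where it is not installed (CRAN routinely checks packages without their suggested dependencies present). requireNamespace() loads a package's namespace without attaching it and returns FALSE instead of throwing an error when the package is missing, which makes it the standard guard. A minimal sketch of the idiom, with the message() fallback added here for illustration only:

# Guard for a Suggests-only dependency. The else branch is illustrative
# and not part of this commit.
if (requireNamespace("xgboost", quietly = TRUE)) {
  library("xgboost")
  # ... xgboost-dependent example code runs only when the package is present ...
} else {
  message("xgboost is not installed; skipping this example.")
}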

R/bayesOpt.R

Lines changed: 53 additions & 51 deletions
@@ -114,67 +114,69 @@
 #'
 #' \dontrun{
 #' # Example 2 - Hyperparameter Tuning in xgboost
-#' library("xgboost")
+#' if (requireNamespace('xgboost', quietly = TRUE)) {
+#'   library("xgboost")
 #'
-#' data(agaricus.train, package = "xgboost")
+#'   data(agaricus.train, package = "xgboost")
 #'
-#' Folds <- list(
-#'   Fold1 = as.integer(seq(1,nrow(agaricus.train$data),by = 3))
-#'   , Fold2 = as.integer(seq(2,nrow(agaricus.train$data),by = 3))
-#'   , Fold3 = as.integer(seq(3,nrow(agaricus.train$data),by = 3))
-#' )
+#'   Folds <- list(
+#'     Fold1 = as.integer(seq(1,nrow(agaricus.train$data),by = 3))
+#'     , Fold2 = as.integer(seq(2,nrow(agaricus.train$data),by = 3))
+#'     , Fold3 = as.integer(seq(3,nrow(agaricus.train$data),by = 3))
+#'   )
 #'
-#' scoringFunction <- function(max_depth, min_child_weight, subsample) {
+#'   scoringFunction <- function(max_depth, min_child_weight, subsample) {
 #'
-#'   dtrain <- xgb.DMatrix(agaricus.train$data,label = agaricus.train$label)
+#'     dtrain <- xgb.DMatrix(agaricus.train$data,label = agaricus.train$label)
 #'
-#'   Pars <- list(
-#'     booster = "gbtree"
-#'     , eta = 0.01
-#'     , max_depth = max_depth
-#'     , min_child_weight = min_child_weight
-#'     , subsample = subsample
-#'     , objective = "binary:logistic"
-#'     , eval_metric = "auc"
-#'   )
+#'     Pars <- list(
+#'       booster = "gbtree"
+#'       , eta = 0.01
+#'       , max_depth = max_depth
+#'       , min_child_weight = min_child_weight
+#'       , subsample = subsample
+#'       , objective = "binary:logistic"
+#'       , eval_metric = "auc"
+#'     )
 #'
-#'   xgbcv <- xgb.cv(
-#'     params = Pars
-#'     , data = dtrain
-#'     , nround = 100
-#'     , folds = Folds
-#'     , prediction = TRUE
-#'     , showsd = TRUE
-#'     , early_stopping_rounds = 5
-#'     , maximize = TRUE
-#'     , verbose = 0
-#'   )
+#'     xgbcv <- xgb.cv(
+#'       params = Pars
+#'       , data = dtrain
+#'       , nround = 100
+#'       , folds = Folds
+#'       , prediction = TRUE
+#'       , showsd = TRUE
+#'       , early_stopping_rounds = 5
+#'       , maximize = TRUE
+#'       , verbose = 0
+#'     )
 #'
-#'   return(
-#'     list(
-#'       Score = max(xgbcv$evaluation_log$test_auc_mean)
-#'       , nrounds = xgbcv$best_iteration
+#'     return(
+#'       list(
+#'         Score = max(xgbcv$evaluation_log$test_auc_mean)
+#'         , nrounds = xgbcv$best_iteration
+#'       )
 #'     )
-#'   )
-#' }
+#'   }
 #'
-#' bounds <- list(
-#'   max_depth = c(2L, 10L)
-#'   , min_child_weight = c(1, 100)
-#'   , subsample = c(0.25, 1)
-#' )
+#'   bounds <- list(
+#'     max_depth = c(2L, 10L)
+#'     , min_child_weight = c(1, 100)
+#'     , subsample = c(0.25, 1)
+#'   )
 #'
-#' ScoreResult <- bayesOpt(
-#'   FUN = scoringFunction
-#'   , bounds = bounds
-#'   , initPoints = 3
-#'   , iters.n = 2
-#'   , iters.k = 1
-#'   , acq = "ei"
-#'   , gsPoints = 10
-#'   , parallel = FALSE
-#'   , verbose = 1
-#' )
+#'   ScoreResult <- bayesOpt(
+#'     FUN = scoringFunction
+#'     , bounds = bounds
+#'     , initPoints = 3
+#'     , iters.n = 2
+#'     , iters.k = 1
+#'     , acq = "ei"
+#'     , gsPoints = 10
+#'     , parallel = FALSE
+#'     , verbose = 1
+#'   )
+#' }
 #' }
 #' @importFrom data.table data.table setDT setcolorder := as.data.table copy .I setnames is.data.table rbindlist
 #' @importFrom utils head tail
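
The commit message also mentions tests, but no test file is part of this three-file diff. For reference, the conventional testthat guard for a suggested package is skip_if_not_installed(); a sketch under that assumption, with a hypothetical file path and test body:

# tests/testthat/test-xgboost.R (hypothetical path; not in this diff)
test_that("bayesOpt tunes an xgboost model when xgboost is available", {
  skip_if_not_installed("xgboost")  # test is reported as skipped, not failed
  # ... xgboost-dependent fixtures and expectations would follow ...
})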

man/bayesOpt.Rd

Lines changed: 53 additions & 51 deletions
Some generated files are not rendered by default.

vignettes/tuningHyperparameters.Rmd

Lines changed: 9 additions & 7 deletions
@@ -16,6 +16,7 @@ knitr::opts_chunk$set(
 )
 options(width = 1000)
 set.seed(1991)
+xgbAvail <- requireNamespace('xgboost', quietly = TRUE)
 ```
 
 ********
@@ -35,7 +36,7 @@ Idealy, we would use the information from prior model evaluations to guide us in
 ## Practical Example
 
 In this example, we will be using the agaricus.train dataset provided in the XGBoost package. Here, we load the packages, data, and create a folds object to be used in the scoring function.
-```{r eval = TRUE, echo=TRUE, results = 'hide'}
+```{r eval = xgbAvail, echo=TRUE, results = 'hide'}
 library("xgboost")
 library("ParBayesianOptimization")
@@ -50,7 +51,7 @@ Folds <- list(
 
 Now we need to define the scoring function. This function should, at a minimum, return a list with a ```Score``` element, which is the model evaluation metric we want to maximize. We can also retain other pieces of information created by the scoring function by including them as named elements of the returned list. In this case, we want to retain the optimal number of rounds determined by the ```xgb.cv```:
 
-```{r eval = TRUE}
+```{r eval = xgbAvail}
 scoringFunction <- function(max_depth, min_child_weight, subsample) {
 
   dtrain <- xgb.DMatrix(agaricus.train$data,label = agaricus.train$label)
@@ -92,31 +93,32 @@ Some other objects we need to define are the bounds, GP kernel and acquisition f
 + The kernel is passed to the ```GauPro``` function ```GauPro_kernel_model``` and defines the covariance function.
 + The acquisition function defines the utility we get from using a certain parameter set.
 
-```{r eval = TRUE}
+```{r eval = xgbAvail}
 bounds <- list(
   max_depth = c(2L, 10L)
   , min_child_weight = c(1, 25)
   , subsample = c(0.25, 1)
 )
 ```
 
-We are now ready to put this all into the ```BayesianOptimization``` function.
+We are now ready to put this all into the ```bayesOpt``` function.
 
-```{r eval = TRUE}
+```{r eval = xgbAvail}
 set.seed(1234)
 optObj <- bayesOpt(
   FUN = scoringFunction
   , bounds = bounds
   , initPoints = 4
   , iters.n = 3
 )
+
 ```
 
 The console informs us that the process initialized by running ```scoringFunction``` 4 times. It then fit a Gaussian process to the parameter-score pairs, found the global optimum of the acquisition function, and ran ```scoringFunction``` again. This process continued until we had 7 parameter-score pairs. You can interrogate the ```bayesOpt``` object to see the results:
 
-```{r eval = TRUE}
+```{r eval = xgbAvail}
 optObj$scoreSummary
 ```
-```{r eval = TRUE}
+```{r eval = xgbAvail}
 getBestPars(optObj)
 ```