if (!initialize & nrow(leftOff) == 0) stop("initialize cannot be FALSE if leftOff is not provided. Set initialize to TRUE and provide either initGrid or initPoints. You can provide both leftOff and initialize if you want.\n")
if (initialize & nrow(initGrid) == 0 & initPoints <= 0) stop("initialize is TRUE but neither initGrid nor initPoints was provided.")
if (initPoints > 0 & nrow(initGrid) > 0) stop("initGrid and initPoints are both specified; choose one.")
if (initPoints <= 0 & nrow(initGrid) == 0 & nrow(leftOff) == 0) stop("neither initGrid nor initPoints is specified; choose one or provide leftOff.")
if (parallel & (Workers == 1)) stop("parallel is set to TRUE but no back end is registered.\n")
if (!parallel & Workers > 1 & verbose > 0) cat("A parallel back end is registered, but parallel is set to FALSE. Process will not be run in parallel.\n")
Keep in mind that if you change your bounds, you will need to delete any rows from your leftOff table that fall outside the new bounds.
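For instance, a leftOff table could be trimmed to new bounds with something like the following (a minimal sketch; the ```leftOff``` and ```bounds``` objects here are illustrative stand-ins for your own):

```r
# Hypothetical bounds and leftOff table (one column per parameter):
bounds <- list(min_child_weight = c(0, 10))
leftOff <- data.frame(min_child_weight = c(2, 6, 12),
                      Score = c(0.71, 0.74, 0.69))

# Flag, for each parameter, the rows that fall inside its bounds:
inBounds <- sapply(names(bounds), function(p) {
  leftOff[[p]] >= bounds[[p]][1] & leftOff[[p]] <= bounds[[p]][2]
})

# Keep only the rows that are inside the bounds for every parameter:
leftOff <- leftOff[rowSums(!as.matrix(inBounds)) == 0, ]
```

Here the row with ```min_child_weight = 12``` would be dropped before the table is passed back in.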
********

### Adjusting noiseAdd and minClusterUtility

If we want to run n scoring functions in parallel, an important decision is how to choose the next n candidate parameter sets. One logical choice is the parameter set that maximizes our acquisition function. However, we still need to decide on the other n-1 sets. There are two good choices:
1. Add noise to the global optimum to sample nearby points.
2. Determine whether there are other local optimums that may be nearly as good.

This package allows you to do both. Using the ```minClusterUtility``` parameter, you can specify the minimum percentage utility of the global optimum required for a different local optimum to be considered. As an example, let's say we are optimizing 1 hyperparameter ```min_child_weight```, which is bounded between [0,10]. Our acquisition function may look like the following:
```{r eval = TRUE, echo=FALSE}
knitr::include_graphics("Optimums.png")
```

In this case, we may want to run our scoring function on both the global and the local maximum. If ```minClusterUtility``` is set to no more than 1.83/2.14 ~ 0.855, the process would use both the local and global maximums as candidate parameter sets in the next round.
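The filter this describes can be sketched in a few lines (an illustrative sketch using the utility values from the figure above, not the package's internal code):

```r
# Utilities of the two optimums from the acquisition function plot:
utilities <- c(global = 2.14, localMax = 1.83)
minClusterUtility <- 0.80  # hypothetical setting

# A local optimum is kept if its utility, as a fraction of the global
# optimum's utility, meets the minClusterUtility threshold:
keep <- utilities / max(utilities) >= minClusterUtility
candidates <- names(utilities)[keep]
```

With ```minClusterUtility = 0.80```, both optimums clear the 0.855 ratio and are kept; raising it above ~0.855 would leave only the global optimum.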
However, this doesn't fully solve our problem. In the example above, we had 2 maximums, but what if we want to run 10 instances of our scoring function in parallel? We would need to come up with 8 other sets of parameters. For the sake of decreasing uncertainty around the most promising parameter sets, this process samples from a shape(4,4) beta distribution centered at the estimated optimal parameters. In the example above, our acquisition function was maximized at ```min_child_weight = 4```. The figure below shows the effect that adjusting the ```noiseAdd``` parameter has on how we draw the other 8 candidate parameter sets:
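This sampling scheme can be sketched as follows (illustrative only, not the package's exact internals; it assumes ```noiseAdd``` is the fraction of the bounds range to sample within, as described above):

```r
set.seed(42)
lower <- 0; upper <- 10   # min_child_weight bounds
optimum <- 4              # acquisition-maximizing value
noiseAdd <- 0.25          # sample within 25% of the bounds range
n <- 8                    # extra candidate sets needed

# The sampling window has width noiseAdd * (upper - lower):
halfWidth <- noiseAdd * (upper - lower) / 2

# A shape(4,4) beta is symmetric on [0,1]; shift and scale it so the
# draws are centered on the estimated optimum:
candidates <- optimum + (rbeta(n, 4, 4) - 0.5) * 2 * halfWidth

# Clamp any draws back inside the original bounds:
candidates <- pmin(pmax(candidates, lower), upper)
```

With these settings, all 8 draws land within 1.25 of the optimum, concentrating the parallel runs around the most promising region.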
Now we need to define the scoring function. This function should, at a minimum, return a list with a ```Score``` element, which is the model evaluation metric we want to maximize. We can also retain other pieces of information created by the scoring function by including them as named elements of the returned list. In this case, we want to retain the optimal number of rounds determined by the ```xgb.cv```:
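A minimal sketch of that contract, with a toy score standing in for a real ```xgb.cv``` call (the names and values here are illustrative):

```r
# Toy scoring function: returns a list with a Score element (the metric
# to maximize) plus any extra information we want to retain, such as the
# best number of rounds a real xgb.cv run would report.
scoringFunction <- function(min_child_weight) {
  score <- -(min_child_weight - 4)^2  # stand-in for a CV evaluation metric
  bestRounds <- 50L                   # stand-in for xgb.cv's best iteration
  list(Score = score, nrounds = bestRounds)
}

res <- scoringFunction(min_child_weight = 4)
```

The optimizer maximizes ```res$Score```; the extra ```nrounds``` element is simply carried along in the results table.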