5656# ' Increase this for a higher chance to find global optimum, at the expense of more time.
5757# ' @param convThresh convergence threshold passed to \code{factr} when the \code{optim} function (L-BFGS-B) is called.
5858# ' Lower values will take longer to converge, but may be more accurate.
59+ # ' @param minClusterUtility the minimum percentage of the optimal utility required for a less optimal local
60+ # ' maximum to be included as a candidate parameter set in the next scoring function. If \code{NULL},
61+ # ' only the global optimum will be used as a candidate parameter set.
5962# ' @param noiseAdd if bulkNew > 1, specifies how much noise to add to the optimal candidate parameter set
6063# ' to obtain the other \code{bulkNew-1} candidate parameter sets. New random draws are pulled from
6164# ' a shape(4,4) beta distribution centered at the optimal candidate parameter set
103106# '
104107# ' return(list(Score = max(xgbcv$evaluation_log$test_auc_mean)
105108# ' , nrounds = xgbcv$best_iteration
106- # ' )
107109# ' )
110+ # ' )
111+ # ' }
108112# '
109- # ' bounds <- list( max_depth = c(2L, 10L)
110- # ' , min_child_weight = c(1L, 100L)
111- # ' , subsample = c(0.25, 1))
112113# '
113- # ' kern <- "Matern52"
114+ # ' bounds <- list( max_depth = c(2L, 10L)
115+ # ' , min_child_weight = c(1, 100)
116+ # ' , subsample = c(0.25, 1))
114117# '
115- # ' acq <- "ei "
118+ # ' kern <- "Matern52 "
116119# '
117- # ' ScoreResult <- BayesianOptimization(FUN = scoringFunction
118- # ' , bounds = bounds
119- # ' , initPoints = 10
120- # ' , bulkNew = 1
121- # ' , nIters = 12
122- # ' , kern = kern
123- # ' , acq = acq
124- # ' , kappa = 2.576
125- # ' , verbose = 1
126- # ' , parallel = FALSE)
120+ # ' acq <- "ei"
121+ # '
122+ # ' ScoreResult <- BayesianOptimization(FUN = scoringFunction
123+ # ' , bounds = bounds
124+ # ' , initPoints = 10
125+ # ' , bulkNew = 1
126+ # ' , nIters = 12
127+ # ' , kern = kern
128+ # ' , acq = acq
129+ # ' , kappa = 2.576
130+ # ' , verbose = 1
131+ # ' , parallel = FALSE)
127132# ' }
128- # ' @importFrom data.table data.table setDT setcolorder := as.data.table
133+ # ' @importFrom data.table data.table setDT setcolorder := as.data.table copy .I
129134# ' @importFrom utils head
130135# ' @importFrom GauPro GauPro_kernel_model Matern52 Matern32 Exponential Gaussian
131136# ' @export
@@ -151,6 +156,7 @@ BayesianOptimization <- function(
151156 , eps = 0.0
152157 , gsPoints = 100
153158 , convThresh = 1e7
159+ , minClusterUtility = NULL
154160 , noiseAdd = 0.25
155161 , verbose = 1
156162) {
@@ -165,6 +171,13 @@ BayesianOptimization <- function(
165171 Overwrites <- 0
166172 Iter <- 0
167173 kern <- assignKern(kern ,beta )
174+ boundsDT <- data.table( N = ParamNames
175+ , L = sapply(bounds , function (x ) x [1 ])
176+ , U = sapply(bounds , function (x ) x [2 ])
177+ , C = sapply(bounds , function (x ) class(x ))
178+ )
179+
180+
168181
169182 # Define processing function
170183 ParMethod <- function (x ) if (x ) {`%dopar%` } else {`%do%` }
@@ -197,7 +210,7 @@ BayesianOptimization <- function(
197210 if (nrow(initGrid )> 0 ){
198211 InitFeedParams <- initGrid
199212 } else {
200- InitFeedParams <- data.table(sapply(ParamNames ,RandParams ,initPoints , bounds ))
213+ InitFeedParams <- data.table(sapply(ParamNames ,RandParams ,initPoints , boundsDT ))
201214 }
202215
203216 if (verbose > 0 ) cat(" \n Running initial scoring function" ,nrow(InitFeedParams )," times in" ,Workers ," thread(s).\n " )
@@ -212,7 +225,7 @@ BayesianOptimization <- function(
212225 , .export = export
213226 ) %op % {
214227
215- Params <- InitFeedParams [iter ,]
228+ Params <- InitFeedParams [get( " iter" ) ,]
216229 Elapsed <- system.time(Result <- do.call(what = FUN , args = as.list(Params )))
217230 data.table(Params ,Elapsed = Elapsed [[3 ]],as.data.table(Result ))
218231
@@ -251,15 +264,14 @@ BayesianOptimization <- function(
251264
252265 # Setup for iterations
253266 GPlist <- list ()
267+ acqMaximums <- data.table()
254268 scaleList <- list (score = max(abs(ScoreDT $ Score )), elapsed = max(ScoreDT $ Elapsed ))
255269 GP <- NULL
256- OptParDT <- data.table(ScoreDT [0 ,c(" Iteration" ,ParamNames ), with = F ])
257- BestPars <- data.table( Iteration = Iter
258- , ScoreDT [which.max(Score ),c(ParamNames ," Score" ,extraRet ),with = F ]
270+ BestPars <- data.table( " Iteration" = Iter
271+ , ScoreDT [which.max(get(" Score" )),c(ParamNames ," Score" ,extraRet ),with = F ]
259272 , elapsedSecs = round(difftime(Time ,StartT ,units = " secs" ),0 )
260273 )
261274
262-
263275 # Start the iterative GP udpates
264276 while (nrow(ScoreDT ) < nIters ){
265277
@@ -276,29 +288,26 @@ BayesianOptimization <- function(
276288 acq <- stopImpatient $ newAcq
277289 }
278290
279-
280291 # Fit GP
281- newD <- ScoreDT [Iteration == Iter - 1 ,]
292+ newD <- ScoreDT [get( " Iteration" ) == Iter - 1 ,]
282293 if (verbose > 0 ) cat(" \n 1) Fitting Gaussian process..." )
283294 GP <- updateGP( GP = GP
284295 , kern = kern
285- , X = matrix (sapply(ParamNames , MinMaxScale , newD , bounds ), nrow = nrow(newD ))
286- , Z = matrix (as.matrix(newD [,.(Score / scaleList $ score ,Elapsed / scaleList $ elapsed )]), nrow = nrow(newD ))
296+ , X = matrix (sapply(ParamNames , MinMaxScale , newD , boundsDT ), nrow = nrow(newD ))
297+ , Z = matrix (as.matrix(newD [,.(get( " Score" ) / scaleList $ score ,Elapsed / scaleList $ elapsed )]), nrow = nrow(newD ))
287298 , acq = acq
288299 , scaleList = scaleList
289300 , parallel = parallel )
290301
291-
292-
293302 # Store GP in list
294303 GPlist [[Iter ]] <- GP
295304
296305 # Create random points to initialize local maximum search.
297- LocalTries <- data.table(sapply(ParamNames ,RandParams ,gsPoints ,bounds ))
298- LocalTryMM <- data.table(sapply(ParamNames ,MinMaxScale ,LocalTries ,bounds ))
306+ LocalTries <- data.table(sapply(ParamNames ,RandParams ,gsPoints ,boundsDT ))
307+ LocalTryMM <- data.table(sapply(ParamNames ,MinMaxScale ,LocalTries ,boundsDT ))
299308
300309 # Try gsPoints starting points to find parameter set that optimizes Acq
301- if (verbose > 0 ) cat(" \n 2) Running global optimum search..." )
310+ if (verbose > 0 ) cat(" \n 2) Running local optimum search..." )
302311 LocalOptims <- maxAcq( GP = GP
303312 , TryOver = LocalTryMM
304313 , acq = acq
@@ -310,24 +319,21 @@ BayesianOptimization <- function(
310319 , convThresh = convThresh
311320 )
312321
322+ if (sum(LocalOptims $ gradCount > 2 ) == 0 ) cat(" \n 2a) WARNING - No initial points converged.\n Process may just be sampling random points.\n Try decreasing convThresh." )
313323
314- # Extract the best GP_Utility, and then reverse scaling
315- NewP <- LocalOptims [order(- GP_Utility )]
316- NewPUMM <- data.table(sapply(ParamNames , UnMMScale , NewP , bounds ))[rep(1 ,runNew )]
317-
318-
319- # Save the best parameters for this Gaussian Process
320- OptParDT <- rbind(OptParDT , data.table(Iteration = Iter , NewPUMM [1 ,]))
321-
322- # Add noise to optimal parameter set.
323- NoisyP <- sapply(ParamNames , ApplyNoise , Table = NewPUMM , bounds = bounds , saveFirst = TRUE , noiseAdd = noiseAdd )
324- if (runNew == 1 ) {NoisyP <- as.data.table(as.list(NoisyP ))
325- } else NoisyP <- data.table(NoisyP )
324+ fromCluster <- applyCluster()
325+ acqMaximums <- rbind(acqMaximums , data.table(" Iteration" = Iter , fromCluster $ clusterPoints ))
326+ newScorePars <- sapply(ParamNames , UnMMScale , fromCluster $ newSet , boundsDT )
326327
328+ if (runNew < 2 ) {
329+ newScorePars <- as.data.table(as.list(newScorePars ))
330+ } else {
331+ newScorePars <- data.table(newScorePars )
332+ }
327333
328- if (verbose > 0 ) cat(" \n 3) Running scoring function" ,nrow(NoisyP )," times in" ,Workers ," thread(s)...\n " )
334+ if (verbose > 0 ) cat(" \n 3) Running scoring function" ,nrow(newScorePars )," times in" ,Workers ," thread(s)...\n " )
329335 sink(" NUL" )
330- NewResults <- foreach( iter = 1 : nrow(NoisyP )
336+ NewResults <- foreach( iter = 1 : nrow(newScorePars )
331337 , .combine = rbind
332338 , .multicombine = TRUE
333339 , .inorder = FALSE
@@ -337,9 +343,9 @@ BayesianOptimization <- function(
337343 , .export = export
338344 ) %op % {
339345
340- Params <- NoisyP [ iter ,]
346+ Params <- newScorePars [get( " iter" ) ,]
341347 Elapsed <- system.time(Result <- do.call(what = FUN , args = as.list(Params )))
342- data.table(NoisyP [ iter ,], Elapsed = Elapsed [[3 ]], as.data.table(Result ))
348+ data.table(newScorePars [get( " iter" ) ,], Elapsed = Elapsed [[3 ]], as.data.table(Result ))
343349
344350 }
345351 sink()
@@ -354,22 +360,22 @@ BayesianOptimization <- function(
354360
355361 if (max(NewResults $ Score ) > max(ScoreDT $ Score )) {
356362 cat(" \n New best parameter set found:\n " )
357- print(NewResults [which.max(Score ),], row.names = FALSE )
363+ print(NewResults [which.max(get( " Score" ) ),], row.names = FALSE )
358364 } else {
359365 cat(" \n Maximum score was not raised this round. Best score is still:\n " )
360- print(ScoreDT [which.max(Score ),], row.names = FALSE )
366+ print(ScoreDT [which.max(get( " Score" ) ),], row.names = FALSE )
361367 }
362368 }
363369
364370 ScoreDT <- rbind(ScoreDT
365- ,data.table(Iteration = rep(Iter ,nrow(NewResults ))
371+ ,data.table(" Iteration" = rep(Iter ,nrow(NewResults ))
366372 ,NewResults
367373 )
368374 )
369375 BestPars <- rbind(BestPars
370- ,data.table( Iteration = Iter
371- , ScoreDT [which.max(Score ),c(ParamNames ," Score" ,extraRet ),with = F ]
372- , elapsedSecs = round(difftime(Time ,StartT ,units = " secs" ),0 )
376+ ,data.table(" Iteration" = Iter
377+ , ScoreDT [which.max(get( " Score" ) ),c(ParamNames ," Score" ,extraRet ),with = F ]
378+ , " elapsedSecs" = round(difftime(Time ,StartT ,units = " secs" ),0 )
373379 )
374380 )
375381
@@ -390,12 +396,12 @@ BayesianOptimization <- function(
390396 RetList <- list ()
391397 names(GPlist ) <- paste0(" GP_" , 1 : Iter )
392398 RetList $ GPlist <- GPlist
393- RetList $ OptParDT <- OptParDT
399+ RetList $ acqMaximums <- acqMaximums
394400 RetList $ ScoreDT <- ScoreDT
395401 RetList $ BestPars <- BestPars
396402
397403 return (RetList )
398404
399405
400406}
401- utils :: globalVariables(c(" iter " , " Iteration " , " Score " , " . " , " GauPro_kernel_beta " , " GauPro_kernel_model " , " GP_Utility " ))
407+ utils :: globalVariables(c(" . " ))
0 commit comments