@@ -48,7 +48,7 @@ source("scripts/builtin/bandit.dml") as bandit;
4848
4949f_fit_pipeline = function(Frame[Unknown] trainData, Frame[Unknown] testData, Frame[Unknown] metaData = as.frame("NULL"),
5050 Frame[Unknown] pip, Frame[Unknown] applyFunc, Matrix[Double] hp, Integer cvk=3, String evaluationFunc, Matrix[Double] evalFunHp,
51- Boolean isLastLabel = TRUE, Boolean correctTypos=FALSE)
51+ Boolean isLastLabel = TRUE, Boolean correctTypos=FALSE, Boolean allCombinations=FALSE )
5252return (Matrix[Double] scores, Matrix[Double] cleanTrain, Matrix[Double] cleanTest, List[Unknown] externalState, List[Unknown] iState)
5353{
5454 externalState = list()
@@ -92,28 +92,66 @@ return (Matrix[Double] scores, Matrix[Double] cleanTrain, Matrix[Double] cleanTe
9292 hp_matrix = matrix(hp_width, rows=ncol(pip), cols=ncol(hp_width)/ncol(pip))
9393 pipList = list(ph = pip, hp = hp_matrix, flags = no_of_flag_vars)
9494
95+ print("Getting training score using CV")
9596 [trainScore, evalFunHp] = bandit::crossV(X=eXtrain, y=eYtrain, cvk=cvk, evalFunHp=evalFunHp,
9697 pipList=pipList, metaList=metaList, evalFunc=evaluationFunc)
97- print("train score cv: "+toString(trainScore))
98+ print("- train score cv: "+toString(trainScore))
9899
99-
100- # # # now test accuracy
101- [eXtrain, eYtrain, eXtest, eYtest, a, b, c, d, iState] = executePipeline(pipeline=pip, Xtrain=eXtrain, Ytrain=eYtrain,
102- Xtest=eXtest, Ytest=eYtest, metaList=metaList, hyperParameters=hp_matrix, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE)
103-
104- if(max(eYtrain) == min(eYtrain))
105- stop("Y contains only one class")
100+ print("----------------------------");
101+ print("Getting test accuracy")
102+ primitives = matrix(0, rows=0, cols=0);
103+ if (allCombinations) {
104+ # Count every contiguous index range [i, j] with 1 <= i <= j <= n, where n = ncol(pip)
105+ totCount = 0;
106+ n = ncol(pip);
107+ for (i in 1:n) {
108+ for (j in i:n)
109+ totCount = totCount + 1;
110+ }
111+ # Fill one row per range: column 1 holds the start index, column 2 the end index
112+ primitives = matrix(0, rows=totCount, cols=2);
113+ r = 1;
114+ for (start in 1:n) {
115+ for (end in start:n) {
116+ primitives[r,1] = start;
117+ primitives[r,2] = end;
118+ r = r + 1;
119+ }
120+ }
121+ }
122+ else {
123+ # Single range spanning all columns of pip: [1, ncol(pip)]
124+ primitives = matrix(0, rows=1, cols=2);
125+ primitives[1,1] = 1;
126+ primitives[1,2] = ncol(pip);
127+ }
106128
107- # score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtrain, Ytest=eYtrain, Xorig=as.matrix(0), evalFunHp=evalFunHp))
108- # trainAccuracy = as.scalar(score[1, 1])
109-
110- score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp))
111- testAccuracy = as.scalar(score[1, 1])
112-
113- scores = matrix(0, rows=1, cols=3)
114- scores[1, 1] = dirtyScore
115- # scores[1, 2] = trainAccuracy
116- scores[1, 3] = testAccuracy
117- cleanTrain = cbind(eXtrain, eYtrain)
118- cleanTest = cbind(eXtest, eYtest)
129+ for (r in 1:nrow(primitives)) {
130+ startInd = as.scalar(primitives[r,1]);
131+ endInd = as.scalar(primitives[r,2]);
132+ # Run the pipeline for this [startInd, endInd] range; outputs go to *_cl so the encoded inputs are reused unchanged by the next iteration
133+ [eXtrain_cl, eYtrain_cl, eXtest_cl, eYtest_cl, a, b, c, d, iState] = executePipeline(pipeline=pip, Xtrain=eXtrain, Ytrain=eYtrain,
134+ Xtest=eXtest, Ytest=eYtest, metaList=metaList, hyperParameters=hp_matrix, flagsCount=no_of_flag_vars, test=TRUE, verbose=FALSE, startInd=startInd, endInd=endInd)
135+
136+ if(max(eYtrain_cl) == min(eYtrain_cl))
137+ stop("Y contains only one class")
138+
139+ # score = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtrain, Ytest=eYtrain, Xorig=as.matrix(0), evalFunHp=evalFunHp))
140+ # trainAccuracy = as.scalar(score[1, 1])
141+
142+ score = eval(evaluationFunc, list(X=eXtrain_cl, Y=eYtrain_cl, Xtest=eXtest_cl, Ytest=eYtest_cl, Xorig=as.matrix(0), evalFunHp=evalFunHp))
143+ testAccuracy = as.scalar(score[1, 1])
144+
145+ scores = matrix(0, rows=1, cols=3) # rebuilt on every iteration: the returned scores reflect the last row of primitives
146+ scores[1, 1] = dirtyScore
147+ # scores[1, 2] = trainAccuracy
148+ scores[1, 3] = testAccuracy
149+ cleanTrain = cbind(eXtrain_cl, eYtrain_cl)
150+ cleanTest = cbind(eXtest_cl, eYtest_cl) # return the pipeline output for the test set, matching cleanTrain (was the raw eXtest/eYtest)
151+
152+ header = frame(["dirty acc", "train acc", "test acc"], rows=1, cols=3)
153+ result = as.frame(scores)
154+ writeRes = rbind(header, result)
155+ print(toString(writeRes))
156+ }
119157}
0 commit comments