Skip to content

Commit 48fd91c

Browse files
committed
[SYSTEMDS-3819] Bug fixes in sliceLineExtract builtin
1 parent 21a5092 commit 48fd91c

File tree

3 files changed

+8
-6
lines changed

3 files changed

+8
-6
lines changed

scripts/builtin/sliceLineExtract.dml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939

4040
m_sliceLineExtract = function(Matrix[Double] X, Matrix[Double] e,
4141
Matrix[Double] TK, Matrix[Double] TKC, Integer k2 = -1)
42-
return(Matrix[Double] Xtk, Matrix[Double] etk)
42+
return(Matrix[Double] Xtk, Matrix[Double] etk, Matrix[Double] I)
4343
{
4444
# check valid parameters
4545
if( k2 > nrow(TK) )
@@ -50,7 +50,7 @@ m_sliceLineExtract = function(Matrix[Double] X, Matrix[Double] e,
5050
# extract first k2 slices from X and e
5151
I = matrix(0, k2, nrow(X));
5252
parfor(i in 1:k2) {
53-
I[i,] = t(rowSums(X == TK[i,]) == sum(TK[i,]))
53+
I[i,] = t(rowSums(X == TK[i,]) == sum(TK[i,] > 0))
5454
}
5555
I = t(colSums(I)); #union
5656

scripts/builtin/topk_cleaning.dml

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -79,9 +79,9 @@ f_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = a
7979
# apply sampling on training data for pipeline enumeration
8080
# TODO why recoding/sampling twice (within getDirtyScore)
8181
print("---- class-stratified sampling of feature matrix w/ f="+sample);
82-
if(nrow(eYtrain) >= rowCount & sample == 1.0 & sum(mask) > ncol(mask)/2) # &
83-
[eXtrain, eYtrain ] = utils::doErrorSample(eXtrain, eYtrain, lq, uq, rowCount)
84-
else
82+
# if(nrow(eYtrain) >= rowCount & sample == 1.0 & sum(mask) > ncol(mask)/2) # &
83+
# [eXtrain, eYtrain ] = utils::doErrorSample(eXtrain, eYtrain, lq, uq, rowCount)
84+
# else
8585
[eXtrain, eYtrain] = utils::doSample(eXtrain, eYtrain, sample, mask, metaR, TRUE)
8686
t5 = time(); print("---- finalized in: "+(t5-t4)/1e9+"s");
8787

@@ -112,6 +112,7 @@ f_topk_cleaning = function(Frame[Unknown] dataTrain, Frame[Unknown] dataTest = a
112112
metaList['distY'] = dist
113113

114114
print("-- Cleaning - Enum Logical Pipelines: ");
115+
print("---- Data Dimension before Cleaning: "+ nrow(eXtrain) + ", " + ncol(eXtrain));
115116
[bestLogical, bestHp, con, refChanges, acc] = lg::enumerateLogical(X=eXtrain, y=eYtrain, Xtest=eXtest, ytest=eYtest,
116117
initial_population=logical, refSol=refSol, seed = seed, max_iter=max_iter, metaList = metaList,
117118
evaluationFunc=evaluationFunc, evalFunHp=evalFunHp, primitives=primitives, param=parameters,

scripts/pipelines/scripts/utils.dml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,8 @@ doSample = function(Matrix[Double] eX, Matrix[Double] eY, Double ratio, Matrix[D
6464
sampledY = eY
6565
sampled = floor(nrow(eX) * ratio)
6666

67-
if(sampled > MIN_SAMPLE & ratio != 1.0)
67+
# if(sampled > MIN_SAMPLE & ratio != 1.0)
68+
if(ratio != 1.0)
6869
{
6970
sampleVec = sample(nrow(eX), sampled, FALSE, 23)
7071
P = table(seq(1, nrow(sampleVec)), sampleVec, nrow(sampleVec), nrow(eX))

0 commit comments

Comments
 (0)