Skip to content

Commit 5fc93b6

Browse files
fzoepffel authored and mboehm7 committed
[SYSTEMDS-3696] Extended incremental SliceLine state handling
Closes #2116.
1 parent 3b4f6cd commit 5fc93b6

File tree

3 files changed

+111
-46
lines changed

3 files changed

+111
-46
lines changed

scripts/builtin/incSliceLine.dml

Lines changed: 82 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@
5252
# prevTK previous top-k slices (for incremental updates)
5353
# prevTKC previous top-k scores (for incremental updates)
5454
# encodeLat flag for encoding output lattice for less memory consumption
55-
# pruningStrat flag for disabling certain pruning strategies
56-
# (0 all, 1 all exact (score and size), 2 no score, 3 no size, 4 none)
55+
# pruningStrat pruning strategy: 0 all pruning, 1 only score pruning, 2 only size pruning,
56+
# 3 only max score pruning, 4 only approx pruning, 5 no pruning
5757
# ---------------------------------------------------------------------------------------
5858
#
5959
# OUTPUT:
@@ -101,9 +101,11 @@ m_incSliceLine = function(
101101
+ " -- see documentation for more details.");
102102
}
103103

104-
disableIncScorePruning = (pruningStrat == 2 | pruningStrat == 4);
105-
disableIncSizePruning = (pruningStrat >= 3);
106-
disableIncApproxPruning = (pruningStrat >= 1)
104+
enableIncScorePruning = ( pruningStrat <= 1);
105+
enableIncSizePruning = ((pruningStrat == 0) | (pruningStrat == 2));
106+
enableIncMaxScorePruning = ((pruningStrat == 0) | (pruningStrat == 3));
107+
enableIncApproxPruning = ((pruningStrat == 0) | (pruningStrat == 4));
108+
enableIncApproxPruning = FALSE;
107109

108110
t1 = time();
109111

@@ -188,19 +190,20 @@ m_incSliceLine = function(
188190
prevStats, encodeLat, differentOffsets, alpha, eAvg, prevFoffb, prevFoffe, foffb, foffe);
189191
maxscub = getMaxChangedScoreAllFeatures(nrow(X2), ncol(X2), addedX2, removedX2,
190192
addedE, removedE, prevLattice, metaPrevLattice, prevStats, encodeLat, differentOffsets,
191-
alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe, disableIncApproxPruning);
192-
[S, R, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, prevTK2, totalE, changedE,
193-
eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose, disableIncScorePruning);
193+
alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe, enableIncApproxPruning);
194+
[S, R, SPr, RPr, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, prevTK2, totalE, changedE,
195+
eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose, enableIncScorePruning, enableIncMaxScorePruning, enableIncApproxPruning);
194196

195197
# initialize lattice and statistics for incremental updates
196-
L1 = S;
198+
L1 = rbind(S, SPr);
197199
metaLattice = list();
198200
if( encodeLat ) {
199-
[L1, M] = transformSlicesToIDs(S, foffb, foffe);
201+
[L1, M] = transformSlicesToIDs(L1, foffb, foffe);
200202
metaLattice = append(metaLattice, M);
201203
}
202204
L = list(L1);
203-
Stats = list(R);
205+
Stats1 = rbind(R, RPr);
206+
Stats = list(Stats1);
204207

205208
# initialize top-k
206209
[TK, TKC] = maintainTopKInc(S, R, prevTK2, prevTKC2, k, minSup, foffb, foffe);
@@ -231,13 +234,14 @@ m_incSliceLine = function(
231234

232235
# load one hot encoded previous lattice for the current level
233236
prevLattice2 = matrix(0,0,0);
234-
if(!disableIncSizePruning){
237+
if(enableIncSizePruning){
235238
prevLattice2 = preparePrevLattice(prevLattice, metaPrevLattice, prevFoffb,
236239
prevFoffe, foffb, foffe, level, encodeLat, differentOffsets)
237240
}
238241

242+
prevLattice1 = prevLattice2;
239243
if(selFeat){
240-
if(length(prevLattice2)>0 & !disableIncSizePruning){
244+
if(length(prevLattice2)>0 & enableIncSizePruning){
241245
prevLattice2 = removeEmpty(target=prevLattice2, margin="cols", select=t(selCols));
242246
}
243247
S2 = removeEmpty(target=S, margin="cols", select=t(selCols));
@@ -249,21 +253,23 @@ m_incSliceLine = function(
249253
}
250254

251255
# prune unchanged slices with slice size < minSup
252-
if(level <= length(prevStats) & !disableIncSizePruning){
256+
SPr = matrix(0,0, ncol(S));
257+
RPr = matrix(0,0, 4);
258+
if(level <= length(prevStats) & enableIncSizePruning){
253259
npairs = nrow(S);
254-
[S, S2] = pruneUnchangedSlices(S, S2, prevLattice2, prevStats, changedX2, minSup, verbose, level);
260+
[S, S2, SPr, RPr] = pruneUnchangedSlices(S, S2, prevLattice1, prevLattice2, prevStats, changedX2, minSup, verbose, level);
255261
if(verbose) {
256262
print(" -- dropping "+(npairs-nrow(S))+"/"+npairs+" unaffected paired slice candidates ");
257263
}
258264
}
259265

260266
# prepare and store output lattice for next run
261-
Lrep = S
267+
L1 = rbind(S,SPr);
262268
if ( encodeLat ) {
263-
[Lrep, M] = transformSlicesToIDs(S, foffb, foffe);
269+
[L1, M] = transformSlicesToIDs(L1, foffb, foffe);
264270
metaLattice = append(metaLattice, M);
265271
}
266-
L = append(L, Lrep);
272+
L = append(L, L1);
267273

268274
if( nrow(S) > 0 ) {
269275
# extract and evaluate candidate slices
@@ -281,7 +287,8 @@ m_incSliceLine = function(
281287
}
282288

283289
# update output statistics
284-
Stats = append(Stats, R);
290+
Rrep = rbind(R, RPr);
291+
Stats = append(Stats, Rrep);
285292

286293
# maintain top-k after evaluation
287294
[TK, TKC] = maintainTopKInc(S, R, TK, TKC, k, minSup, foffb, foffe);
@@ -312,8 +319,8 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p,
312319
Matrix[Double] prevTK2, Matrix[Double] e, Matrix[Double] ep,
313320
Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha,
314321
Double minsc, Matrix[Double] maxsc, Matrix[Double] maxscub, Boolean verbose,
315-
Boolean disableIncScorePruning)
316-
return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols)
322+
Boolean enableIncScorePruning, Boolean enableIncMaxScorePruning, Boolean enableIncApproxPruning)
323+
return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] SPr, Matrix[Double] RPr, Matrix[Double] selCols)
317324
{
318325
n2 = ncol(X2);
319326
cCnts = t(colSums(X2)); # column counts
@@ -332,23 +339,27 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p,
332339
# b) unchanged pruning
333340
# (valid to prune feature if its previous max score was negative or below minsc)
334341
selCols2 = selCols;
335-
if( !disableIncScorePruning ) {
342+
if( enableIncMaxScorePruning ) {
336343
selCols2 = selCols & (ncCnts > 0 | maxsc > max(0, minsc));
337-
}
344+
}
338345

339346
if( verbose ) {
340347
n = as.integer(sum(selCols));
341348
drop = as.integer(sum(selCols) - sum(selCols2));
342349
print("incSliceLine: dropping "+drop+"/"+n+" unaffected features.");
343350
}
344351

345-
# c) max score changed pruning
346-
n = as.integer(sum(selCols2));
347-
selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf);
352+
if( enableIncApproxPruning ) {
353+
# c) max score changed pruning
354+
n = as.integer(sum(selCols2));
355+
if( enableIncApproxPruning ) {
356+
selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf);
357+
}
348358

349-
if( verbose ) {
350-
drop = as.integer(n - sum(selCols2));
351-
print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected features.");
359+
if( verbose ) {
360+
drop = as.integer(n - sum(selCols2));
361+
print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected features.");
362+
}
352363
}
353364

354365
# working set of active slices (#attr x #slices) and top k
@@ -362,14 +373,40 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p,
362373
# score 1-slices and create initial top-k
363374
sc = scoreInc(ss, se, eAvg, alpha, nrow(X2));
364375
R = cbind(sc, se, sm, ss);
376+
SPr = matrix(0,0, n2);
377+
RPr = matrix(0,0, 4);
378+
379+
# store all pruned slices for incremental updates
380+
if(sum(!selCols2) != 0){
381+
attrPr = removeEmpty(target=seq(1,n2), margin="rows", select=!selCols2);
382+
ssPr = removeEmpty(target=cCnts, margin="rows", select=!selCols2);
383+
sePr = removeEmpty(target=err, margin="rows", select=!selCols2);
384+
smPr = removeEmpty(target=merr, margin="rows", select=!selCols2);
385+
SPr = table(seq(1,nrow(attrPr)), attrPr, nrow(attrPr), n2);
386+
# scores are currently not used for pruning
387+
# in case of future use, set scores to Inf so a slice that was not scored
388+
# in this run can be identified and does not get pruned based on score in the next run
389+
scPr = matrix(Inf, nrow(SPr), 1);
390+
RPr = cbind(scPr, sePr, smPr, ssPr);
391+
}
365392

366393
# d) score pruning
367394
# compute upper bound scores for all remaining slices
368-
if(minsc > -Inf & !disableIncScorePruning) {
395+
if(minsc > -Inf & enableIncScorePruning) {
369396
ubSc = scoreUBInc(ss, se, sm, eAvg, minSup, alpha, nrow(X2));
370397
selCols3 = (ubSc > max(0, minsc));
398+
399+
# store all pruned slices for incremental updates
400+
if(sum(!selCols3) != 0){
401+
Rremoved = removeEmpty(target=R, margin="rows", select=!selCols3);
402+
Sremoved = removeEmpty(target=S, margin="rows", select=!selCols3);
403+
RPr = rbind(RPr, Rremoved);
404+
SPr = rbind(SPr, Sremoved);
405+
}
406+
371407
S = removeEmpty(target=S, margin="rows", select=selCols3);
372408
R = removeEmpty(target=R, margin="rows", select=selCols3);
409+
373410
if( verbose ) {
374411
n = as.integer(sum(selCols2));
375412
drop = as.integer(sum(selCols2) - sum(selCols3));
@@ -637,9 +674,12 @@ computeLowestPrevTK = function(Matrix[Double] prevTK2, Matrix[Double] X2,
637674
minsc = min(sc);
638675
}
639676

640-
pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] prevLattice2, list[unknown] prevStats, Matrix[Double] changedX2, Int minSup, Boolean verbose, Integer level)
641-
return(Matrix[Double] S, Matrix[Double] S2)
677+
pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] prevLattice, Matrix[Double] prevLattice2, list[unknown] prevStats, Matrix[Double] changedX2, Int minSup, Boolean verbose, Integer level)
678+
return(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] SPr, Matrix[Double] RPr)
642679
{
680+
SPr = matrix(0,0, ncol(S));
681+
RPr = matrix(0,0, 4);
682+
643683
unchangedS = prevLattice2;
644684
unchangedR = as.matrix(prevStats[level])
645685

@@ -651,11 +691,18 @@ pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, Matrix[Doub
651691
I = t(colSums((changedX2 %*% t(unchangedS)) == level) == 0) # change pushdown
652692
& unchangedR[,4] < minSup; # minSup pushdown
653693
unchangedS2 = removeEmpty(target=unchangedS, margin="rows", select=I);
694+
if(sum(I) > 0){
695+
SPr = removeEmpty(target=prevLattice, margin="rows", select=I);
696+
RPr = removeEmpty(target=unchangedR, margin="rows", select=I);
697+
}
698+
654699
# c) select only rows that cannot be pruned
655700
selCols = !rowSums((S2 %*% t(unchangedS2)) == level);
701+
656702
if(nrow(unchangedS) > 0 & sum(selCols) < nrow(S) ){
657703
S2 = removeEmpty(target=S2, margin="rows", select=selCols);
658704
S = removeEmpty(target=S, margin="rows", select=selCols);
705+
659706
}
660707
}
661708
}
@@ -754,11 +801,11 @@ getMaxChangedScoreAllFeatures = function(Int numRows, Int numFeatures, Matrix[Do
754801
List[Unknown] prevLattice, List[Unknown] metaPrevLattice, List[Unknown] prevStats,
755802
Boolean encodeLat, Boolean differentOffsets, Double alpha, Double eAvg, Double minSup,
756803
Matrix[Double] prevFoffb, Matrix[Double] prevFoffe, Matrix[Double] foffb, Matrix[Double] foffe,
757-
Boolean disableIncApproxPruning)
804+
Boolean enableIncApproxPruning)
758805
return(Matrix[Double] maxscub)
759806
{
760807
maxscub = matrix(-Inf, numFeatures, 1);
761-
if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows & !disableIncApproxPruning ) {
808+
if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows & enableIncApproxPruning ) {
762809
# compute upper bounds per feature for added subset
763810
ss = t(colSums(addedX2));
764811
se = t(t(addedE) %*% addedX2);
@@ -836,3 +883,4 @@ removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
836883
P2 = table(seq(1, nrow(CIX)), CIX, nrow(CIX), nrow(M))
837884
remain = P2 %*% M;
838885
}
886+

src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -344,7 +344,7 @@ public void testTop10HybridTPFullManyAdded() {
344344

345345
@Test
346346
public void testTop10SinglenodeTPFullManyAdded() {
347-
runIncSliceLineTest(10, "e", false, false,99 , 1, false, false, false, ExecMode.SINGLE_NODE);
347+
runIncSliceLineTest(10, "e", false, false,90 , 1, false, false, false, ExecMode.SINGLE_NODE);
348348
}
349349

350350
@Test
@@ -359,7 +359,7 @@ public void testTop10HybridTPFullManyAddedRemoved() {
359359

360360
@Test
361361
public void testTop10SinglenodeTPFullManyAddedRemoved() {
362-
runIncSliceLineTest(10, "e", false, false,99 , 1, false, true, false, ExecMode.SINGLE_NODE);
362+
runIncSliceLineTest(10, "e", false, false,90 , 1, false, true, false, ExecMode.SINGLE_NODE);
363363
}
364364

365365
@Test
@@ -419,7 +419,7 @@ public void testTop10SinglenodeTPSelFullManyAddedRemoved() {
419419

420420
@Test
421421
public void testTop10HybridTPSelE2FullManyAddedRemoved() {
422-
runIncSliceLineTest(10, "oe", false, true, 50, 99, false, true, false, ExecMode.HYBRID);
422+
runIncSliceLineTest(10, "oe", false, true, 50, 90, false, true, false, ExecMode.HYBRID);
423423
}
424424

425425
@Test
@@ -569,7 +569,7 @@ public void testTop10HybridTPFullManyAddedOnlyNull() {
569569

570570
@Test
571571
public void testTop10SinglenodeTPFullManyAddedOnlyNull() {
572-
runIncSliceLineTest(10, "e", false, false,99 , 1, true, false,true, ExecMode.SINGLE_NODE);
572+
runIncSliceLineTest(10, "e", false, false,90 , 1, true, false,true, ExecMode.SINGLE_NODE);
573573
}
574574

575575
@Test
@@ -584,7 +584,7 @@ public void testTop10HybridTPFullManyAddedOnlyNullRemoved() {
584584

585585
@Test
586586
public void testTop10SinglenodeTPFullManyAddedOnlyNullRemoved() {
587-
runIncSliceLineTest(10, "e", false, false,99 , 1, true, true,true, ExecMode.SINGLE_NODE);
587+
runIncSliceLineTest(10, "e", false, false,90 , 1, true, true,true, ExecMode.SINGLE_NODE);
588588
}
589589

590590
@Test
@@ -992,7 +992,7 @@ public void testIncSliceLineCustomInputsFull() {
992992

993993
};
994994

995-
runIncSliceLineTest(newX, e, 10, "e", false, true, 50, 1, false, false, true, ExecMode.SINGLE_NODE, false, false);
995+
runIncSliceLineTest(newX, e, 10, "e", false, true, 10, 1, false, false, true, ExecMode.SINGLE_NODE, false, false);
996996
}
997997

998998
// @Test

src/test/scripts/functions/builtin/incSliceLineFull.dml

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ disableIncScorePruning = $13;
3030
disableIncSizePruning = $14;
3131

3232
if(disableIncScorePruning & disableIncSizePruning){
33-
pruningStrat = 3;
33+
pruningStrat = 3;
3434
} else if (disableIncSizePruning){
3535
pruningStrat = 2;
3636
} else if (disableIncScorePruning){
@@ -46,16 +46,32 @@ if(nrow(indicesRemoved) > 0){
4646
}
4747
}
4848

49-
# first compute the top k slices in two increments
49+
# first compute the top k slices in two increments
5050
# first increment
51-
[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = incSliceLine(addedX=oldX, addedE=oldE, k=$5,
51+
[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = incSliceLine(addedX=oldX[1:nrow(oldX) -10], addedE=oldE[1:nrow(oldE) -10], k=$5,
5252
alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, verbose=$10);
53-
53+
/*
54+
for(i in 1:nrow(Stats)){
55+
print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i])));
56+
print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i])));
57+
}*/
58+
59+
[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = incSliceLine(addedX=oldX[nrow(oldX) -9: nrow(oldX)], oldX = oldX[1:nrow(oldX) -10], oldE = oldE[1:nrow(oldE) -10], addedE=oldE[nrow(oldE) -9: nrow(oldE)], prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC = TKC, k=$5,
60+
alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, prevFoffe = foffe, pruningStrat = pruningStrat);
61+
62+
/*
63+
for(i in 1:nrow(Stats)){
64+
print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i])));
65+
print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i])));
66+
}*/
67+
5468
# second increment
69+
70+
# third increment
5571
[TK1, TKC1, D1, L1, meta1, Stats1, Xout1, eOut1, foffb2, foffe2, params] = incSliceLine(addedX=addedX, oldX = oldX, oldE = oldE, addedE=addedE, prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC = TKC, k=$5,
5672
alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, prevFoffe = foffe, pruningStrat = pruningStrat);
5773

58-
# prepare totalX and totalE for running sliceline on total data
74+
# prepare totalX and totalE for running sliceline on total data
5975
if(nrow(indicesRemoved) > 0){
6076
oldX = removeRowsByIndices(oldX, indicesRemoved);
6177
oldE = removeRowsByIndices(oldE, indicesRemoved);
@@ -73,7 +89,7 @@ write(TKC2, $12)
7389
# Function to remove rows from matrix M based on a list of indices
7490
removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
7591
return (Matrix[Double] result)
76-
{
92+
{
7793
result = matrix(0, 0, ncol(M));
7894
index = 1;
7995
for(i in 1:nrow(indices)){
@@ -88,3 +104,4 @@ removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
88104
}
89105
result = rbind(result, M[index:nrow(M),]);
90106
}
107+

0 commit comments

Comments
 (0)