|
30 | 30 | import org.apache.sysds.runtime.instructions.cp.DoubleObject; |
31 | 31 | import org.apache.sysds.runtime.instructions.spark.data.IndexedMatrixValue; |
32 | 32 | import org.apache.sysds.runtime.matrix.data.MatrixBlock; |
| 33 | +import org.apache.sysds.runtime.matrix.data.MatrixIndexes; |
33 | 34 | import org.apache.sysds.runtime.matrix.data.OperationsOnMatrixValues; |
34 | 35 | import org.apache.sysds.runtime.matrix.operators.AggregateOperator; |
35 | 36 | import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator; |
| 37 | +import org.apache.sysds.runtime.meta.DataCharacteristics; |
| 38 | +import org.apache.sysds.runtime.util.CommonThreadPool; |
36 | 39 |
|
| 40 | +import java.util.HashMap; |
| 41 | +import java.util.concurrent.ExecutorService; |
37 | 42 |
|
38 | 43 | public class AggregateUnaryOOCInstruction extends ComputationOOCInstruction { |
39 | 44 | private AggregateOperator _aop = null; |
@@ -61,34 +66,119 @@ public static AggregateUnaryOOCInstruction parseInstruction(String str) { |
61 | 66 |
|
62 | 67 | @Override |
63 | 68 | public void processInstruction( ExecutionContext ec ) { |
64 | | - //TODO support all types of aggregations, currently only full aggregation |
| 69 | + //TODO support all types of aggregations, currently only full aggregation, row aggregation and column aggregation |
65 | 70 |
|
66 | 71 | //setup operators and input queue |
67 | 72 | AggregateUnaryOperator aggun = (AggregateUnaryOperator) getOperator(); |
68 | 73 | MatrixObject min = ec.getMatrixObject(input1); |
69 | 74 | LocalTaskQueue<IndexedMatrixValue> q = min.getStreamHandle(); |
70 | | - IndexedMatrixValue tmp = null; |
71 | 75 | int blen = ConfigurationManager.getBlocksize(); |
72 | | - |
73 | | - //read blocks and aggregate immediately into result |
74 | | - int extra = _aop.correction.getNumRemovedRowsColumns(); |
75 | | - MatrixBlock ret = new MatrixBlock(1,1+extra,false); |
76 | | - MatrixBlock corr = new MatrixBlock(1,1+extra,false); |
77 | | - try { |
78 | | - while((tmp = q.dequeueTask()) != LocalTaskQueue.NO_MORE_TASKS) { |
79 | | - //block aggregation |
80 | | - MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue()) |
81 | | - .aggregateUnaryOperations(aggun, new MatrixBlock(), blen, tmp.getIndexes()); |
82 | | - //accumulation into final result |
83 | | - OperationsOnMatrixValues.incrementalAggregation( |
84 | | - ret, _aop.existsCorrection() ? corr : null, ltmp, _aop, true); |
| 76 | + |
| 77 | + if (aggun.isRowAggregate() || aggun.isColAggregate()) { |
| 78 | + // intermediate state per aggregation index |
| 79 | + HashMap<Long, MatrixBlock> aggs = new HashMap<>(); // partial aggregates |
| 80 | + HashMap<Long, MatrixBlock> corrs = new HashMap<>(); // correction blocks |
| 81 | + HashMap<Long, Integer> cnt = new HashMap<>(); // processed block count per agg idx |
| 82 | + |
| 83 | + DataCharacteristics chars = ec.getDataCharacteristics(input1.getName()); |
| 84 | + // number of blocks to process per aggregation idx (row or column dim) |
| 85 | + long nBlocks = aggun.isRowAggregate()? chars.getNumColBlocks() : chars.getNumRowBlocks(); |
| 86 | + |
| 87 | + LocalTaskQueue<IndexedMatrixValue> qOut = new LocalTaskQueue<>(); |
| 88 | + ec.getMatrixObject(output).setStreamHandle(qOut); |
| 89 | + ExecutorService pool = CommonThreadPool.get(); |
| 90 | + try { |
| 91 | + pool.submit(() -> { |
| 92 | + IndexedMatrixValue tmp = null; |
| 93 | + try { |
| 94 | + while((tmp = q.dequeueTask()) != LocalTaskQueue.NO_MORE_TASKS) { |
| 95 | + long idx = aggun.isRowAggregate() ? |
| 96 | + tmp.getIndexes().getRowIndex() : tmp.getIndexes().getColumnIndex(); |
| 97 | + if(aggs.containsKey(idx)) { |
| 98 | + // update existing partial aggregate for this idx |
| 99 | + MatrixBlock ret = aggs.get(idx); |
| 100 | + MatrixBlock corr = corrs.get(idx); |
| 101 | + |
| 102 | + // aggregation |
| 103 | + MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue()) |
| 104 | + .aggregateUnaryOperations(aggun, new MatrixBlock(), blen, tmp.getIndexes()); |
| 105 | + OperationsOnMatrixValues.incrementalAggregation(ret, |
| 106 | + _aop.existsCorrection() ? corr : null, ltmp, _aop, true); |
| 107 | + |
| 108 | + aggs.replace(idx, ret); |
| 109 | + corrs.replace(idx, corr); |
| 110 | + cnt.replace(idx, cnt.get(idx) + 1); |
| 111 | + } |
| 112 | + else { |
| 113 | + // first block for this idx - init aggregate and correction |
| 114 | + // TODO avoid corr block for inplace incremental aggregation |
| 115 | + int rows = tmp.getValue().getNumRows(); |
| 116 | + int cols = tmp.getValue().getNumColumns(); |
| 117 | + int extra = _aop.correction.getNumRemovedRowsColumns(); |
| 118 | + MatrixBlock ret = aggun.isRowAggregate()? new MatrixBlock(rows, 1 + extra, false) : new MatrixBlock(1 + extra, cols, false); |
| 119 | + MatrixBlock corr = aggun.isRowAggregate()? new MatrixBlock(rows, 1 + extra, false) : new MatrixBlock(1 + extra, cols, false); |
| 120 | + |
| 121 | + // aggregation |
| 122 | + MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue()).aggregateUnaryOperations( |
| 123 | + aggun, new MatrixBlock(), blen, tmp.getIndexes()); |
| 124 | + OperationsOnMatrixValues.incrementalAggregation(ret, |
| 125 | + _aop.existsCorrection() ? corr : null, ltmp, _aop, true); |
| 126 | + |
| 127 | + aggs.put(idx, ret); |
| 128 | + corrs.put(idx, corr); |
| 129 | + cnt.put(idx, 1); |
| 130 | + } |
| 131 | + |
| 132 | + if(cnt.get(idx) == nBlocks) { |
| 133 | + // all input blocks for this idx processed - emit aggregated block |
| 134 | + MatrixBlock ret = aggs.get(idx); |
| 135 | + // drop correction row/col |
| 136 | + ret.dropLastRowsOrColumns(_aop.correction); |
| 137 | + MatrixIndexes midx = aggun.isRowAggregate()? new MatrixIndexes(tmp.getIndexes().getRowIndex(), 1) : new MatrixIndexes(1, tmp.getIndexes().getColumnIndex()); |
| 138 | + IndexedMatrixValue tmpOut = new IndexedMatrixValue(midx, ret); |
| 139 | + |
| 140 | + qOut.enqueueTask(tmpOut); |
| 141 | + // drop intermediate states |
| 142 | + aggs.remove(idx); |
| 143 | + corrs.remove(idx); |
| 144 | + cnt.remove(idx); |
| 145 | + } |
| 146 | + } |
| 147 | + qOut.closeInput(); |
| 148 | + } |
| 149 | + catch(Exception ex) { |
| 150 | + throw new DMLRuntimeException(ex); |
| 151 | + } |
| 152 | + }); |
| 153 | + } catch (Exception ex) { |
| 154 | + throw new DMLRuntimeException(ex); |
| 155 | + } finally { |
| 156 | + pool.shutdown(); |
85 | 157 | } |
86 | 158 | } |
87 | | - catch(Exception ex) { |
88 | | - throw new DMLRuntimeException(ex); |
| 159 | + // full aggregation |
| 160 | + else { |
| 161 | + IndexedMatrixValue tmp = null; |
| 162 | + //read blocks and aggregate immediately into result |
| 163 | + int extra = _aop.correction.getNumRemovedRowsColumns(); |
| 164 | + MatrixBlock ret = new MatrixBlock(1,1+extra,false); |
| 165 | + MatrixBlock corr = new MatrixBlock(1,1+extra,false); |
| 166 | + try { |
| 167 | + while((tmp = q.dequeueTask()) != LocalTaskQueue.NO_MORE_TASKS) { |
| 168 | + //block aggregation |
| 169 | + MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue()) |
| 170 | + .aggregateUnaryOperations(aggun, new MatrixBlock(), blen, tmp.getIndexes()); |
| 171 | + //accumulation into final result |
| 172 | + OperationsOnMatrixValues.incrementalAggregation( |
| 173 | + ret, _aop.existsCorrection() ? corr : null, ltmp, _aop, true); |
| 174 | + } |
| 175 | + } |
| 176 | + catch(Exception ex) { |
| 177 | + throw new DMLRuntimeException(ex); |
| 178 | + } |
| 179 | + |
| 180 | + //create scalar output |
| 181 | + ec.setScalarOutput(output.getName(), new DoubleObject(ret.get(0, 0))); |
89 | 182 | } |
90 | | - |
91 | | - //create scalar output |
92 | | - ec.setScalarOutput(output.getName(), new DoubleObject(ret.get(0, 0))); |
93 | 183 | } |
94 | 184 | } |
0 commit comments