|
33 | 33 | import org.apache.sysds.runtime.compress.CompressedMatrixBlock; |
34 | 34 | import org.apache.sysds.runtime.compress.DMLCompressionException; |
35 | 35 | import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P; |
| 36 | +import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; |
36 | 37 | import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; |
37 | 38 | import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; |
38 | 39 | import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; |
|
43 | 44 | import org.apache.sysds.runtime.compress.colgroup.indexes.RangeIndex; |
44 | 45 | import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; |
45 | 46 | import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; |
| 47 | +import org.apache.sysds.runtime.compress.utils.ACount; |
| 48 | +import org.apache.sysds.runtime.compress.utils.DblArray; |
| 49 | +import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap; |
46 | 50 | import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator; |
47 | 51 | import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; |
48 | 52 | import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme; |
@@ -77,7 +81,7 @@ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { |
77 | 81 |
|
78 | 82 | static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED; |
79 | 83 |
|
80 | | - private ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { |
| 84 | + protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { |
81 | 85 | super(colIndexes, dict, cachedCounts); |
82 | 86 | _data = data; |
83 | 87 |
|
@@ -1105,4 +1109,57 @@ protected boolean allowShallowIdentityRightMult() { |
1105 | 1109 | return true; |
1106 | 1110 | } |
1107 | 1111 |
|
| 1112 | + public AColGroup convertToDeltaDDC() { |
| 1113 | + int numCols = _colIndexes.size(); |
| 1114 | + int numRows = _data.size(); |
| 1115 | + |
| 1116 | + DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); |
| 1117 | + double[] rowDelta = new double[numCols]; |
| 1118 | + double[] prevRow = new double[numCols]; |
| 1119 | + DblArray dblArray = new DblArray(rowDelta); |
| 1120 | + int[] rowToDictId = new int[numRows]; |
| 1121 | + |
| 1122 | + double[] dictVals = _dict.getValues(); |
| 1123 | + |
| 1124 | + for(int i = 0; i < numRows; i++) { |
| 1125 | + int dictIdx = _data.getIndex(i); |
| 1126 | + int off = dictIdx * numCols; |
| 1127 | + for(int j = 0; j < numCols; j++) { |
| 1128 | + double val = dictVals[off + j]; |
| 1129 | + if(i == 0) { |
| 1130 | + rowDelta[j] = val; |
| 1131 | + prevRow[j] = val; |
| 1132 | + } else { |
| 1133 | + rowDelta[j] = val - prevRow[j]; |
| 1134 | + prevRow[j] = val; |
| 1135 | + } |
| 1136 | + } |
| 1137 | + |
| 1138 | + rowToDictId[i] = map.increment(dblArray); |
| 1139 | + } |
| 1140 | + |
| 1141 | + if(map.size() == 0) |
| 1142 | + return new ColGroupEmpty(_colIndexes); |
| 1143 | + |
| 1144 | + ACount<DblArray>[] vals = map.extractValues(); |
| 1145 | + final int nVals = vals.length; |
| 1146 | + final double[] dictValues = new double[nVals * numCols]; |
| 1147 | + final int[] oldIdToNewId = new int[map.size()]; |
| 1148 | + int idx = 0; |
| 1149 | + for(int i = 0; i < nVals; i++) { |
| 1150 | + final ACount<DblArray> dac = vals[i]; |
| 1151 | + final double[] arrData = dac.key().getData(); |
| 1152 | + System.arraycopy(arrData, 0, dictValues, idx, numCols); |
| 1153 | + oldIdToNewId[dac.id] = i; |
| 1154 | + idx += numCols; |
| 1155 | + } |
| 1156 | + |
| 1157 | + DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); |
| 1158 | + AMapToData newData = MapToFactory.create(numRows, nVals); |
| 1159 | + for(int i = 0; i < numRows; i++) { |
| 1160 | + newData.set(i, oldIdToNewId[rowToDictId[i]]); |
| 1161 | + } |
| 1162 | + return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); |
| 1163 | + } |
| 1164 | + |
1108 | 1165 | } |
0 commit comments