|
34 | 34 | import org.apache.sysds.runtime.util.UtilFunctions; |
35 | 35 |
|
36 | 36 | /** |
37 | | - * Simple atomic decoder for dummycoded columns. This decoder builds internally |
38 | | - * inverted column mappings from the given frame meta data. |
39 | | - * |
| 37 | + * Simple atomic decoder for dummycoded columns. This decoder builds internally inverted column mappings from the given |
| 38 | + * frame meta data. |
| 39 | + * |
40 | 40 | */ |
41 | | -public class DecoderDummycode extends Decoder |
42 | | -{ |
| 41 | +public class DecoderDummycode extends Decoder { |
43 | 42 | private static final long serialVersionUID = 4758831042891032129L; |
44 | | - |
| 43 | + |
45 | 44 | private int[] _clPos = null; |
46 | 45 | private int[] _cuPos = null; |
47 | | - |
| 46 | + |
/**
 * Creates a dummycode decoder for the given output schema.
 *
 * @param schema value types of the decoded output frame
 * @param dcCols column IDs of the dummycoded columns, expressed as IDs in the decoded (non-dummycoded) output
 */
protected DecoderDummycode(ValueType[] schema, int[] dcCols) {
	// All state is kept by the parent decoder; the position arrays are filled later by initMetaData.
	super(schema, dcCols);
}
52 | 51 |
|
53 | 52 | @Override |
54 | 53 | public FrameBlock decode(MatrixBlock in, FrameBlock out) { |
55 | | - //TODO perf (exploit sparse representation for better asymptotic behavior) |
56 | 54 | out.ensureAllocatedColumns(in.getNumRows()); |
57 | 55 | decode(in, out, 0, in.getNumRows()); |
58 | 56 | return out; |
59 | 57 | } |
60 | 58 |
|
61 | 59 | @Override |
62 | 60 | public void decode(MatrixBlock in, FrameBlock out, int rl, int ru) { |
63 | | - if(in.isInSparseFormat()) { |
64 | | - SparseBlock sb = in.getSparseBlock(); |
65 | | - for(int i = rl; i < ru; i++) { |
66 | | - if(!sb.isEmpty(i)) { |
67 | | - int apos = sb.pos(i); |
68 | | - int alen = sb.size(i) + apos; |
69 | | - int[] aix = sb.indexes(i); |
70 | | - // double[] val = sb.values(i); always 1... therefore not needed |
71 | | - int h = 0; |
72 | | - for(int j = 0; j < _colList.length && h < alen; j++) { |
73 | | - // find k, the index in aix, within the range of low and high |
74 | | - int low = _clPos[j]; |
75 | | - int high = _cuPos[j]; |
76 | | - while(h < alen && aix[h] < low) { |
77 | | - h++; |
78 | | - } |
79 | | - if(h < alen && aix[h] >= low && aix[h] < high) { |
80 | | - int k = aix[h]; |
81 | | - int col = _colList[j] - 1; |
82 | | - out.getColumn(col).set(i, k - _clPos[j] + 1); |
83 | | - } |
84 | | - while(h < alen && aix[h] < high) { |
85 | | - h++; |
86 | | - } |
| 61 | + if(in.isInSparseFormat()) |
| 62 | + decodeSparse(in, out, rl, ru); |
| 63 | + else |
| 64 | + decodeDense(in, out, rl, ru); |
| 65 | + } |
| 66 | + |
| 67 | + private void decodeDense(MatrixBlock in, FrameBlock out, int rl, int ru) { |
| 68 | + for(int i = rl; i < ru; i++) |
| 69 | + for(int j = 0; j < _colList.length; j++) |
| 70 | + for(int k = _clPos[j]; k < _cuPos[j]; k++) |
| 71 | + if(in.get(i, k - 1) != 0) { |
| 72 | + int col = _colList[j] - 1; |
| 73 | + out.getColumn(col).set(i, k - _clPos[j] + 1); |
| 74 | + // if the non zero is found, we can skip the rest of k. |
| 75 | + continue; |
87 | 76 | } |
| 77 | + } |
| 78 | + |
| 79 | + private void decodeSparse(MatrixBlock in, FrameBlock out, int rl, int ru) { |
| 80 | + final SparseBlock sb = in.getSparseBlock(); |
| 81 | + for(int i = rl; i < ru; i++) { |
| 82 | + decodeSparseRow(out, sb, i); |
| 83 | + } |
| 84 | + } |
| 85 | + |
| 86 | + private void decodeSparseRow(FrameBlock out, final SparseBlock sb, int i) { |
| 87 | + if(!sb.isEmpty(i)) { |
| 88 | + final int apos = sb.pos(i); |
| 89 | + final int alen = sb.size(i) + apos; |
| 90 | + final int[] aix = sb.indexes(i); |
| 91 | + // double[] val = sb.values(i); always 1... therefore not needed |
| 92 | + int h = 0; |
| 93 | + for(int j = 0; j < _colList.length && h < alen; j++) { // for each decode column. |
| 94 | + // find k, the index in aix, within the range of low and high |
| 95 | + int low = _clPos[j]; |
| 96 | + int high = _cuPos[j]; |
| 97 | + while(h < alen && aix[h] < low) { |
| 98 | + h++; |
| 99 | + } |
| 100 | + if(h < alen && aix[h] >= low && aix[h] < high) { |
| 101 | + int k = aix[h]; |
| 102 | + int col = _colList[j] - 1; |
| 103 | + out.getColumn(col).set(i, k - _clPos[j] + 1); |
| 104 | + h++; |
| 105 | + } |
| 106 | + while(h < alen && aix[h] < high) { |
| 107 | + h++; |
88 | 108 | } |
89 | 109 | } |
90 | 110 | } |
91 | | - else { |
92 | | - for(int i = rl; i < ru; i++) |
93 | | - for(int j = 0; j < _colList.length; j++) |
94 | | - for(int k = _clPos[j]; k < _cuPos[j]; k++) |
95 | | - if(in.get(i, k - 1) != 0) { |
96 | | - int col = _colList[j] - 1; |
97 | | - out.getColumn(col).set(i, k - _clPos[j] + 1); |
98 | | - // if the non zero is found, we can skip the rest of k. |
99 | | - continue; |
100 | | - } |
101 | | - } |
102 | 111 | } |
103 | 112 |
|
104 | 113 | @Override |
105 | 114 | public Decoder subRangeDecoder(int colStart, int colEnd, int dummycodedOffset) { |
106 | 115 | List<Integer> dcList = new ArrayList<>(); |
107 | 116 | List<Integer> clPosList = new ArrayList<>(); |
108 | 117 | List<Integer> cuPosList = new ArrayList<>(); |
109 | | - |
| 118 | + |
110 | 119 | // get the column IDs for the sub range of the dummycode columns and their destination positions, |
111 | 120 | // where they will be decoded to |
112 | | - for( int j=0; j<_colList.length; j++ ) { |
| 121 | + for(int j = 0; j < _colList.length; j++) { |
113 | 122 | int colID = _colList[j]; |
114 | | - if (colID >= colStart && colID < colEnd) { |
| 123 | + if(colID >= colStart && colID < colEnd) { |
115 | 124 | dcList.add(colID - (colStart - 1)); |
116 | 125 | clPosList.add(_clPos[j] - dummycodedOffset); |
117 | 126 | cuPosList.add(_cuPos[j] - dummycodedOffset); |
118 | 127 | } |
119 | 128 | } |
120 | | - if (dcList.isEmpty()) |
| 129 | + if(dcList.isEmpty()) |
121 | 130 | return null; |
122 | 131 | // create sub-range decoder |
123 | 132 | int[] colList = dcList.stream().mapToInt(i -> i).toArray(); |
124 | | - DecoderDummycode subRangeDecoder = new DecoderDummycode( |
125 | | - Arrays.copyOfRange(_schema, colStart - 1, colEnd - 1), colList); |
| 133 | + DecoderDummycode subRangeDecoder = new DecoderDummycode(Arrays.copyOfRange(_schema, colStart - 1, colEnd - 1), |
| 134 | + colList); |
126 | 135 | subRangeDecoder._clPos = clPosList.stream().mapToInt(i -> i).toArray(); |
127 | 136 | subRangeDecoder._cuPos = cuPosList.stream().mapToInt(i -> i).toArray(); |
128 | 137 | return subRangeDecoder; |
129 | 138 | } |
130 | | - |
| 139 | + |
131 | 140 | @Override |
132 | 141 | public void updateIndexRanges(long[] beginDims, long[] endDims) { |
133 | 142 | if(_colList == null) |
134 | 143 | return; |
135 | | - |
| 144 | + |
136 | 145 | long lowerColDest = beginDims[1]; |
137 | 146 | long upperColDest = endDims[1]; |
138 | 147 | for(int i = 0; i < _colList.length; i++) { |
139 | 148 | long numDistinct = _cuPos[i] - _clPos[i]; |
140 | | - |
| 149 | + |
141 | 150 | if(_cuPos[i] <= beginDims[1] + 1) |
142 | 151 | if(numDistinct > 0) |
143 | 152 | lowerColDest -= numDistinct - 1; |
144 | | - |
| 153 | + |
145 | 154 | if(_cuPos[i] <= endDims[1] + 1) |
146 | 155 | if(numDistinct > 0) |
147 | 156 | upperColDest -= numDistinct - 1; |
148 | 157 | } |
149 | 158 | beginDims[1] = lowerColDest; |
150 | 159 | endDims[1] = upperColDest; |
151 | 160 | } |
152 | | - |
| 161 | + |
153 | 162 | @Override |
154 | 163 | public void initMetaData(FrameBlock meta) { |
155 | | - _clPos = new int[_colList.length]; //col lower pos |
156 | | - _cuPos = new int[_colList.length]; //col upper pos |
157 | | - for( int j=0, off=0; j<_colList.length; j++ ) { |
| 164 | + _clPos = new int[_colList.length]; // col lower pos |
| 165 | + _cuPos = new int[_colList.length]; // col upper pos |
| 166 | + for(int j = 0, off = 0; j < _colList.length; j++) { |
158 | 167 | int colID = _colList[j]; |
159 | | - ColumnMetadata d = meta.getColumnMetadata()[colID-1]; |
160 | | - String v = meta.getString(0, colID-1); |
| 168 | + ColumnMetadata d = meta.getColumnMetadata()[colID - 1]; |
| 169 | + String v = meta.getString(0, colID - 1); |
161 | 170 | int ndist; |
162 | | - if(v.length() > 1 && v.charAt(0) == '¿'){ |
| 171 | + if(v.length() > 1 && v.charAt(0) == '¿') { |
163 | 172 | ndist = UtilFunctions.parseToInt(v.substring(1)); |
164 | 173 | } |
165 | 174 | else { |
166 | | - ndist = d.isDefault() ? 0 : (int)d.getNumDistinct() ; |
| 175 | + ndist = d.isDefault() ? 0 : (int) d.getNumDistinct(); |
167 | 176 | } |
168 | 177 |
|
169 | | - ndist = ndist < -1 ? 0: ndist; // safety if all values was null. |
170 | | - |
| 178 | + ndist = ndist < -1 ? 0 : ndist; // safety if all values was null. |
| 179 | + |
171 | 180 | _clPos[j] = off + colID; |
172 | | - _cuPos[j] = _clPos[j] + ndist ; |
| 181 | + _cuPos[j] = _clPos[j] + ndist; |
173 | 182 | off += ndist - 1; |
174 | 183 | } |
175 | 184 | } |
|
0 commit comments