Skip to content

Commit fb8d04a

Browse files
committed
decode
1 parent f1020fe commit fb8d04a

File tree

1 file changed

+74
-65
lines changed

1 file changed

+74
-65
lines changed

src/main/java/org/apache/sysds/runtime/transform/decode/DecoderDummycode.java

Lines changed: 74 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -34,142 +34,151 @@
3434
import org.apache.sysds.runtime.util.UtilFunctions;
3535

3636
/**
37-
* Simple atomic decoder for dummycoded columns. This decoder builds internally
38-
* inverted column mappings from the given frame meta data.
39-
*
37+
* Simple atomic decoder for dummycoded columns. This decoder builds internally inverted column mappings from the given
38+
* frame meta data.
39+
*
4040
*/
41-
public class DecoderDummycode extends Decoder
42-
{
41+
public class DecoderDummycode extends Decoder {
4342
private static final long serialVersionUID = 4758831042891032129L;
44-
43+
4544
private int[] _clPos = null;
4645
private int[] _cuPos = null;
47-
46+
4847
/**
 * Creates a dummycode decoder; all state is forwarded to the {@link Decoder} base class.
 *
 * @param schema value types handed to the base decoder
 * @param dcCols IDs of the dummycoded columns, expressed as column IDs of the decoded (non-dummycoded) output
 */
protected DecoderDummycode(ValueType[] schema, int[] dcCols) {
	super(schema, dcCols);
}
5251

5352
@Override
5453
public FrameBlock decode(MatrixBlock in, FrameBlock out) {
55-
//TODO perf (exploit sparse representation for better asymptotic behavior)
5654
out.ensureAllocatedColumns(in.getNumRows());
5755
decode(in, out, 0, in.getNumRows());
5856
return out;
5957
}
6058

6159
@Override
6260
public void decode(MatrixBlock in, FrameBlock out, int rl, int ru) {
63-
if(in.isInSparseFormat()) {
64-
SparseBlock sb = in.getSparseBlock();
65-
for(int i = rl; i < ru; i++) {
66-
if(!sb.isEmpty(i)) {
67-
int apos = sb.pos(i);
68-
int alen = sb.size(i) + apos;
69-
int[] aix = sb.indexes(i);
70-
// double[] val = sb.values(i); always 1... therefore not needed
71-
int h = 0;
72-
for(int j = 0; j < _colList.length && h < alen; j++) {
73-
// find k, the index in aix, within the range of low and high
74-
int low = _clPos[j];
75-
int high = _cuPos[j];
76-
while(h < alen && aix[h] < low) {
77-
h++;
78-
}
79-
if(h < alen && aix[h] >= low && aix[h] < high) {
80-
int k = aix[h];
81-
int col = _colList[j] - 1;
82-
out.getColumn(col).set(i, k - _clPos[j] + 1);
83-
}
84-
while(h < alen && aix[h] < high) {
85-
h++;
86-
}
61+
if(in.isInSparseFormat())
62+
decodeSparse(in, out, rl, ru);
63+
else
64+
decodeDense(in, out, rl, ru);
65+
}
66+
67+
private void decodeDense(MatrixBlock in, FrameBlock out, int rl, int ru) {
68+
for(int i = rl; i < ru; i++)
69+
for(int j = 0; j < _colList.length; j++)
70+
for(int k = _clPos[j]; k < _cuPos[j]; k++)
71+
if(in.get(i, k - 1) != 0) {
72+
int col = _colList[j] - 1;
73+
out.getColumn(col).set(i, k - _clPos[j] + 1);
74+
// if the non zero is found, we can skip the rest of k.
75+
continue;
8776
}
77+
}
78+
79+
private void decodeSparse(MatrixBlock in, FrameBlock out, int rl, int ru) {
80+
final SparseBlock sb = in.getSparseBlock();
81+
for(int i = rl; i < ru; i++) {
82+
decodeSparseRow(out, sb, i);
83+
}
84+
}
85+
86+
private void decodeSparseRow(FrameBlock out, final SparseBlock sb, int i) {
87+
if(!sb.isEmpty(i)) {
88+
final int apos = sb.pos(i);
89+
final int alen = sb.size(i) + apos;
90+
final int[] aix = sb.indexes(i);
91+
// double[] val = sb.values(i); always 1... therefore not needed
92+
int h = 0;
93+
for(int j = 0; j < _colList.length && h < alen; j++) { // for each decode column.
94+
// find k, the index in aix, within the range of low and high
95+
int low = _clPos[j];
96+
int high = _cuPos[j];
97+
while(h < alen && aix[h] < low) {
98+
h++;
99+
}
100+
if(h < alen && aix[h] >= low && aix[h] < high) {
101+
int k = aix[h];
102+
int col = _colList[j] - 1;
103+
out.getColumn(col).set(i, k - _clPos[j] + 1);
104+
h++;
105+
}
106+
while(h < alen && aix[h] < high) {
107+
h++;
88108
}
89109
}
90110
}
91-
else {
92-
for(int i = rl; i < ru; i++)
93-
for(int j = 0; j < _colList.length; j++)
94-
for(int k = _clPos[j]; k < _cuPos[j]; k++)
95-
if(in.get(i, k - 1) != 0) {
96-
int col = _colList[j] - 1;
97-
out.getColumn(col).set(i, k - _clPos[j] + 1);
98-
// if the non zero is found, we can skip the rest of k.
99-
continue;
100-
}
101-
}
102111
}
103112

104113
@Override
105114
public Decoder subRangeDecoder(int colStart, int colEnd, int dummycodedOffset) {
106115
List<Integer> dcList = new ArrayList<>();
107116
List<Integer> clPosList = new ArrayList<>();
108117
List<Integer> cuPosList = new ArrayList<>();
109-
118+
110119
// get the column IDs for the sub range of the dummycode columns and their destination positions,
111120
// where they will be decoded to
112-
for( int j=0; j<_colList.length; j++ ) {
121+
for(int j = 0; j < _colList.length; j++) {
113122
int colID = _colList[j];
114-
if (colID >= colStart && colID < colEnd) {
123+
if(colID >= colStart && colID < colEnd) {
115124
dcList.add(colID - (colStart - 1));
116125
clPosList.add(_clPos[j] - dummycodedOffset);
117126
cuPosList.add(_cuPos[j] - dummycodedOffset);
118127
}
119128
}
120-
if (dcList.isEmpty())
129+
if(dcList.isEmpty())
121130
return null;
122131
// create sub-range decoder
123132
int[] colList = dcList.stream().mapToInt(i -> i).toArray();
124-
DecoderDummycode subRangeDecoder = new DecoderDummycode(
125-
Arrays.copyOfRange(_schema, colStart - 1, colEnd - 1), colList);
133+
DecoderDummycode subRangeDecoder = new DecoderDummycode(Arrays.copyOfRange(_schema, colStart - 1, colEnd - 1),
134+
colList);
126135
subRangeDecoder._clPos = clPosList.stream().mapToInt(i -> i).toArray();
127136
subRangeDecoder._cuPos = cuPosList.stream().mapToInt(i -> i).toArray();
128137
return subRangeDecoder;
129138
}
130-
139+
131140
@Override
132141
public void updateIndexRanges(long[] beginDims, long[] endDims) {
133142
if(_colList == null)
134143
return;
135-
144+
136145
long lowerColDest = beginDims[1];
137146
long upperColDest = endDims[1];
138147
for(int i = 0; i < _colList.length; i++) {
139148
long numDistinct = _cuPos[i] - _clPos[i];
140-
149+
141150
if(_cuPos[i] <= beginDims[1] + 1)
142151
if(numDistinct > 0)
143152
lowerColDest -= numDistinct - 1;
144-
153+
145154
if(_cuPos[i] <= endDims[1] + 1)
146155
if(numDistinct > 0)
147156
upperColDest -= numDistinct - 1;
148157
}
149158
beginDims[1] = lowerColDest;
150159
endDims[1] = upperColDest;
151160
}
152-
161+
153162
@Override
154163
public void initMetaData(FrameBlock meta) {
155-
_clPos = new int[_colList.length]; //col lower pos
156-
_cuPos = new int[_colList.length]; //col upper pos
157-
for( int j=0, off=0; j<_colList.length; j++ ) {
164+
_clPos = new int[_colList.length]; // col lower pos
165+
_cuPos = new int[_colList.length]; // col upper pos
166+
for(int j = 0, off = 0; j < _colList.length; j++) {
158167
int colID = _colList[j];
159-
ColumnMetadata d = meta.getColumnMetadata()[colID-1];
160-
String v = meta.getString(0, colID-1);
168+
ColumnMetadata d = meta.getColumnMetadata()[colID - 1];
169+
String v = meta.getString(0, colID - 1);
161170
int ndist;
162-
if(v.length() > 1 && v.charAt(0) == '¿'){
171+
if(v.length() > 1 && v.charAt(0) == '¿') {
163172
ndist = UtilFunctions.parseToInt(v.substring(1));
164173
}
165174
else {
166-
ndist = d.isDefault() ? 0 : (int)d.getNumDistinct() ;
175+
ndist = d.isDefault() ? 0 : (int) d.getNumDistinct();
167176
}
168177

169-
ndist = ndist < -1 ? 0: ndist; // safety if all values was null.
170-
178+
ndist = ndist < -1 ? 0 : ndist; // safety if all values was null.
179+
171180
_clPos[j] = off + colID;
172-
_cuPos[j] = _clPos[j] + ndist ;
181+
_cuPos[j] = _clPos[j] + ndist;
173182
off += ndist - 1;
174183
}
175184
}

0 commit comments

Comments
 (0)