Skip to content

Commit 6dfa925

Browse files
committed
Huge refactor: Changed input structure for decoders. Now working properly on Bin and Pass-through.
1 parent ec7fe78 commit 6dfa925

File tree

8 files changed

+408
-344
lines changed

8 files changed

+408
-344
lines changed

src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoder.java

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,38 @@ public abstract class ColumnDecoder implements Externalizable {
3636
protected static final Log LOG = LogFactory.getLog(Decoder.class.getName());
3737
private static final long serialVersionUID = -1732411001366177787L;
3838

39-
protected ValueType[] _schema;
39+
protected ValueType _schema;
40+
protected int _colID;
41+
protected ValueType[] _multiSchema;
4042
protected int[] _colList;
4143
protected String[] _colnames = null;
42-
protected ColumnDecoder(ValueType[] schema, int[] colList) {
44+
protected int _offset;
45+
46+
protected ColumnDecoder(ValueType schema, int colID, int offset) {
4347
_schema = schema;
44-
_colList = colList;
48+
_colID = colID;
49+
_offset = offset;
4550
}
4651

47-
public ValueType[] getSchema() {
52+
protected ColumnDecoder(ValueType[] multiSchema, int[] colList, int offset) {
53+
_multiSchema = multiSchema;
54+
_colList = colList;
55+
_offset = offset;
56+
}
57+
public ValueType getSchema() {
4858
return _schema;
4959
}
5060

61+
public ValueType[] getMultiSchema() {
62+
return _multiSchema;
63+
}
64+
65+
public int getColID() {
66+
return _colID;
67+
}
68+
69+
public int[] getColList() {return _colList;}
70+
5171
public void setColnames(String[] colnames) {
5272
_colnames = colnames;
5373
}
@@ -56,7 +76,6 @@ public String[] getColnames() {
5676
return _colnames;
5777
}
5878

59-
public int[] getColList() {return _colList;}
6079
/**
6180
* Block decode API converting a matrix block into a frame block.
6281
*
@@ -135,10 +154,10 @@ public void writeExternal(ObjectOutput os)
135154
for(int j = 0; j < size2; j++)
136155
os.writeUTF(_colnames[j]);
137156

138-
int size3 = (_schema == null) ? 0 : _schema.length;
139-
os.writeInt(size3);
140-
for(int j = 0; j < size3; j++)
141-
os.writeByte(_schema[j].ordinal());
157+
//int size3 = (_schema == null) ? 0 : _schema.length;
158+
//os.writeInt(size3);
159+
//for(int j = 0; j < size3; j++)
160+
// os.writeByte(_schema[j].ordinal());
142161
}
143162

144163
/**
@@ -163,10 +182,10 @@ public void readExternal(ObjectInput in)
163182
_colnames[j] = in.readUTF();
164183
}
165184

166-
int size3 = in.readInt();
167-
_schema = (size3 == 0) ? null : new ValueType[size3];
168-
for(int j = 0; j < size3; j++) {
169-
_schema[j] = ValueType.values()[in.readByte()];
170-
}
185+
//int size3 = in.readInt();
186+
//_schema = (size3 == 0) ? null : new ValueType[size3];
187+
//for(int j = 0; j < size3; j++) {
188+
// _schema[j] = ValueType.values()[in.readByte()];
189+
//}
171190
}
172191
}

src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoderBin.java

Lines changed: 102 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,21 @@
3030
import java.io.IOException;
3131
import java.io.ObjectInput;
3232
import java.io.ObjectOutput;
33+
import java.util.Arrays;
3334

3435
public class ColumnDecoderBin extends ColumnDecoder {
3536
private static final long serialVersionUID = -3784249774608228805L;
3637

37-
private int[] _numBins;
38-
private double[][] _binMins = null;
39-
private double[][] _binMaxs = null;
38+
private int _numBins;
39+
private double[] _binMins = null;
40+
private double[] _binMaxs = null;
4041

4142
public ColumnDecoderBin() {
42-
super(null, null);
43+
super(null, -1, -1);
4344
}
4445

45-
protected ColumnDecoderBin(ValueType[] schema, int[] binCols) {
46-
super(schema, binCols);
46+
protected ColumnDecoderBin(ValueType schema, int binCols, int offset) {
47+
super(schema, binCols, offset);
4748
}
4849

4950

@@ -76,14 +77,12 @@ protected ColumnDecoderBin(ValueType[] schema, int[] binCols) {
7677
public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) {
7778
long b1 = System.nanoTime();
7879
out.ensureAllocatedColumns(in.getNumRows());
79-
80-
final int outColIndex = _colList[0] - 1;
81-
final double[] binMins = _binMins[0];
82-
final double[] binMaxs = _binMaxs[0];
80+
final double[] binMins = _binMins;
81+
final double[] binMaxs = _binMaxs;
8382
final int nRows = in.getNumRows();
84-
Array<?> a = out.getColumn(0);
83+
Array<?> a = out.getColumn(_colID);
8584
for (int i = 0; i < nRows; i++) {
86-
double val = in.get(i, 0);
85+
double val = in.get(i, _colID);
8786
double decoded;
8887
if (!Double.isNaN(val)) {
8988
int key = (int) Math.round(val);
@@ -104,20 +103,20 @@ public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) {
104103

105104
@Override
106105
public void columnDecode(MatrixBlock in, FrameBlock out, int rl, int ru) {
107-
for (int i = rl; i < ru; i++) {
108-
for (int j = 0; j < _colList.length; j++) {
109-
double val = in.get(i, j);
110-
if (!Double.isNaN(val)) {
111-
int key = (int) Math.round(val);
112-
double bmin = _binMins[j][key - 1];
113-
double bmax = _binMaxs[j][key - 1];
114-
double oval = bmin + (bmax - bmin) / 2 + (val - key) * (bmax - bmin);
115-
out.getColumn(_colList[j] - 1).set(i, oval);
116-
} else {
117-
out.getColumn(_colList[j] - 1).set(i, val);
118-
}
119-
}
120-
}
106+
//for (int i = rl; i < ru; i++) {
107+
// for (int j = 0; j < _colList.length; j++) {
108+
// double val = in.get(i, j);
109+
// if (!Double.isNaN(val)) {
110+
// int key = (int) Math.round(val);
111+
// double bmin = _binMins[j][key - 1];
112+
// double bmax = _binMaxs[j][key - 1];
113+
// double oval = bmin + (bmax - bmin) / 2 + (val - key) * (bmax - bmin);
114+
// out.getColumn(_colList[j] - 1).set(i, oval);
115+
// } else {
116+
// out.getColumn(_colList[j] - 1).set(i, val);
117+
// }
118+
// }
119+
//}
121120
}
122121

123122
//@Override
@@ -140,75 +139,96 @@ public void columnDecode(MatrixBlock in, FrameBlock out, int rl, int ru) {
140139

141140
@Override
142141
public ColumnDecoder subRangeDecoder(int colStart, int colEnd, int dummycodedOffset) {
143-
144-
for (int i = 0; i < _colList.length; i++) {
145-
long b1 = System.nanoTime();
146-
ValueType[] schema = (_schema != null) ? new ValueType[]{_schema[colStart - 1]} : null;
147-
if (_colList[i] == colStart) {
148-
ColumnDecoderBin sub = new ColumnDecoderBin(schema, new int[]{colStart});
149-
sub._numBins = new int[]{_numBins[i]};
150-
sub._binMins = new double[][]{_binMins[i]};
151-
sub._binMaxs = new double[][]{_binMaxs[i]};
152-
return sub;
153-
}
154-
long b2 = System.nanoTime();
155-
System.out.println("time: " + (b2 - b1) / 1e6 + " ms");
156-
}
157142
return null;
143+
//for (int i = 0; i < _colList.length; i++) {
144+
// long b1 = System.nanoTime();
145+
// ValueType[] schema = (_schema != null) ? new ValueType[]{_schema[colStart - 1]} : null;
146+
// if (_colList[i] == colStart) {
147+
// ColumnDecoderBin sub = new ColumnDecoderBin(schema, new int[]{colStart});
148+
// sub._numBins = new int[]{_numBins[i]};
149+
// sub._binMins = new double[][]{_binMins[i]};
150+
// sub._binMaxs = new double[][]{_binMaxs[i]};
151+
// return sub;
152+
// }
153+
// long b2 = System.nanoTime();
154+
// System.out.println("time: " + (b2 - b1) / 1e6 + " ms");
155+
//}
156+
//return null;
158157
}
159-
160158
@Override
161159
public void initMetaData(FrameBlock meta) {
162-
//initialize bin boundaries
163-
_numBins = new int[_colList.length];
164-
_binMins = new double[_colList.length][];
165-
_binMaxs = new double[_colList.length][];
166-
167-
//parse and insert bin boundaries
168-
for( int j=0; j<_colList.length; j++ ) {
169-
int numBins = (int)meta.getColumnMetadata(_colList[j]-1).getNumDistinct();
170-
_binMins[j] = new double[numBins];
171-
_binMaxs[j] = new double[numBins];
172-
for( int i=0; i<meta.getNumRows() & i<numBins; i++ ) {
173-
if( meta.get(i, _colList[j]-1)==null ) {
174-
if( i+1 < numBins )
175-
throw new DMLRuntimeException("Did not reach number of bins: "+(i+1)+"/"+numBins);
176-
break; //reached end of bins
177-
}
178-
String[] parts = UtilFunctions.splitRecodeEntry(
179-
meta.get(i, _colList[j]-1).toString());
180-
_binMins[j][i] = Double.parseDouble(parts[0]);
181-
_binMaxs[j][i] = Double.parseDouble(parts[1]);
160+
int col = _colID; // already 0-based
161+
162+
int numBins = (int) meta.getColumnMetadata(col).getNumDistinct();
163+
_binMins = new double[numBins];
164+
_binMaxs = new double[numBins];
165+
166+
for (int i = 0; i < meta.getNumRows() && i < numBins; i++) {
167+
Object val = meta.get(i, col);
168+
if (val == null) {
169+
if (i + 1 < numBins)
170+
throw new DMLRuntimeException("Did not reach number of bins: " + (i + 1) + "/" + numBins);
171+
break;
182172
}
173+
174+
String[] parts = UtilFunctions.splitRecodeEntry(val.toString());
175+
_binMins[i] = Double.parseDouble(parts[0]);
176+
_binMaxs[i] = Double.parseDouble(parts[1]);
183177
}
184178
}
179+
//@Override
180+
//public void initMetaData(FrameBlock meta) {
181+
// System.out.println("11");
182+
// //initialize bin boundaries
183+
// _numBins = new int[_colList.length];
184+
// _binMins = new double[_colList.length][];
185+
// _binMaxs = new double[_colList.length][];
186+
//
187+
// //parse and insert bin boundaries
188+
// for( int j=0; j<_colList.length; j++ ) {
189+
// int numBins = (int)meta.getColumnMetadata(_colList[j]-1).getNumDistinct();
190+
// _binMins[j] = new double[numBins];
191+
// _binMaxs[j] = new double[numBins];
192+
// for( int i=0; i<meta.getNumRows() & i<numBins; i++ ) {
193+
// if( meta.get(i, _colList[j]-1)==null ) {
194+
// if( i+1 < numBins )
195+
// throw new DMLRuntimeException("Did not reach number of bins: "+(i+1)+"/"+numBins);
196+
// break; //reached end of bins
197+
// }
198+
// String[] parts = UtilFunctions.splitRecodeEntry(
199+
// meta.get(i, _colList[j]-1).toString());
200+
// _binMins[j][i] = Double.parseDouble(parts[0]);
201+
// _binMaxs[j][i] = Double.parseDouble(parts[1]);
202+
// }
203+
// }
204+
//}
185205

186206
@Override
187207
public void writeExternal(ObjectOutput out) throws IOException {
188-
super.writeExternal(out);
189-
for( int i=0; i<_colList.length; i++ ) {
190-
int len = _numBins[i];
191-
out.writeInt(len);
192-
for(int j=0; j<len; j++) {
193-
out.writeDouble(_binMins[i][j]);
194-
out.writeDouble(_binMaxs[i][j]);
195-
}
196-
}
208+
// super.writeExternal(out);
209+
// for( int i=0; i<_colList.length; i++ ) {
210+
// int len = _numBins[i];
211+
// out.writeInt(len);
212+
// for(int j=0; j<len; j++) {
213+
// out.writeDouble(_binMins[i][j]);
214+
// out.writeDouble(_binMaxs[i][j]);
215+
// }
216+
// }
197217
}
198218

199219
@Override
200220
public void readExternal(ObjectInput in) throws IOException {
201-
super.readExternal(in);
202-
_numBins = new int[_colList.length];
203-
_binMins = new double[_colList.length][];
204-
_binMaxs = new double[_colList.length][];
205-
for( int i=0; i<_colList.length; i++ ) {
206-
int len = in.readInt();
207-
_numBins[i] = len;
208-
for(int j=0; j<len; j++) {
209-
_binMins[i][j] = in.readDouble();
210-
_binMaxs[i][j] = in.readDouble();
211-
}
212-
}
221+
// super.readExternal(in);
222+
// _numBins = new int[_colList.length];
223+
// _binMins = new double[_colList.length][];
224+
// _binMaxs = new double[_colList.length][];
225+
// for( int i=0; i<_colList.length; i++ ) {
226+
// int len = in.readInt();
227+
// _numBins[i] = len;
228+
// for(int j=0; j<len; j++) {
229+
// _binMins[i][j] = in.readDouble();
230+
// _binMaxs[i][j] = in.readDouble();
231+
// }
232+
// }
213233
}
214234
}

0 commit comments

Comments
 (0)