Skip to content

Commit 247a18c

Browse files
committed
fix ?
1 parent 99cfe32 commit 247a18c

File tree

7 files changed

+84
-31
lines changed

7 files changed

+84
-31
lines changed

src/main/java/org/apache/sysds/runtime/frame/data/lib/MatrixBlockFromFrame.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class MatrixBlockFromFrame {
3737

3838
public static final int blocksizeIJ = 32;
3939

40-
public static boolean WARNED_FOR_FAILED_CAST = false;
40+
public static Boolean WARNED_FOR_FAILED_CAST = false;
4141

4242
private MatrixBlockFromFrame(){
4343
// private constructor for code coverage.
@@ -108,12 +108,13 @@ private static long convert(FrameBlock frame, MatrixBlock mb, int n, int rl, int
108108
return convertGeneric(frame, mb, n, rl, ru);
109109
}
110110
catch(NumberFormatException | DMLRuntimeException e) {
111-
if(!WARNED_FOR_FAILED_CAST) {
112-
113-
LOG.error(
114-
"Failed to convert to Matrix because of number format errors, falling back to NaN on incompatible cells",
115-
e);
116-
WARNED_FOR_FAILED_CAST = true;
111+
synchronized(WARNED_FOR_FAILED_CAST){
112+
if(!WARNED_FOR_FAILED_CAST) {
113+
LOG.error(
114+
"Failed to convert to Matrix because of number format errors, falling back to NaN on incompatible cells",
115+
e);
116+
WARNED_FOR_FAILED_CAST = true;
117+
}
117118
}
118119
return convertSafeCast(frame, mb, n, rl, ru);
119120

src/main/java/org/apache/sysds/runtime/transform/decode/DecoderBin.java

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.sysds.runtime.DMLRuntimeException;
2929
import org.apache.sysds.runtime.frame.data.FrameBlock;
3030
import org.apache.sysds.runtime.frame.data.columns.Array;
31+
import org.apache.sysds.runtime.frame.data.columns.ColumnMetadata;
3132
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
3233
import org.apache.sysds.runtime.util.UtilFunctions;
3334

@@ -43,15 +44,18 @@ public class DecoderBin extends Decoder {
4344

4445
// a) column bin boundaries
4546
private int[] _numBins;
47+
private int[] _dcCols = null;
48+
private int[] _srcCols = null;
4649
private double[][] _binMins = null;
4750
private double[][] _binMaxs = null;
4851

49-
public DecoderBin() {
50-
super(null, null);
51-
}
52+
// public DecoderBin() {
53+
// super(null, null);
54+
// }
5255

53-
protected DecoderBin(ValueType[] schema, int[] binCols) {
56+
protected DecoderBin(ValueType[] schema, int[] binCols, int[] dcCols) {
5457
super(schema, binCols);
58+
_dcCols = dcCols;
5559
}
5660

5761
@Override
@@ -66,7 +70,7 @@ public void decode(MatrixBlock in, FrameBlock out, int rl, int ru) {
6670
for( int i=rl; i< ru; i++ ) {
6771
for( int j=0; j<_colList.length; j++ ) {
6872
final Array<?> a = out.getColumn(_colList[j] - 1);
69-
final double val = in.get(i, _colList[j] - 1);
73+
final double val = in.get(i, _srcCols[j] - 1);
7074
if(!Double.isNaN(val)){
7175
try{
7276

@@ -125,6 +129,34 @@ public void initMetaData(FrameBlock meta) {
125129
_binMaxs[j][i] = Double.parseDouble(parts[1]);
126130
}
127131
}
132+
133+
134+
if( _dcCols.length > 0 ) {
135+
//prepare source column id mapping w/ dummy coding
136+
_srcCols = new int[_colList.length];
137+
int ix1 = 0, ix2 = 0, off = 0;
138+
while( ix1<_colList.length ) {
139+
if( ix2>=_dcCols.length || _colList[ix1] < _dcCols[ix2] ) {
140+
_srcCols[ix1] = _colList[ix1] + off;
141+
ix1 ++;
142+
}
143+
else { //_colList[ix1] > _dcCols[ix2]
144+
ColumnMetadata d =meta.getColumnMetadata()[_dcCols[ix2]-1];
145+
String v = meta.getString( _dcCols[ix2]-1, 0);
146+
if(v.charAt(0) == '¿'){
147+
off += UtilFunctions.parseToLong(v.substring(1));
148+
}
149+
else {
150+
off += d.isDefault() ? -1 : d.getNumDistinct() - 1;
151+
}
152+
ix2 ++;
153+
}
154+
}
155+
}
156+
else {
157+
//prepare direct source column mapping
158+
_srcCols = _colList;
159+
}
128160
}
129161

130162
@Override

src/main/java/org/apache/sysds/runtime/transform/decode/DecoderComposite.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ protected DecoderComposite(ValueType[] schema, List<Decoder> decoders) {
5050
_decoders = decoders;
5151
}
5252

53-
public DecoderComposite() { super(null, null); }
53+
// public DecoderComposite() { super(null, null); }
5454

5555
@Override
5656
public FrameBlock decode(MatrixBlock in, FrameBlock out) {

src/main/java/org/apache/sysds/runtime/transform/decode/DecoderFactory.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,29 @@ public static Decoder createDecoder(String spec, String[] colnames, ValueType[]
6464
try {
6565
//parse transform specification
6666
JSONObject jSpec = new JSONObject(spec);
67-
List<Decoder> ldecoders = new ArrayList<>();
6867

69-
//create decoders 'bin', 'recode', 'dummy' and 'pass-through'
68+
//create decoders 'bin', 'recode', 'hash', 'dummy', and 'pass-through'
7069
List<Integer> binIDs = TfMetaUtils.parseBinningColIDs(jSpec, colnames, minCol, maxCol);
7170
List<Integer> rcIDs = Arrays.asList(ArrayUtils.toObject(
7271
TfMetaUtils.parseJsonIDList(jSpec, colnames, TfMethod.RECODE.toString(), minCol, maxCol)));
7372
List<Integer> hcIDs = Arrays.asList(ArrayUtils.toObject(
7473
TfMetaUtils.parseJsonIDList(jSpec, colnames, TfMethod.HASH.toString(), minCol, maxCol)));
7574
List<Integer> dcIDs = Arrays.asList(ArrayUtils.toObject(
7675
TfMetaUtils.parseJsonIDList(jSpec, colnames, TfMethod.DUMMYCODE.toString(), minCol, maxCol)));
76+
// only specially treat the columns with both recode and dictionary
7777
rcIDs = unionDistinct(rcIDs, dcIDs);
78+
// remove the hash-encoded columns from the recode list. TODO: potentially wrong; consider removing this step.
7879
rcIDs = except(rcIDs, hcIDs);
80+
7981
int len = dcIDs.isEmpty() ? Math.min(meta.getNumColumns(), clen) : meta.getNumColumns();
80-
List<Integer> ptIDs = except(except(UtilFunctions.getSeqList(1, len, 1), rcIDs), binIDs);
82+
83+
// set the remaining columns to passthrough.
84+
List<Integer> ptIDs = UtilFunctions.getSeqList(1, len, 1);
85+
// except recoded columns
86+
ptIDs = except(ptIDs, rcIDs);
87+
// except binned columns
88+
ptIDs = except(ptIDs, binIDs);
89+
// except hashed columns
8190
ptIDs = except(ptIDs, hcIDs); // remove hashed columns
8291

8392
//create default schema if unspecified (with double columns for pass-through)
@@ -86,10 +95,14 @@ public static Decoder createDecoder(String spec, String[] colnames, ValueType[]
8695
for( Integer col : ptIDs )
8796
schema[col-1] = ValueType.FP64;
8897
}
98+
99+
// collect all the decoders in one list.
100+
List<Decoder> ldecoders = new ArrayList<>();
89101

90102
if( !binIDs.isEmpty() ) {
91103
ldecoders.add(new DecoderBin(schema,
92-
ArrayUtils.toPrimitive(binIDs.toArray(new Integer[0]))));
104+
ArrayUtils.toPrimitive(binIDs.toArray(new Integer[0])),
105+
ArrayUtils.toPrimitive(dcIDs.toArray(new Integer[0]))));
93106
}
94107
if( !dcIDs.isEmpty() ) {
95108
ldecoders.add(new DecoderDummycode(schema,

src/main/java/org/apache/sysds/runtime/transform/decode/DecoderPassThrough.java

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ protected DecoderPassThrough(ValueType[] schema, int[] ptCols, int[] dcCols) {
4949
_dcCols = dcCols;
5050
}
5151

52-
public DecoderPassThrough() { super(null, null); }
52+
// public DecoderPassThrough() { super(null, null); }
5353

5454
@Override
5555
public FrameBlock decode(MatrixBlock in, FrameBlock out) {
@@ -61,13 +61,12 @@ public FrameBlock decode(MatrixBlock in, FrameBlock out) {
6161
@Override
6262
public void decode(MatrixBlock in, FrameBlock out, int rl, int ru) {
6363
int clen = Math.min(_colList.length, out.getNumColumns());
64-
for( int i=rl; i<ru; i++ ) {
65-
for( int j=0; j<clen; j++ ) {
66-
int srcColID = _srcCols[j];
67-
int tgtColID = _colList[j];
68-
double val = in.get(i, srcColID-1);
69-
out.set(i, tgtColID-1,
70-
UtilFunctions.doubleToObject(_schema[tgtColID-1], val));
64+
for(int i = rl; i < ru; i++) {
65+
for(int j = 0; j < clen; j++) {
66+
int srcColID = _srcCols[j] - 1;
67+
int tgtColID = _colList[j] - 1;
68+
double val = in.get(i, srcColID);
69+
out.getColumn(tgtColID).set(i, val);
7170
}
7271
}
7372
}
@@ -114,7 +113,13 @@ public void initMetaData(FrameBlock meta) {
114113
}
115114
else { //_colList[ix1] > _dcCols[ix2]
116115
ColumnMetadata d =meta.getColumnMetadata()[_dcCols[ix2]-1];
117-
off += d.isDefault() ? -1 : d.getNumDistinct() - 1;
116+
String v = meta.getString( _dcCols[ix2]-1, 0);
117+
if(v.charAt(0) == '¿'){
118+
off += UtilFunctions.parseToLong(v.substring(1));
119+
}
120+
else {
121+
off += d.isDefault() ? -1 : d.getNumDistinct() - 1;
122+
}
118123
ix2 ++;
119124
}
120125
}

src/main/java/org/apache/sysds/runtime/transform/decode/DecoderRecode.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ public class DecoderRecode extends Decoder
5050
// private Object[][] _rcMapsDirect = null;
5151
private boolean _onOut = false;
5252

53-
public DecoderRecode() {
54-
super(null, null);
55-
}
53+
// public DecoderRecode() {
54+
// super(null, null);
55+
// }
5656

5757
protected DecoderRecode(ValueType[] schema, boolean onOut, int[] rcCols) {
5858
super(schema, rcCols);

src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderFeatureHash.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,15 +146,17 @@ public FrameBlock getMetaData(FrameBlock meta) {
146146
return meta;
147147

148148
meta.ensureAllocatedColumns(1);
149-
meta.set(0, _colID - 1, String.valueOf(_K));
149+
// set metadata of hash columns to the marker character '¿' followed by K
150+
meta.set(0, _colID - 1, String.format("¿%d" , _K));
151+
150152
return meta;
151153
}
152154

153155
@Override
154156
public void initMetaData(FrameBlock meta) {
155157
if(meta == null || meta.getNumRows() <= 0)
156158
return;
157-
_K = UtilFunctions.parseToLong(meta.get(0, _colID - 1).toString());
159+
_K = UtilFunctions.parseToLong(meta.getString(0, _colID - 1).substring(1));
158160
}
159161

160162
@Override

0 commit comments

Comments
 (0)