2121
2222import org .apache .sysds .common .Types .ValueType ;
2323import org .apache .sysds .runtime .frame .data .FrameBlock ;
24+ import org .apache .sysds .runtime .frame .data .columns .ABooleanArray ;
25+ import org .apache .sysds .runtime .frame .data .columns .Array ;
2426import org .apache .sysds .runtime .matrix .data .MatrixBlock ;
2527import org .apache .sysds .runtime .matrix .data .Pair ;
2628import org .apache .sysds .runtime .transform .TfUtils ;
@@ -36,53 +38,41 @@ public class ColumnDecoderRecode extends ColumnDecoder {
3638
3739 private static final long serialVersionUID = -3784249774608228805L ;
3840
39- private HashMap <Long , Object > _rcMaps = null ;
40- private Object [] _rcMapsDirect = null ;
41+ private HashMap <Long , Object > _rcMap = null ;
42+ private Object [] _rcMapDirect = null ;
4143 private boolean _onOut = false ;
4244
/**
 * Creates an uninitialized decoder: no schema, and -1 for both the
 * column id and the offset passed to the superclass.
 * NOTE(review): presumably required so the class can be re-created
 * before readExternal is invoked - confirm against the superclass.
 */
public ColumnDecoderRecode() {
	super(null, -1, -1);
}
4648
/**
 * Creates a recode decoder for a single column.
 *
 * @param schema value type of the decoded column (forwarded to the superclass)
 * @param onOut  if true, codes are read back from the output frame column
 *               instead of from the input matrix during decoding
 * @param rcCol  id of the recoded column (forwarded to the superclass)
 * @param offset offset forwarded to the superclass
 *               NOTE(review): its exact meaning is defined outside this view
 */
protected ColumnDecoderRecode(ValueType schema, boolean onOut, int rcCol, int offset) {
	super(schema, rcCol, offset);
	this._onOut = onOut;
}
5153
5254 @ Override
5355 public FrameBlock columnDecode (MatrixBlock in , FrameBlock out ) {
5456
55- long r1 = System .nanoTime ();
56- // TODO
5757 out .ensureAllocatedColumns (in .getNumRows ());
5858 columnDecode (in , out , 0 , in .getNumRows ());
59- long r2 = System .nanoTime ();
60- System .out .println (this .getClass () + "time: " + (r2 - r1 ) / 1e6 + " ms" );
6159 return out ;
6260 }
6361
6462 @ Override
6563 public void columnDecode (MatrixBlock in , FrameBlock out , int rl , int ru ) {
6664 // TODO
67- if ( _onOut ) { //recode on output (after dummy)
68- for ( int i =rl ; i <ru ; i ++ ) {
69- for ( int j =0 ; j <_colList .length ; j ++ ) {
70- int colID = _colList [j ];
71- double val = UtilFunctions .objectToDouble (
72- out .getSchema ()[colID -1 ], out .get (i , colID -1 ));
73- long key = UtilFunctions .toLong (val );
74- out .set (i , colID -1 , getRcMapValue (j , key ));
75- }
76- }
65+ Array <?> a = out .getColumn (_colID );
66+ if (_onOut ) {
67+ for (int i = rl ; i < ru ; i ++) {
68+ double val = UtilFunctions .objectToDouble (_schema , a .get (i ));
69+ long key = UtilFunctions .toLong (val );
70+ setArrayValue (a , i , getRcMapValue (key )); }
7771 }
78- else { //recode on input (no dummy)
79- out .ensureAllocatedColumns (in .getNumRows ());
80- for ( int i =rl ; i <ru ; i ++ ) {
81- for ( int j =0 ; j <_colList .length ; j ++ ) {
82- //double val = in.get(i, _colList[j]-1);
83- long key = UtilFunctions .toLong (in .get (i , j ));
84- out .set (i , _colList [j ]-1 , getRcMapValue (j , key ));
85- }
72+ else {
73+ for (int i = rl ; i < ru ; i ++) {
74+ long key = UtilFunctions .toLong (in .get (i , _colID ));
75+ setArrayValue (a , i , getRcMapValue (key ));
8676 }
8777 }
8878 }
@@ -117,7 +107,24 @@ public ColumnDecoder subRangeDecoder(int colStart, int colEnd, int dummycodedOff
117107 @ Override
118108 @ SuppressWarnings ("unchecked" )
119109 public void initMetaData (FrameBlock meta ) {
120-
110+ int col = _colID ; // already 0-based
111+ _rcMap = new HashMap <>();
112+ long max = 0 ;
113+ for (int i =0 ; i <meta .getNumRows (); i ++) {
114+ Object val = meta .get (i , col );
115+ if (val == null )
116+ break ;
117+ String [] tmp = ColumnEncoderRecode .splitRecodeMapEntry (val .toString ());
118+ Object obj = UtilFunctions .stringToObject (_schema , tmp [0 ]);
119+ long lval = Long .parseLong (tmp [1 ]);
120+ _rcMap .put (lval , obj );
121+ max = Math .max (max , lval );
122+ }
123+ if (max < Integer .MAX_VALUE ) {
124+ _rcMapDirect = new Object [(int )max ];
125+ for (Map .Entry <Long ,Object > e : _rcMap .entrySet ())
126+ _rcMapDirect [e .getKey ().intValue ()-1 ] = e .getValue ();
127+ }
121128 //initialize recode maps according to schema
122129 //_rcMaps = new HashMap[_colList.length];
123130 //long[] max = new long[_colList.length];
@@ -146,12 +153,29 @@ public void initMetaData(FrameBlock meta) {
146153 // }
147154 //}
148155 }
149- public Object getRcMapValue (int i , long key ) {
150- return null ;
151- //return (_rcMapsDirect != null) ?
152- // _rcMapsDirect[i][(int)key-1] : _rcMaps[i].get(key);
156+ public Object getRcMapValue (long key ) {
157+ return (_rcMapDirect != null && key > 0 && key <= _rcMapDirect .length ) ?
158+ _rcMapDirect [(int )key -1 ] : _rcMap .get (key );
153159 }
154-
160+ private void setArrayValue (Array <?> a , int index , Object val ) {
161+ if (val == null ) {
162+ if (_schema == ValueType .STRING || _schema == ValueType .CHARACTER )
163+ a .set (index , (String )null );
164+ else if (_schema == ValueType .BOOLEAN )
165+ ((ABooleanArray )a ).set (index , (Boolean )null );
166+ else
167+ a .set (index , Double .NaN );
168+ }
169+ else if (_schema .isNumeric ()) {
170+ a .set (index , UtilFunctions .objectToDouble (_schema , val ));
171+ }
172+ else if (_schema == ValueType .BOOLEAN ) {
173+ ((ABooleanArray )a ).set (index , UtilFunctions .objectToBoolean (_schema , val ));
174+ }
175+ else { // STRING or CHARACTER
176+ a .set (index , val .toString ());
177+ }
178+ }
155179 /**
156180 * Parses a line of <token, ID, count> into <token, ID> pairs, where
157181 * quoted tokens (potentially including separators) are supported.
@@ -182,6 +206,13 @@ public void writeExternal(ObjectOutput out) throws IOException {
182206 // out.writeUTF(e1.getValue().toString());
183207 // }
184208 //}
209+ super .writeExternal (out );
210+ out .writeBoolean (_onOut );
211+ out .writeInt (_rcMap .size ());
212+ for (Map .Entry <Long ,Object > e : _rcMap .entrySet ()) {
213+ out .writeLong (e .getKey ());
214+ out .writeUTF (e .getValue ().toString ());
215+ }
185216 }
186217
187218 @ Override
@@ -197,5 +228,21 @@ public void readExternal(ObjectInput in) throws IOException {
197228 // maps.put(in.readLong(), in.readUTF());
198229 // _rcMaps[i] = maps;
199230 //}
231+ super .readExternal (in );
232+ _onOut = in .readBoolean ();
233+ int size = in .readInt ();
234+ _rcMap = new HashMap <>();
235+ long max = 0 ;
236+ for (int i = 0 ; i < size ; i ++) {
237+ long key = in .readLong ();
238+ String val = in .readUTF ();
239+ _rcMap .put (key , val );
240+ max = Math .max (max , key );
241+ }
242+ if (max < Integer .MAX_VALUE ) {
243+ _rcMapDirect = new Object [(int )max ];
244+ for (Map .Entry <Long ,Object > e : _rcMap .entrySet ())
245+ _rcMapDirect [e .getKey ().intValue ()-1 ] = e .getValue ();
246+ }
200247 }
201248}
0 commit comments