2222import java .io .DataOutput ;
2323import java .io .IOException ;
2424import java .io .Serializable ;
25- import java .util .BitSet ;
25+ import java .util .ArrayList ;
26+ import java .util .List ;
27+ import java .util .concurrent .ExecutorService ;
28+ import java .util .concurrent .Future ;
2629
2730import org .apache .commons .lang3 .NotImplementedException ;
2831import org .apache .commons .logging .Log ;
@@ -95,7 +98,6 @@ public final void setUnique(int nUnique) {
9598 */
9699 public abstract int getIndex (int n );
97100
98-
99101 /**
100102 * Shortcut method to support Integer objects, not really efficient but for the purpose of reusing code.
101103 *
@@ -116,6 +118,18 @@ public void set(int n, Integer v) {
116118 */
117119 public abstract void set (int n , int v );
118120
121+ /**
122+ * Set a range of values in this map from another map.
123+ *
124+ * The given tm must only contain values supported by this map; this is not verified.
125+ *
126+ * @param l lower bound
127+ * @param u upper bound (not inclusive)
128+ * @param off offset to take values from tm
129+ * @param tm the other map to copy values from
130+ */
131+ public abstract void set (int l , int u , int off , AMapToData tm );
132+
119133 /**
120134 * Set the index to the value and get the contained value after.
121135 *
@@ -411,8 +425,6 @@ public final int[] getCounts() {
411425 * @param nCol The number of columns
412426 */
413427 public final void preAggregateDDC_DDC (AMapToData tm , IDictionary td , Dictionary ret , int nCol ) {
414- if (td .getNumberOfValues (nCol ) != tm .nUnique )
415- throw new DMLCompressionException ("Invalid map and dict combination" );
416428 if (nCol == 1 )
417429 preAggregateDDC_DDCSingleCol (tm , td .getValues (), ret .getValues ());
418430 else
@@ -788,9 +800,9 @@ public void preAggregateDDC_RLE(int[] ptr, char[] data, IDictionary td, Dictiona
788800 */
789801 public void copy (AMapToData d ) {
790802 if (d .nUnique == 1 )
791- return ;
792- // else if(d instanceof MapToBit)
793- // copyBit((MapToBit) d);
803+ fill ( 0 ) ;
804+ else if (d instanceof MapToBit )
805+ copyBit ((MapToBit ) d );
794806 else if (d instanceof MapToInt )
795807 copyInt ((MapToInt ) d );
796808 else {
@@ -813,9 +825,18 @@ protected void copyInt(MapToInt d) {
813825 *
814826 * @param d The array to copy
815827 */
816- public abstract void copyInt (int [] d );
828+ public void copyInt (int [] d ) {
829+ copyInt (d , 0 , Math .min (d .length , size ()));
830+ }
831+
832+ public abstract void copyInt (int [] d , int start , int end );
817833
818- public abstract void copyBit (BitSet d );
834+ public void copyBit (MapToBit d ) {
835+ fill (0 );
836+ for (int i = d .nextSetBit (0 ); i >= 0 ; i = d .nextSetBit (i + 1 )) {
837+ set (i , 1 );
838+ }
839+ }
819840
820841 public int getMax () {
821842 int m = -1 ;
@@ -826,13 +847,6 @@ public int getMax() {
826847 return m ;
827848 }
828849
829- /**
830- * Get the maximum possible value to encode in this encoding. For instance in a bit you can encode 2 values
831- *
832- * @return The maximum number of distinct values to encode
833- */
834- public abstract int getMaxPossible ();
835-
836850 /**
837851 * Reallocate the map, to a smaller instance if applicable. Note it does not change the length of the array, just the
838852 * datatype.
@@ -887,7 +901,8 @@ public int countRuns(AOffset off) {
887901
888902 @ Override
889903 public boolean equals (Object e ) {
890- return e instanceof AMapToData && (this == e || this .equals ((AMapToData ) e ));
904+ return this == e || // same object or
905+ (e instanceof AMapToData && this .equals ((AMapToData ) e ));
891906 }
892907
893908 /**
@@ -903,7 +918,7 @@ public void verify() {
903918 if (CompressedMatrixBlock .debug ) {
904919 for (int i = 0 ; i < size (); i ++) {
905920 if (getIndex (i ) >= nUnique ) {
906- throw new DMLCompressionException ("invalid construction of Mapping data containing values above unique" );
921+ throw new DMLCompressionException ("Invalid construction of Mapping data containing values above unique" );
907922 }
908923 }
909924 }
@@ -934,7 +949,7 @@ public void decompressToRange(double[] c, int rl, int ru, int offR, double[] val
934949 decompressToRangeOff (c , rl , ru , offR , values );
935950 }
936951
937- public void decompressToRangeOff (double [] c , int rl , int ru , int offR , double [] values ) {
952+ protected void decompressToRangeOff (double [] c , int rl , int ru , int offR , double [] values ) {
938953 for (int i = rl , offT = rl + offR ; i < ru ; i ++, offT ++)
939954 c [offT ] += values [getIndex (i )];
940955 }
@@ -950,14 +965,70 @@ protected void decompressToRangeNoOffBy8(double[] c, int r, double[] values) {
950965 c [r + 7 ] += values [getIndex (r + 7 )];
951966 }
952967
953- public void decompressToRangeNoOff (double [] c , int rl , int ru , double [] values ) {
968+ protected void decompressToRangeNoOff (double [] c , int rl , int ru , double [] values ) {
954969 final int h = (ru - rl ) % 8 ;
955970 for (int rc = rl ; rc < rl + h ; rc ++)
956971 c [rc ] += values [getIndex (rc )];
957972 for (int rc = rl + h ; rc < ru ; rc += 8 )
958973 decompressToRangeNoOffBy8 (c , rc , values );
959974 }
960975
976+ /**
977+ * Split this mapping into x smaller mappings according to round robin.
978+ *
979+ * @param multiplier The number of smaller mappings to construct
980+ * @return The list of smaller mappings
981+ */
982+ public AMapToData [] splitReshapeDDC (final int multiplier ) {
983+
984+ final int s = size ();
985+ final AMapToData [] ret = new AMapToData [multiplier ];
986+ final int eachSize = s / multiplier ;
987+ for (int i = 0 ; i < multiplier ; i ++)
988+ ret [i ] = MapToFactory .create (eachSize , getUnique ());
989+
990+ final int blkz = Math .max (eachSize / 8 , 2048 ) * multiplier ;
991+ for (int i = 0 ; i < s ; i += blkz )
992+ splitReshapeDDCBlock (ret , multiplier , i , Math .min (i + blkz , s ));
993+
994+ return ret ;
995+ }
996+
997+ public AMapToData [] splitReshapeDDCPushDown (final int multiplier , final ExecutorService pool ) throws Exception {
998+
999+ final int s = size ();
1000+ final AMapToData [] ret = new AMapToData [multiplier ];
1001+ final int eachSize = s / multiplier ;
1002+ for (int i = 0 ; i < multiplier ; i ++)
1003+ ret [i ] = MapToFactory .create (eachSize , getUnique ());
1004+
1005+ final int blkz = Math .max (eachSize / 8 , 2048 ) * multiplier ;
1006+ List <Future <?>> tasks = new ArrayList <>();
1007+ for (int i = 0 ; i < s ; i += blkz ) {
1008+ final int start = i ;
1009+ final int end = Math .min (i + blkz , s );
1010+ tasks .add (pool .submit (() -> splitReshapeDDCBlock (ret , multiplier , start , end )));
1011+ }
1012+
1013+ for (Future <?> t : tasks )
1014+ t .get ();
1015+
1016+ return ret ;
1017+ }
1018+
1019+ private void splitReshapeDDCBlock (final AMapToData [] ret , final int multiplier , final int start , final int end ) {
1020+
1021+ for (int i = start ; i < end ; i += multiplier )
1022+ splitReshapeDDCRow (ret , multiplier , i );
1023+ }
1024+
1025+ private void splitReshapeDDCRow (final AMapToData [] ret , final int multiplier , final int i ) {
1026+ final int off = i / multiplier ;
1027+ final int end = i + multiplier ;
1028+ for (int j = i ; j < end ; j ++)
1029+ ret [j % multiplier ].set (off , getIndex (j ));
1030+ }
1031+
9611032 @ Override
9621033 public String toString () {
9631034 final int sz = size ();
0 commit comments