Skip to content

Commit 809490f

Browse files
committed
[MINOR] Mapping Add A Range Setting
This commit adds a range-setting function for mappings, to enable subsequent parallel setting from integer arrays. While adding the range support, the commit also cleans up some edge-case logic on specific map types, for instance fixing the 127 vs. 128 unique-value support in MapToUByte. Closes #2164 Signed-off-by: Sebastian Baunsgaard <[email protected]>
1 parent aaa0192 commit 809490f

File tree

18 files changed

+1168
-312
lines changed

18 files changed

+1168
-312
lines changed

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
3131
import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary;
3232
import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary;
33+
import org.apache.sysds.runtime.compress.colgroup.dictionary.PlaceHolderDict;
3334
import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
3435
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
3536
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
@@ -79,7 +80,7 @@ private ColGroupConst(IColIndex colIndices, IDictionary dict) {
7980
public static AColGroup create(IColIndex colIndices, IDictionary dict) {
8081
if(dict == null)
8182
return new ColGroupEmpty(colIndices);
82-
else if(dict.getNumberOfValues(colIndices.size()) > 1) {
83+
else if(dict.getNumberOfValues(colIndices.size()) > 1 && !(dict instanceof PlaceHolderDict)) {
8384
// extract dict first row
8485
final double[] nd = new double[colIndices.size()];
8586
for(int i = 0; i < colIndices.size(); i++)

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java

Lines changed: 91 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222
import java.io.DataOutput;
2323
import java.io.IOException;
2424
import java.io.Serializable;
25-
import java.util.BitSet;
25+
import java.util.ArrayList;
26+
import java.util.List;
27+
import java.util.concurrent.ExecutorService;
28+
import java.util.concurrent.Future;
2629

2730
import org.apache.commons.lang3.NotImplementedException;
2831
import org.apache.commons.logging.Log;
@@ -95,7 +98,6 @@ public final void setUnique(int nUnique) {
9598
*/
9699
public abstract int getIndex(int n);
97100

98-
99101
/**
100102
* Shortcut method to support Integer objects, not really efficient but for the purpose of reusing code.
101103
*
@@ -116,6 +118,18 @@ public void set(int n, Integer v) {
116118
*/
117119
public abstract void set(int n, int v);
118120

121+
/**
122+
* set a range of values from another map.
123+
*
124+
* The given tm must contain only supported values; this is not verified.
125+
*
126+
* @param l lower bound
127+
* @param u upper bound (not inclusive)
128+
* @param off offset to take values from tm
129+
* @param tm the other map to copy values from
130+
*/
131+
public abstract void set(int l, int u, int off, AMapToData tm);
132+
119133
/**
120134
* Set the index to the value and get the contained value after.
121135
*
@@ -411,8 +425,6 @@ public final int[] getCounts() {
411425
* @param nCol The number of columns
412426
*/
413427
public final void preAggregateDDC_DDC(AMapToData tm, IDictionary td, Dictionary ret, int nCol) {
414-
if(td.getNumberOfValues(nCol) != tm.nUnique)
415-
throw new DMLCompressionException("Invalid map and dict combination");
416428
if(nCol == 1)
417429
preAggregateDDC_DDCSingleCol(tm, td.getValues(), ret.getValues());
418430
else
@@ -788,9 +800,9 @@ public void preAggregateDDC_RLE(int[] ptr, char[] data, IDictionary td, Dictiona
788800
*/
789801
public void copy(AMapToData d) {
790802
if(d.nUnique == 1)
791-
return;
792-
// else if(d instanceof MapToBit)
793-
// copyBit((MapToBit) d);
803+
fill(0);
804+
else if(d instanceof MapToBit)
805+
copyBit((MapToBit) d);
794806
else if(d instanceof MapToInt)
795807
copyInt((MapToInt) d);
796808
else {
@@ -813,9 +825,18 @@ protected void copyInt(MapToInt d) {
813825
*
814826
* @param d The array to copy
815827
*/
816-
public abstract void copyInt(int[] d);
828+
public void copyInt(int[] d) {
829+
copyInt(d, 0, Math.min(d.length, size()));
830+
}
831+
832+
public abstract void copyInt(int[] d, int start, int end);
817833

818-
public abstract void copyBit(BitSet d);
834+
public void copyBit(MapToBit d) {
835+
fill(0);
836+
for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) {
837+
set(i, 1);
838+
}
839+
}
819840

820841
public int getMax() {
821842
int m = -1;
@@ -826,13 +847,6 @@ public int getMax() {
826847
return m;
827848
}
828849

829-
/**
830-
* Get the maximum possible value to encode in this encoding. For instance in a bit you can encode 2 values
831-
*
832-
* @return The maximum number of distinct values to encode
833-
*/
834-
public abstract int getMaxPossible();
835-
836850
/**
837851
* Reallocate the map, to a smaller instance if applicable. Note it does not change the length of the array, just the
838852
* datatype.
@@ -887,7 +901,8 @@ public int countRuns(AOffset off) {
887901

888902
@Override
889903
public boolean equals(Object e) {
890-
return e instanceof AMapToData && (this == e || this.equals((AMapToData) e));
904+
return this == e || // same object or
905+
(e instanceof AMapToData && this.equals((AMapToData) e));
891906
}
892907

893908
/**
@@ -903,7 +918,7 @@ public void verify() {
903918
if(CompressedMatrixBlock.debug) {
904919
for(int i = 0; i < size(); i++) {
905920
if(getIndex(i) >= nUnique) {
906-
throw new DMLCompressionException("invalid construction of Mapping data containing values above unique");
921+
throw new DMLCompressionException("Invalid construction of Mapping data containing values above unique");
907922
}
908923
}
909924
}
@@ -934,7 +949,7 @@ public void decompressToRange(double[] c, int rl, int ru, int offR, double[] val
934949
decompressToRangeOff(c, rl, ru, offR, values);
935950
}
936951

937-
public void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) {
952+
protected void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) {
938953
for(int i = rl, offT = rl + offR; i < ru; i++, offT++)
939954
c[offT] += values[getIndex(i)];
940955
}
@@ -950,14 +965,70 @@ protected void decompressToRangeNoOffBy8(double[] c, int r, double[] values) {
950965
c[r + 7] += values[getIndex(r + 7)];
951966
}
952967

953-
public void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) {
968+
protected void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) {
954969
final int h = (ru - rl) % 8;
955970
for(int rc = rl; rc < rl + h; rc++)
956971
c[rc] += values[getIndex(rc)];
957972
for(int rc = rl + h; rc < ru; rc += 8)
958973
decompressToRangeNoOffBy8(c, rc, values);
959974
}
960975

976+
/**
977+
* Split this mapping into {@code multiplier} smaller mappings, distributing the entries in round-robin order.
978+
*
979+
* @param multiplier The number of smaller mappings to construct
980+
* @return The list of smaller mappings
981+
*/
982+
public AMapToData[] splitReshapeDDC(final int multiplier) {
983+
984+
final int s = size();
985+
final AMapToData[] ret = new AMapToData[multiplier];
986+
final int eachSize = s / multiplier;
987+
for(int i = 0; i < multiplier; i++)
988+
ret[i] = MapToFactory.create(eachSize, getUnique());
989+
990+
final int blkz = Math.max(eachSize / 8, 2048) * multiplier;
991+
for(int i = 0; i < s; i += blkz)
992+
splitReshapeDDCBlock(ret, multiplier, i, Math.min(i + blkz, s));
993+
994+
return ret;
995+
}
996+
997+
public AMapToData[] splitReshapeDDCPushDown(final int multiplier, final ExecutorService pool) throws Exception {
998+
999+
final int s = size();
1000+
final AMapToData[] ret = new AMapToData[multiplier];
1001+
final int eachSize = s / multiplier;
1002+
for(int i = 0; i < multiplier; i++)
1003+
ret[i] = MapToFactory.create(eachSize, getUnique());
1004+
1005+
final int blkz = Math.max(eachSize / 8, 2048) * multiplier;
1006+
List<Future<?>> tasks = new ArrayList<>();
1007+
for(int i = 0; i < s; i += blkz) {
1008+
final int start = i;
1009+
final int end = Math.min(i + blkz, s);
1010+
tasks.add(pool.submit(() -> splitReshapeDDCBlock(ret, multiplier, start, end)));
1011+
}
1012+
1013+
for(Future<?> t : tasks)
1014+
t.get();
1015+
1016+
return ret;
1017+
}
1018+
1019+
private void splitReshapeDDCBlock(final AMapToData[] ret, final int multiplier, final int start, final int end) {
1020+
1021+
for(int i = start; i < end; i += multiplier)
1022+
splitReshapeDDCRow(ret, multiplier, i);
1023+
}
1024+
1025+
private void splitReshapeDDCRow(final AMapToData[] ret, final int multiplier, final int i) {
1026+
final int off = i / multiplier;
1027+
final int end = i + multiplier;
1028+
for(int j = i; j < end; j++)
1029+
ret[j % multiplier].set(off, getIndex(j));
1030+
}
1031+
9611032
@Override
9621033
public String toString() {
9631034
final int sz = size();

0 commit comments

Comments
 (0)