Skip to content

Commit d282ccc

Browse files
committed
mapping improved tests
1 parent e735593 commit d282ccc

File tree

14 files changed

+247
-188
lines changed

14 files changed

+247
-188
lines changed

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ protected void copyInt(MapToInt d) {
829829
* @param d The array to copy
830830
*/
831831
public void copyInt(int[] d) {
832-
copyInt(d, 0, size());
832+
copyInt(d, 0, Math.min(d.length, size()));
833833
}
834834

835835
public abstract void copyInt(int[] d, int start, int end);
@@ -846,7 +846,7 @@ public int getMax() {
846846
}
847847

848848
/**
849-
* Get the maximum possible value to encode in this encoding. For instance in a bit you can encode 2 values
849+
* Get the maximum possible value to encode in this encoding. For instance, in a bit you can encode 2 values, therefore the max is 1.
850850
*
851851
* @return The maximum value that can be encoded (one less than the number of distinct values)
852852
*/

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ private static int longSize(int size) {
442442
}
443443

444444
public int getMaxPossible() {
445-
return 2;
445+
return 1;
446446
}
447447

448448
@Override

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ public AMapToData appendN(IMapToDataGroup[] d) {
284284

285285
@Override
286286
public int getMaxPossible() {
287-
return 256;
287+
return 255;
288288
}
289289

290290
@Override

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public class MapToChar extends AMapToData {
4646
private final char[] _data;
4747

4848
protected MapToChar(int size) {
49-
this(Character.MAX_VALUE, size);
49+
this(Character.MAX_VALUE+1, size);
5050
}
5151

5252
public MapToChar(int unique, int size) {
@@ -328,7 +328,7 @@ public AMapToData appendN(IMapToDataGroup[] d) {
328328

329329
@Override
330330
public int getMaxPossible() {
331-
return Character.MAX_VALUE;
331+
return Character.MAX_VALUE ;
332332
}
333333

334334
@Override

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ public class MapToCharPByte extends AMapToData {
3838

3939
private static final long serialVersionUID = 6315708056775476541L;
4040

41-
// 8323073
42-
public static final int max = 0xFFFF * 127;
41+
public static final int max = (0xFFFF + 1) * 128 -1;
4342
private final char[] _data_c;
4443
private final byte[] _data_b; // next byte after the char
4544

@@ -280,7 +279,7 @@ public AMapToData appendN(IMapToDataGroup[] d) {
280279

281280
@Override
282281
public int getMaxPossible() {
283-
return Character.MAX_VALUE * 256;
282+
return (Character.MAX_VALUE+1) * 256 -1;
284283
}
285284

286285
@Override

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -75,42 +75,45 @@ public static AMapToData create(int unique, IntArrayList values) {
7575
return _data;
7676
}
7777

78-
public static AMapToData create(int size, int[] values, int nUnique, int k) {
78+
public static AMapToData create(int size, int[] values, int nUnique, int k) throws Exception {
7979
AMapToData _data = create(size, nUnique);
80-
ExecutorService pool = CommonThreadPool.get(k);
80+
final ExecutorService pool = CommonThreadPool.get(k);
8181
int blk = Math.max((values.length / k), 1024);
8282
blk -= blk % 64; // ensure long size
8383
List<Future<?>> tasks = new ArrayList<>();
84-
for(int i = 0; i < values.length; i += blk){
84+
for(int i = 0; i < values.length; i += blk) {
8585
int start = i;
8686
int end = Math.min(i + blk, values.length);
8787
tasks.add(pool.submit(() -> _data.copyInt(values, start, end)));
8888
}
89+
90+
for(Future<?> t : tasks)
91+
t.get();
8992
return _data;
9093
}
9194

9295
/**
93-
* Create and allocate a map with the given size and support for upto the num tuples argument of values
96+
* Create and allocate a map with the given size and support for up to the num tuples argument of values
9497
*
95-
* @param size The number of cells to allocate
96-
* @param numTuples The maximum value to be able to represent inside the map.
98+
* @param size The number of cells to allocate
99+
* @param unique The number of unique values to support (encoded values range from 0 to unique - 1)
97100
* @return A new map
98101
*/
99-
public static AMapToData create(final int size, final int numTuples) {
100-
if(numTuples <= 1)
102+
public static AMapToData create(final int size, final int unique) {
103+
if(unique <= 1)
101104
return new MapToZero(size);
102-
else if(numTuples == 2 && size > 32)
103-
return new MapToBit(numTuples, size);
104-
else if(numTuples <= 127)
105-
return new MapToUByte(numTuples, size);
106-
else if(numTuples <= 256)
107-
return new MapToByte(numTuples, size);
108-
else if(numTuples <= Character.MAX_VALUE + 1)
109-
return new MapToChar(numTuples, size);
110-
else if(numTuples <= MapToCharPByte.max)
111-
return new MapToCharPByte(numTuples, size);
105+
else if(unique == 2 && size > 32)
106+
return new MapToBit(unique, size);
107+
else if(unique <= 128)
108+
return new MapToUByte(unique, size);
109+
else if(unique <= 256)
110+
return new MapToByte(unique, size);
111+
else if(unique <= Character.MAX_VALUE + 1)
112+
return new MapToChar(unique, size);
113+
else if(unique <= MapToCharPByte.max + 1)
114+
return new MapToCharPByte(unique, size);
112115
else
113-
return new MapToInt(numTuples, size);
116+
return new MapToInt(unique, size);
114117
}
115118

116119
/**
@@ -181,20 +184,20 @@ public static AMapToData resizeForce(AMapToData d, MAP_TYPE t) {
181184
/**
182185
* Estimate the size in memory of a MapToFactory.
183186
*
184-
* @param size The size of the mapping
185-
* @param numTuples The number of unique values to be supported by the mapping
187+
* @param size The size of the mapping
188+
* @param unique The number of unique values to support (encoded values range from 0 to unique - 1)
186189
* @return The size in number of bytes.
187190
*/
188-
public static long estimateInMemorySize(int size, int numTuples) {
189-
if(numTuples <= 1)
191+
public static long estimateInMemorySize(int size, int unique) {
192+
if(unique <= 1)
190193
return MapToZero.getInMemorySize(size);
191-
else if(numTuples == 2 && size > 32)
194+
else if(unique == 2 && size > 32)
192195
return MapToBit.getInMemorySize(size);
193-
else if(numTuples <= 256)
196+
else if(unique <= 256)
194197
return MapToByte.getInMemorySize(size);
195-
else if(numTuples <= Character.MAX_VALUE + 1)
198+
else if(unique <= Character.MAX_VALUE + 1)
196199
return MapToChar.getInMemorySize(size);
197-
else if(numTuples <= MapToCharPByte.max)
200+
else if(unique <= MapToCharPByte.max)
198201
return MapToCharPByte.getInMemorySize(size);
199202
else
200203
return MapToInt.getInMemorySize(size);

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class MapToInt extends AMapToData {
3737
private final int[] _data;
3838

3939
protected MapToInt(int size) {
40-
this(Character.MAX_VALUE + 1, size);
40+
this(Integer.MAX_VALUE, size);
4141
}
4242

4343
public MapToInt(int unique, int size) {
@@ -226,11 +226,11 @@ public AMapToData resize(int unique) {
226226
return new MapToZero(size);
227227
else if(unique == 2 && size > 32)
228228
ret = new MapToBit(unique, size);
229-
else if(unique <= 127)
229+
else if(unique < 128)
230230
ret = new MapToUByte(unique, size);
231231
else if(unique < 256)
232232
ret = new MapToByte(unique, size);
233-
else if(unique < Character.MAX_VALUE - 1)
233+
else if(unique < Character.MAX_VALUE )
234234
ret = new MapToChar(unique, size);
235235
else if(unique < MapToCharPByte.max)
236236
ret = new MapToCharPByte(unique, size);

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ public class MapToUByte extends MapToByte {
3131
private static final long serialVersionUID = -2498505439667351828L;
3232

3333
protected MapToUByte(int size) {
34-
this(127, size);
34+
this(128, size);
3535
}
3636

3737
public MapToUByte(int unique, int size) {
38-
super(Math.min(unique, 127), new byte[size]);
38+
super(Math.min(unique, 128), new byte[size]);
3939
}
4040

4141
protected MapToUByte(int unique, byte[] data) {
@@ -126,7 +126,7 @@ public int[] getCounts(int[] ret) {
126126

127127
@Override
128128
public int getMaxPossible() {
129-
return 128;
129+
return 127;
130130
}
131131

132132
@Override

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ public AMapToData appendN(IMapToDataGroup[] d) {
185185

186186
@Override
187187
public int getMaxPossible() {
188-
return 1;
188+
return 0;
189189
}
190190

191191
@Override

src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ private Pair<ICLAScheme, AColGroup> tryUpdateAndEncode(MatrixBlock data, ReaderC
189189
while((cellVals = reader.nextRow()) != null) {
190190
final int row = reader.getCurrentRowIndex();
191191
final int id = map.increment(cellVals);
192-
if(id >= max)
192+
if(id > max)
193193
throw new DMLCompressionException("Failed update and encode with " + max + " possible values");
194194
d.set(row, id);
195195
}
@@ -204,7 +204,7 @@ private Pair<ICLAScheme, AColGroup> tryUpdateAndEncode(MatrixBlock data, ReaderC
204204
d.set(r++, emptyIdx.id);
205205
}
206206
final int id = map.increment(cellVals);
207-
if(id >= max)
207+
if(id > max)
208208
throw new DMLCompressionException(
209209
"Failed update and encode with " + max + " possible values" + map + " " + map.size());
210210
d.set(row, id);

0 commit comments

Comments
 (0)