Skip to content

Commit 0458808

Browse files
committed
HashMapToInt
1 parent 59ca242 commit 0458808

File tree

14 files changed

+374
-105
lines changed

14 files changed

+374
-105
lines changed

src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -232,21 +232,21 @@ public void copyInt(int[] d, int start, int end) {
232232
public int[] getCounts(int[] ret) {
233233
final int h = (_data.length) % 8;
234234
for(int i = 0; i < h; i++)
235-
ret[_data[i]]++;
235+
ret[getIndex(i)]++;
236236
getCountsBy8P(ret, h, _data.length);
237237
return ret;
238238
}
239239

240240
private void getCountsBy8P(int[] ret, int s, int e) {
241241
for(int i = s; i < e; i += 8) {
242-
ret[_data[i]]++;
243-
ret[_data[i + 1]]++;
244-
ret[_data[i + 2]]++;
245-
ret[_data[i + 3]]++;
246-
ret[_data[i + 4]]++;
247-
ret[_data[i + 5]]++;
248-
ret[_data[i + 6]]++;
249-
ret[_data[i + 7]]++;
242+
ret[getIndex(i)]++;
243+
ret[getIndex(i + 1)]++;
244+
ret[getIndex(i + 2)]++;
245+
ret[getIndex(i + 3)]++;
246+
ret[getIndex(i + 4)]++;
247+
ret[getIndex(i + 5)]++;
248+
ret[getIndex(i + 6)]++;
249+
ret[getIndex(i + 7)]++;
250250
}
251251
}
252252

src/main/java/org/apache/sysds/runtime/frame/data/columns/ABooleanArray.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
package org.apache.sysds.runtime.frame.data.columns;
2121

22-
import java.util.HashMap;
23-
import java.util.Map;
2422
import java.util.concurrent.ExecutorService;
2523

2624
public abstract class ABooleanArray extends Array<Boolean> {
@@ -59,12 +57,12 @@ public boolean possiblyContainsNaN() {
5957

6058

6159
@Override
62-
protected Map<Boolean, Integer> createRecodeMap(int estimate, ExecutorService pool, int k) {
63-
Map<Boolean, Integer> map = new HashMap<>();
60+
protected HashMapToInt<Boolean> createRecodeMap(int estimate, ExecutorService pool, int k) {
61+
HashMapToInt<Boolean> map = new HashMapToInt<Boolean>(2);
6462
int id = 1;
6563
for(int i = 0; i < size() && id <= 2; i++) {
66-
Integer v = map.putIfAbsent(get(i), id);
67-
if(v == null)
64+
int v = map.putIfAbsentI(get(i), id);
65+
if(v == -1)
6866
id++;
6967
}
7068
return map;

src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public abstract class Array<T> implements Writable {
5353
public static int ROW_PARALLELIZATION_THRESHOLD = 10000;
5454

5555
/** A soft reference to a memoization of this array's mapping, used in transformEncode */
56-
protected SoftReference<Map<T, Integer>> _rcdMapCache = null;
56+
protected SoftReference<Map<T,Integer>> _rcdMapCache = null;
5757

5858
/** The current allocated number of elements in this Array */
5959
protected int _size;
@@ -73,7 +73,7 @@ protected int newSize() {
7373
*
7474
* @return The cached recode map
7575
*/
76-
public final SoftReference<Map<T, Integer>> getCache() {
76+
public final SoftReference<Map<T,Integer>> getCache() {
7777
return _rcdMapCache;
7878
}
7979

@@ -82,7 +82,7 @@ public final SoftReference<Map<T, Integer>> getCache() {
8282
*
8383
* @param m The element to cache.
8484
*/
85-
public final void setCache(SoftReference<Map<T, Integer>> m) {
85+
public final void setCache(SoftReference<Map<T,Integer>> m) {
8686
_rcdMapCache = m;
8787
}
8888

@@ -121,16 +121,16 @@ public synchronized final Map<T, Integer> getRecodeMap(int estimate) {
121121
*
122122
* @param estimate the estimated number of unique values in this array.
123123
* @param pool An executor pool to be used for parallel execution (Note this method does not shut down the pool)
124-
* @param k Parallelization degree allowed
124+
* @param k Parallelization degree allowed
125125
* @return A recode map
126126
* @throws ExecutionException if the parallel execution fails
127127
* @throws InterruptedException if the parallel execution fails
128128
*/
129-
public synchronized final Map<T, Integer> getRecodeMap(int estimate, ExecutorService pool, int k)
129+
public synchronized final Map<T,Integer> getRecodeMap(int estimate, ExecutorService pool, int k)
130130
throws InterruptedException, ExecutionException {
131131
// probe cache for existing map
132-
Map<T, Integer> map;
133-
SoftReference<Map<T, Integer>> tmp = getCache();
132+
Map<T,Integer> map;
133+
SoftReference<Map<T,Integer>> tmp = getCache();
134134
map = (tmp != null) ? tmp.get() : null;
135135
if(map != null)
136136
return map;
@@ -152,17 +152,17 @@ public synchronized final Map<T, Integer> getRecodeMap(int estimate, ExecutorSer
152152
* @param estimate The estimate number of unique values inside this array.
153153
* @param pool The thread pool to use for parallel creation of recode map (can be null). (Note this method does
154154
* not shut down the pool)
155-
* @param k The allowed degree of parallelism
155+
* @param k The allowed degree of parallelism
156156
* @return The recode map created.
157157
* @throws ExecutionException if the parallel execution fails
158158
* @throws InterruptedException if the parallel execution fails
159159
*/
160-
protected Map<T, Integer> createRecodeMap(int estimate, ExecutorService pool, int k)
160+
protected HashMapToInt<T> createRecodeMap(int estimate, ExecutorService pool, int k)
161161
throws InterruptedException, ExecutionException {
162162
final boolean debug = LOG.isDebugEnabled();
163163
final Timing t = debug ? new Timing() : null;
164164
final int s = size();
165-
final Map<T, Integer> ret;
165+
final HashMapToInt<T> ret;
166166
if(k <= 1 || pool == null || s < ROW_PARALLELIZATION_THRESHOLD)
167167
ret = createRecodeMap(estimate, 0, s);
168168
else
@@ -175,21 +175,21 @@ protected Map<T, Integer> createRecodeMap(int estimate, ExecutorService pool, in
175175
return ret;
176176
}
177177

178-
private Map<T, Integer> parallelCreateRecodeMap(int estimate, ExecutorService pool, final int s, int k)
178+
private HashMapToInt<T> parallelCreateRecodeMap(int estimate, ExecutorService pool, final int s, int k)
179179
throws InterruptedException, ExecutionException {
180180

181181
final int blk = Math.max(ROW_PARALLELIZATION_THRESHOLD / 2, (s + k) / k);
182-
final List<Future<Map<T, Integer>>> tasks = new ArrayList<>();
182+
final List<Future<HashMapToInt<T>>> tasks = new ArrayList<>();
183183
for(int i = blk; i < s; i += blk) { // start at blk for the other threads
184184
final int start = i;
185185
final int end = Math.min(i + blk, s);
186186
tasks.add(pool.submit(() -> createRecodeMap(estimate, start, end)));
187187
}
188188
// make the initial map thread local allocation.
189-
final Map<T, Integer> map = new HashMap<>((int) (estimate * 1.3));
189+
final HashMapToInt<T> map = new HashMapToInt<T>((int) (estimate * 1.3));
190190
createRecodeMap(map, 0, blk);
191191
for(int i = 0; i < tasks.size(); i++) { // merge with other threads work.
192-
final Map<T, Integer> map2 = tasks.get(i).get();
192+
final HashMapToInt<T> map2 = tasks.get(i).get();
193193
mergeRecodeMaps(map, map2);
194194
}
195195
return map;
@@ -216,22 +216,22 @@ protected static <T> void mergeRecodeMaps(Map<T, Integer> target, Map<T, Integer
216216
}
217217
}
218218

219-
private Map<T, Integer> createRecodeMap(final int estimate, final int s, final int e) {
219+
protected HashMapToInt<T> createRecodeMap(final int estimate, final int s, final int e) {
220220
// * 1.3 because the hash map has a load factor of 0.75
221-
final Map<T, Integer> map = new HashMap<>((int) (Math.min((long) estimate, (e - s)) * 1.3));
221+
final HashMapToInt<T> map = new HashMapToInt<>((int) (Math.min((long) estimate, (e - s)) * 1.3));
222222
return createRecodeMap(map, s, e);
223223
}
224224

225-
private Map<T, Integer> createRecodeMap(Map<T, Integer> map, final int s, final int e) {
225+
protected HashMapToInt<T> createRecodeMap(HashMapToInt<T> map, final int s, final int e) {
226226
int id = 1;
227227
for(int i = s; i < e; i++)
228228
id = addValRecodeMap(map, id, i);
229229
return map;
230230
}
231231

232-
protected int addValRecodeMap(Map<T, Integer> map, int id, int i) {
232+
protected int addValRecodeMap(HashMapToInt<T> map, int id, int i) {
233233
final T val = getInternal(i);
234-
if(val != null && map.putIfAbsent(val, id) == null)
234+
if(val != null && map.putIfAbsentI(val, id) == -1)
235235
id++;
236236
return id;
237237
}
@@ -1040,8 +1040,8 @@ public double[] minMax(int l, int u) {
10401040
* @param m The MapToData to set the value part of the Map from
10411041
* @param i The index to set in m
10421042
*/
1043-
public void setM(Map<T, Integer> map, AMapToData m, int i) {
1044-
m.set(i, map.get(getInternal(i)).intValue() - 1);
1043+
public void setM(HashMapToInt<T> map, AMapToData m, int i) {
1044+
m.set(i, map.getI(getInternal(i)) - 1);
10451045
}
10461046

10471047
/**
@@ -1053,17 +1053,17 @@ public void setM(Map<T, Integer> map, AMapToData m, int i) {
10531053
* @param m The MapToData to set the value part of the Map from
10541054
* @param i The index to set in m
10551055
*/
1056-
public void setM(Map<T, Integer> map, int si, AMapToData m, int i) {
1057-
try {
1058-
final T v = getInternal(i);
1059-
if(v != null)
1060-
m.set(i, map.get(v).intValue() - 1);
1061-
else
1062-
m.set(i, si);
1063-
}
1064-
catch(Exception e) {
1065-
String error = "expected: " + getInternal(i) + " to be in map: " + map;
1066-
throw new RuntimeException(error, e);
1067-
}
1056+
public void setM(HashMapToInt<T> map, int si, AMapToData m, int i) {
1057+
// try {
1058+
final T v = getInternal(i);
1059+
if(v != null)
1060+
m.set(i, map.getI(v) - 1);
1061+
else
1062+
m.set(i, si);
1063+
// }
1064+
// catch(Exception e) {
1065+
// String error = "expected: " + getInternal(i) + " to be in map: " + map;
1066+
// throw new RuntimeException(error, e);
1067+
// }
10681068
}
10691069
}

src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ public static <T> Array<T> compressToDDC(Array<T> arr, int estimateUnique) {
178178
}
179179

180180
@Override
181-
protected Map<T, Integer> createRecodeMap(int estimate, ExecutorService pool, int k)
181+
protected HashMapToInt<T> createRecodeMap(int estimate, ExecutorService pool, int k)
182182
throws InterruptedException, ExecutionException {
183183
return dict.createRecodeMap(estimate, pool, k);
184184
}

src/main/java/org/apache/sysds/runtime/frame/data/columns/HashIntegerArray.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import java.io.DataOutput;
2424
import java.io.IOException;
2525
import java.util.Arrays;
26-
import java.util.Map;
2726

2827
import org.apache.commons.lang3.NotImplementedException;
2928
import org.apache.sysds.common.Types.ValueType;
@@ -434,7 +433,7 @@ public boolean possiblyContainsNaN() {
434433
}
435434

436435
@Override
437-
protected int addValRecodeMap(Map<Object, Integer> map, int id, int i) {
436+
protected int addValRecodeMap(HashMapToInt<Object> map, int id, int i) {
438437
Integer val = Integer.valueOf(getInt(i));
439438
Integer v = map.putIfAbsent(val, id);
440439
if(v == null)
@@ -443,12 +442,12 @@ protected int addValRecodeMap(Map<Object, Integer> map, int id, int i) {
443442
}
444443

445444
@Override
446-
public void setM(Map<Object, Integer> map, AMapToData m, int i) {
445+
public void setM(HashMapToInt<Object> map, AMapToData m, int i) {
447446
m.set(i, map.get(Integer.valueOf(getInt(i))).intValue() - 1);
448447
}
449448

450449
@Override
451-
public void setM(Map<Object, Integer> map, int si, AMapToData m, int i) {
450+
public void setM(HashMapToInt<Object> map, int si, AMapToData m, int i) {
452451
final Integer v = Integer.valueOf(getInt(i));
453452
m.set(i, map.get(v).intValue() - 1);
454453
}

src/main/java/org/apache/sysds/runtime/frame/data/columns/HashLongArray.java

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import java.io.DataOutput;
2424
import java.io.IOException;
2525
import java.util.Arrays;
26-
import java.util.Map;
2726

2827
import org.apache.commons.lang3.NotImplementedException;
2928
import org.apache.sysds.common.Types.ValueType;
@@ -430,22 +429,12 @@ public boolean possiblyContainsNaN() {
430429
}
431430

432431
@Override
433-
protected int addValRecodeMap(Map<Object, Integer> map, int id, int i) {
434-
Long val = Long.valueOf(getLong(i));
435-
Integer v = map.putIfAbsent(val, id);
436-
if(v == null)
437-
id++;
438-
439-
return id;
440-
}
441-
442-
@Override
443-
public void setM(Map<Object, Integer> map, AMapToData m, int i) {
432+
public void setM(HashMapToInt<Object> map, AMapToData m, int i) {
444433
m.set(i, map.get(Long.valueOf(getLong(i))) - 1);
445434
}
446435

447436
@Override
448-
public void setM(Map<Object, Integer> map, int si, AMapToData m, int i) {
437+
public void setM(HashMapToInt<Object> map, int si, AMapToData m, int i) {
449438
m.set(i, map.get(Long.valueOf(getLong(i))) - 1);
450439
}
451440

0 commit comments

Comments
 (0)