Skip to content

Commit 59ca242

Browse files
committed
fix hash nnz
1 parent b33dd7e commit 59ca242

File tree

6 files changed

+23
-23
lines changed

6 files changed

+23
-23
lines changed

src/main/java/org/apache/sysds/runtime/frame/data/columns/ABooleanArray.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public boolean possiblyContainsNaN() {
5959

6060

6161
@Override
62-
protected Map<Boolean, Integer> createRecodeMap(int estimate, ExecutorService pool) {
62+
protected Map<Boolean, Integer> createRecodeMap(int estimate, ExecutorService pool, int k) {
6363
Map<Boolean, Integer> map = new HashMap<>();
6464
int id = 1;
6565
for(int i = 0; i < size() && id <= 2; i++) {

src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import org.apache.commons.logging.LogFactory;
3535
import org.apache.hadoop.io.Writable;
3636
import org.apache.sysds.common.Types.ValueType;
37-
import org.apache.sysds.hops.OptimizerUtils;
3837
import org.apache.sysds.runtime.DMLRuntimeException;
3938
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
4039
import org.apache.sysds.runtime.compress.estim.sample.SampleEstimatorFactory;
@@ -137,7 +136,7 @@ public synchronized final Map<T, Integer> getRecodeMap(int estimate, ExecutorSer
137136
return map;
138137

139138
// construct recode map
140-
map = createRecodeMap(estimate, pool);
139+
map = createRecodeMap(estimate, pool, k);
141140

142141
// put created map into cache
143142
setCache(new SoftReference<>(map));
@@ -153,22 +152,23 @@ public synchronized final Map<T, Integer> getRecodeMap(int estimate, ExecutorSer
153152
* @param estimate The estimate number of unique values inside this array.
154153
* @param pool The thread pool to use for parallel creation of recode map (can be null). (Note this method does
155154
* not shutdown the pool)
155+
* @param k The allowed degree of parallelism
156156
* @return The recode map created.
157157
* @throws ExecutionException if the parallel execution fails
158158
* @throws InterruptedException if the parallel execution fails
159159
*/
160-
protected Map<T, Integer> createRecodeMap(int estimate, ExecutorService pool)
160+
protected Map<T, Integer> createRecodeMap(int estimate, ExecutorService pool, int k)
161161
throws InterruptedException, ExecutionException {
162-
Timing t = new Timing();
162+
final boolean debug = LOG.isDebugEnabled();
163+
final Timing t = debug ? new Timing() : null;
163164
final int s = size();
164-
int k = OptimizerUtils.getTransformNumThreads();
165-
Map<T, Integer> ret;
165+
final Map<T, Integer> ret;
166166
if(k <= 1 || pool == null || s < ROW_PARALLELIZATION_THRESHOLD)
167167
ret = createRecodeMap(estimate, 0, s);
168168
else
169169
ret = parallelCreateRecodeMap(estimate, pool, s, k);
170170

171-
if(LOG.isDebugEnabled()) {
171+
if(debug) {
172172
String base = "CreateRecodeMap estimate: %10d actual %10d time: %10.5f";
173173
LOG.debug(String.format(base, estimate, ret.size(), t.stop()));
174174
}
@@ -230,12 +230,9 @@ private Map<T, Integer> createRecodeMap(Map<T, Integer> map, final int s, final
230230
}
231231

232232
protected int addValRecodeMap(Map<T, Integer> map, int id, int i) {
233-
T val = getInternal(i);
234-
if(val != null) {
235-
Integer v = map.putIfAbsent(val, id);
236-
if(v == null)
237-
id++;
238-
}
233+
final T val = getInternal(i);
234+
if(val != null && map.putIfAbsent(val, id) == null)
235+
id++;
239236
return id;
240237
}
241238

src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,9 @@ public static <T> Array<T> compressToDDC(Array<T> arr, int estimateUnique) {
178178
}
179179

180180
@Override
181-
protected Map<T, Integer> createRecodeMap(int estimate, ExecutorService pool)
181+
protected Map<T, Integer> createRecodeMap(int estimate, ExecutorService pool, int k)
182182
throws InterruptedException, ExecutionException {
183-
return dict.createRecodeMap(estimate, pool);
183+
return dict.createRecodeMap(estimate, pool, k);
184184
}
185185

186186
@Override

src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ public final boolean isNotEmpty(int i) {
669669
}
670670

671671
@Override
672-
protected Map<String, Integer> createRecodeMap(int estimate, ExecutorService pool) throws InterruptedException, ExecutionException {
672+
protected Map<String, Integer> createRecodeMap(int estimate, ExecutorService pool, int k) throws InterruptedException, ExecutionException {
673673
try {
674674
Map<String, Integer> map = new HashMap<>((int) Math.min((long) estimate * 2, size()));
675675
for(int i = 0; i < size(); i++) {
@@ -682,7 +682,7 @@ protected Map<String, Integer> createRecodeMap(int estimate, ExecutorService poo
682682
return map;
683683
}
684684
catch(Exception e) {
685-
return super.createRecodeMap(estimate, pool);
685+
return super.createRecodeMap(estimate, pool, k);
686686
}
687687
}
688688

src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ private AColGroup hash(ColumnEncoderComposite c) {
531531

532532
AMapToData m = createHashMappingAMapToData(a, domain, nulls);
533533
AColGroup ret = ColGroupDDC.create(colIndexes, d, m, null);
534+
nnz.addAndGet(ret.getNumberNonZeros(in.getNumRows()));
534535
return ret;
535536
}
536537

@@ -545,7 +546,9 @@ private AColGroup hashToDummy(ColumnEncoderComposite c) {
545546
return ColGroupConst.create(colIndexes, new double[] {1});
546547
ADictionary d = new IdentityDictionary(colIndexes.size(), nulls);
547548
AMapToData m = createHashMappingAMapToData(a, domain, nulls);
548-
return ColGroupDDC.create(colIndexes, d, m, null);
549+
AColGroup ret = ColGroupDDC.create(colIndexes, d, m, null);
550+
nnz.addAndGet(ret.getNumberNonZeros(in.getNumRows()));
551+
return ret;
549552
}
550553

551554
@SuppressWarnings("unchecked")

src/test/java/org/apache/sysds/performance/frame/Transform.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,11 @@ public static void main(String[] args) throws Exception {
113113
ValueType.UINT4, ValueType.UINT4, ValueType.UINT4, ValueType.UINT4, ValueType.UINT4, ValueType.UINT4},
114114
32);
115115

116-
// System.out.println("10 col without null");
117-
// run10(k, in);
118-
System.out.println("10 col compressed without null");
119-
in = FrameLibCompress.compress(in, k);
116+
System.out.println("10 col without null");
120117
run10(k, in);
118+
// System.out.println("10 col compressed without null");
119+
// in = FrameLibCompress.compress(in, k);
120+
// run10(k, in);
121121

122122
in = TestUtils.generateRandomFrameBlock(
123123
100000 * i, new ValueType[] {ValueType.UINT4, ValueType.UINT4, ValueType.UINT4, ValueType.UINT4,

0 commit comments

Comments
 (0)