Skip to content

Commit 62e4461

Browse files
authored
Update histogram implementation to FastUtil and use 64-bit bucket counts (#108)
* Update histogram implementation to FastUtil and use 64-bit bucket counts. * Fix histogram iteration.
1 parent e40d6dc commit 62e4461

File tree

6 files changed

+123
-47
lines changed

6 files changed

+123
-47
lines changed

pom.xml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
<apache.httpcore.version>4.4.5</apache.httpcore.version>
110110
<arpnetworking.commons.version>1.13.3</arpnetworking.commons.version>
111111
<aspectjrt.version>1.9.1</aspectjrt.version>
112+
<asynchttpclient.version>2.0.24</asynchttpclient.version>
112113
<cglib.version>3.2.5</cglib.version>
113114
<commons.codec.version>1.10</commons.codec.version>
114115
<ebean.version>6.8.1</ebean.version>
@@ -118,7 +119,7 @@
118119
<guice.version>4.1.0</guice.version>
119120
<hamcrest.version>2.0.0.0</hamcrest.version>
120121
<hikari.version>2.5.1</hikari.version>
121-
<asynchttpclient.version>2.0.24</asynchttpclient.version>
122+
<fastutil.version>8.3.1</fastutil.version>
122123
<h2.version>1.4.193</h2.version>
123124
<jackson.version>2.9.2</jackson.version>
124125
<javassist.version>3.22.0-GA</javassist.version>
@@ -136,7 +137,7 @@
136137
<metrics.client.version>0.11.1</metrics.client.version>
137138
<metrics.jvm.extra.version>0.11.0</metrics.jvm.extra.version>
138139
<metrics.http.extra.version>0.11.1</metrics.http.extra.version>
139-
<metrics.aggregator.protocol.version>1.0.6</metrics.aggregator.protocol.version>
140+
<metrics.aggregator.protocol.version>1.0.8</metrics.aggregator.protocol.version>
140141
<mockito.version>2.12.0</mockito.version>
141142
<netty.version>3.10.3.Final</netty.version>
142143
<netty.all.version>4.0.21.Final</netty.all.version>
@@ -733,6 +734,14 @@
733734
<artifactId>oval</artifactId>
734735
<version>${oval.version}</version>
735736
</dependency>
737+
<dependency>
738+
<groupId>it.unimi.dsi</groupId>
739+
<artifactId>fastutil</artifactId>
740+
<version>${fastutil.version}</version>
741+
<!-- Jackson support not yet available:
742+
https://github.com/FasterXML/jackson-datatypes-collections/issues/38
743+
-->
744+
</dependency>
736745
<dependency>
737746
<groupId>org.javassist</groupId>
738747
<artifactId>javassist</artifactId>

src/main/java/com/arpnetworking/clusteraggregator/models/CombinedMetricData.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ public static Builder fromStatisticSetRecord(final Messages.StatisticSetRecord r
276276
final HistogramStatistic.Histogram histogram = new HistogramStatistic.Histogram();
277277
for (final Messages.SparseHistogramEntry entry : supportingData.getEntriesList()) {
278278
final double bucket = entry.getBucket();
279-
final int count = entry.getCount();
279+
final long count = entry.getCount();
280280
histogram.recordValue(bucket, count);
281281
}
282282

src/main/java/com/arpnetworking/tsdcore/sinks/KairosDbSink.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.fasterxml.jackson.databind.ObjectMapper;
3636
import com.google.common.collect.ImmutableMap;
3737
import com.google.common.collect.Lists;
38+
import it.unimi.dsi.fastutil.doubles.Double2LongMap;
3839
import net.sf.oval.constraint.Min;
3940
import net.sf.oval.constraint.NotNull;
4041
import org.joda.time.Period;
@@ -323,8 +324,8 @@ public void serializeHistogram(
323324
chunkGenerator.writeNumberField("mean", additionalData.getMean());
324325
chunkGenerator.writeNumberField("sum", additionalData.getSum());
325326
chunkGenerator.writeObjectFieldStart("bins");
326-
for (Map.Entry<Double, Integer> bin : bins.getValues()) {
327-
chunkGenerator.writeNumberField(bin.getKey().toString(), bin.getValue());
327+
for (Double2LongMap.Entry bin : bins.getValues()) {
328+
chunkGenerator.writeNumberField(String.valueOf(bin.getDoubleKey()), bin.getLongValue());
328329
}
329330

330331
chunkGenerator.writeEndObject(); //close bins

src/main/java/com/arpnetworking/tsdcore/sinks/circonus/CirconusSinkActor.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import com.google.common.collect.Lists;
3535
import com.google.common.collect.Maps;
3636
import com.google.common.collect.Sets;
37+
import it.unimi.dsi.fastutil.doubles.Double2LongMap;
3738
import org.joda.time.Period;
3839
import org.joda.time.format.ISOPeriodFormat;
3940
import play.libs.ws.StandaloneWSResponse;
@@ -287,9 +288,9 @@ private Map<String, Object> serialize(final Collection<AggregatedData> data) {
287288
final HistogramStatistic.HistogramSupportingData histogramSupportingData = (HistogramStatistic.HistogramSupportingData)
288289
aggregatedData.getSupportingData();
289290
final HistogramStatistic.HistogramSnapshot histogram = histogramSupportingData.getHistogramSnapshot();
290-
final ArrayList<String> valueList = new ArrayList<>(histogram.getEntriesCount());
291+
final List<String> valueList = new ArrayList<>((int) histogram.getEntriesCount());
291292
final MathContext context = new MathContext(2, RoundingMode.DOWN);
292-
for (final Map.Entry<Double, Integer> entry : histogram.getValues()) {
293+
for (final Double2LongMap.Entry entry : histogram.getValues()) {
293294
for (int i = 0; i < entry.getValue(); i++) {
294295
final BigDecimal decimal = new BigDecimal(entry.getKey(), context);
295296
final String bucketString = String.format("H[%s]=%d", decimal.toPlainString(), entry.getValue());

src/main/java/com/arpnetworking/tsdcore/statistics/HistogramStatistic.java

Lines changed: 89 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@
2020
import com.arpnetworking.tsdcore.model.CalculatedValue;
2121
import com.arpnetworking.tsdcore.model.Quantity;
2222
import com.arpnetworking.tsdcore.model.Unit;
23+
import it.unimi.dsi.fastutil.doubles.Double2LongAVLTreeMap;
24+
import it.unimi.dsi.fastutil.doubles.Double2LongMap;
25+
import it.unimi.dsi.fastutil.doubles.Double2LongSortedMap;
26+
import it.unimi.dsi.fastutil.objects.ObjectSortedSet;
2327
import net.sf.oval.constraint.NotNull;
2428

2529
import java.util.List;
2630
import java.util.Map;
2731
import java.util.Optional;
28-
import java.util.Set;
29-
import java.util.TreeMap;
3032

3133
/**
3234
* Histogram statistic. This is a supporting statistic and does not produce
@@ -59,6 +61,7 @@ public Quantity calculateAggregations(final List<AggregatedData> aggregations) {
5961

6062
private HistogramStatistic() { }
6163

64+
private static final int DEFAULT_PRECISION = 7;
6265
private static final long serialVersionUID = 7060886488604176233L;
6366

6467
/**
@@ -183,9 +186,9 @@ public HistogramSnapshot getHistogramSnapshot() {
183186
public HistogramSupportingData toUnit(final Unit newUnit) {
184187
if (_unit.isPresent()) {
185188
final Histogram newHistogram = new Histogram();
186-
for (final Map.Entry<Double, Integer> entry : _histogramSnapshot.getValues()) {
187-
final Double newBucket = newUnit.convert(entry.getKey(), _unit.get());
188-
newHistogram.recordValue(newBucket, entry.getValue());
189+
for (final Double2LongMap.Entry entry : _histogramSnapshot.getValues()) {
190+
final double newBucket = newUnit.convert(entry.getDoubleKey(), _unit.get());
191+
newHistogram.recordValue(newBucket, entry.getLongValue());
189192
}
190193
return new HistogramSupportingData.Builder()
191194
.setHistogramSnapshot(newHistogram.getSnapshot())
@@ -249,14 +252,15 @@ public Builder setUnit(final Optional<Unit> value) {
249252
*/
250253
public static final class Histogram {
251254

255+
252256
/**
253257
* Records a value into the histogram.
254258
*
255259
* @param value The value of the entry.
256260
* @param count The number of entries at this value.
257261
*/
258-
public void recordValue(final double value, final int count) {
259-
_data.merge(truncate(value), count, (i, j) -> i + j);
262+
public void recordValue(final double value, final long count) {
263+
_data.merge(truncateToDouble(value), count, Long::sum);
260264
_entriesCount += count;
261265
}
262266

@@ -269,29 +273,83 @@ public void recordValue(final double value) {
269273
recordValue(value, 1);
270274
}
271275

276+
/**
277+
* Records a packed value into the histogram. The packed values must be at the
278+
* precision that this {@link Histogram} was declared with!
279+
*
280+
* @param packed The packed bucket key.
281+
* @param count The number of entries at this value.
282+
*/
283+
public void recordPacked(final long packed, final long count) {
284+
recordValue(unpack(packed), count);
285+
}
286+
272287
/**
273288
* Adds a histogram snapshot to this one.
274289
*
275290
* @param histogramSnapshot The histogram snapshot to add to this one.
276291
*/
277292
public void add(final HistogramSnapshot histogramSnapshot) {
278-
for (final Map.Entry<Double, Integer> entry : histogramSnapshot._data.entrySet()) {
279-
_data.merge(entry.getKey(), entry.getValue(), (i, j) -> i + j);
293+
for (final Double2LongMap.Entry entry : histogramSnapshot._data.double2LongEntrySet()) {
294+
_data.merge(entry.getDoubleKey(), entry.getLongValue(), Long::sum);
280295
}
281296
_entriesCount += histogramSnapshot._entriesCount;
282297
}
283298

284299
public HistogramSnapshot getSnapshot() {
285-
return new HistogramSnapshot(_data, _entriesCount);
300+
return new HistogramSnapshot(_data, _entriesCount, _precision);
301+
}
302+
303+
long truncateToLong(final double val) {
304+
return Double.doubleToRawLongBits(val) & _truncateMask;
305+
}
306+
307+
double truncateToDouble(final double val) {
308+
return Double.longBitsToDouble(truncateToLong(val));
309+
}
310+
311+
long pack(final double val) {
312+
final long truncated = truncateToLong(val);
313+
final long shifted = truncated >> (MANTISSA_BITS - _precision);
314+
return shifted & _packMask;
315+
}
316+
317+
double unpack(final long packed) {
318+
return Double.longBitsToDouble(packed << (MANTISSA_BITS - _precision));
286319
}
287320

288-
private static double truncate(final double val) {
289-
final long mask = 0xffffe00000000000L;
290-
return Double.longBitsToDouble(Double.doubleToRawLongBits(val) & mask);
321+
/**
322+
* Public constructor.
323+
*/
324+
public Histogram() {
325+
this(DEFAULT_PRECISION);
326+
}
327+
328+
/**
329+
* Public constructor.
330+
*
331+
* @param precision the bits of mantissa precision in the bucket key
332+
*/
333+
public Histogram(final int precision) {
334+
// TODO(ville): Support variable precision histograms end-to-end.
335+
if (precision != DEFAULT_PRECISION) {
336+
throw new IllegalArgumentException("The stack does not fully support variable precision histograms.");
337+
}
338+
339+
_precision = precision;
340+
_truncateMask = BASE_MASK >> _precision;
341+
_packMask = (1 << (_precision + EXPONENT_BITS + 1)) - 1;
291342
}
292343

293344
private int _entriesCount = 0;
294-
private final TreeMap<Double, Integer> _data = new TreeMap<>();
345+
private final Double2LongSortedMap _data = new Double2LongAVLTreeMap();
346+
private final int _precision;
347+
private final long _truncateMask;
348+
private final int _packMask;
349+
350+
private static final int MANTISSA_BITS = 52;
351+
private static final int EXPONENT_BITS = 11;
352+
private static final long BASE_MASK = (1L << (MANTISSA_BITS + EXPONENT_BITS)) >> EXPONENT_BITS;
295353
}
296354

297355
/**
@@ -300,7 +358,8 @@ private static double truncate(final double val) {
300358
* @author Brandon Arp (brandon dot arp at inscopemetrics dot com)
301359
*/
302360
public static final class HistogramSnapshot {
303-
private HistogramSnapshot(final TreeMap<Double, Integer> data, final int entriesCount) {
361+
private HistogramSnapshot(final Double2LongSortedMap data, final long entriesCount, final int precision) {
362+
_precision = precision;
304363
_entriesCount = entriesCount;
305364
_data.putAll(data);
306365
}
@@ -316,24 +375,30 @@ public Double getValueAtPercentile(final double percentile) {
316375
// The Math.min is for the case where the computation may be just
317376
// slightly larger than the _entriesCount and prevents an index out of range.
318377
final int target = (int) Math.min(Math.ceil(_entriesCount * percentile / 100.0D), _entriesCount);
319-
int accumulated = 0;
320-
for (final Map.Entry<Double, Integer> next : _data.entrySet()) {
321-
accumulated += next.getValue();
378+
long accumulated = 0;
379+
for (final Double2LongMap.Entry next : _data.double2LongEntrySet()) {
380+
accumulated += next.getLongValue();
322381
if (accumulated >= target) {
323-
return next.getKey();
382+
return next.getDoubleKey();
324383
}
325384
}
326385
return 0D;
327386
}
328387

329-
public int getEntriesCount() {
388+
public int getPrecision() {
389+
return _precision;
390+
}
391+
392+
public long getEntriesCount() {
330393
return _entriesCount;
331394
}
332395

333-
public Set<Map.Entry<Double, Integer>> getValues() {
334-
return _data.entrySet();
396+
public ObjectSortedSet<Double2LongMap.Entry> getValues() {
397+
return _data.double2LongEntrySet();
335398
}
336-
private int _entriesCount = 0;
337-
private final TreeMap<Double, Integer> _data = new TreeMap<>();
399+
400+
private long _entriesCount = 0;
401+
private final Double2LongSortedMap _data = new Double2LongAVLTreeMap();
402+
private final int _precision;
338403
}
339404
}

src/test/java/com/arpnetworking/tsdcore/statistics/HistogramStatisticTest.java

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
import com.arpnetworking.tsdcore.model.CalculatedValue;
1919
import com.arpnetworking.tsdcore.model.Quantity;
2020
import com.arpnetworking.tsdcore.model.Unit;
21+
import it.unimi.dsi.fastutil.doubles.Double2LongMap;
2122
import org.junit.Assert;
2223
import org.junit.Test;
2324

2425
import java.util.Collections;
25-
import java.util.Map;
2626

2727
/**
2828
* Tests the HistogramStatistic class.
@@ -40,8 +40,8 @@ public void histogramAccumulateQuantities() {
4040
final CalculatedValue<HistogramStatistic.HistogramSupportingData> value = accumulator.calculate(Collections.emptyMap());
4141
final HistogramStatistic.HistogramSupportingData supportingData = value.getData();
4242
final HistogramStatistic.HistogramSnapshot histogram = supportingData.getHistogramSnapshot();
43-
for (final Map.Entry<Double, Integer> entry : histogram.getValues()) {
44-
Assert.assertEquals(entry.getValue(), (Integer) 1);
43+
for (final Double2LongMap.Entry entry : histogram.getValues()) {
44+
Assert.assertEquals(entry.getLongValue(), 1L);
4545
}
4646
}
4747

@@ -59,8 +59,8 @@ public void histogramAccumulateHistogram() {
5959
final CalculatedValue<HistogramStatistic.HistogramSupportingData> value = merged.calculate(Collections.emptyMap());
6060
final HistogramStatistic.HistogramSupportingData supportingData = value.getData();
6161
final HistogramStatistic.HistogramSnapshot histogram = supportingData.getHistogramSnapshot();
62-
for (final Map.Entry<Double, Integer> entry : histogram.getValues()) {
63-
Assert.assertEquals(entry.getValue(), (Integer) 1);
62+
for (final Double2LongMap.Entry entry : histogram.getValues()) {
63+
Assert.assertEquals(entry.getLongValue(), 1L);
6464
}
6565
}
6666

@@ -90,14 +90,14 @@ public void histogramAccumulateMultipleHistogram() {
9090
final CalculatedValue<HistogramStatistic.HistogramSupportingData> value = merged.calculate(Collections.emptyMap());
9191
final HistogramStatistic.HistogramSupportingData supportingData = value.getData();
9292
final HistogramStatistic.HistogramSnapshot histogram = supportingData.getHistogramSnapshot();
93-
for (final Map.Entry<Double, Integer> entry : histogram.getValues()) {
94-
final int val = entry.getKey().intValue();
93+
for (final Double2LongMap.Entry entry : histogram.getValues()) {
94+
final int val = (int) entry.getDoubleKey();
9595
if (val < 50) {
96-
Assert.assertEquals("incorrect value for key " + val, (Integer) 1, entry.getValue());
96+
Assert.assertEquals("incorrect value for key " + val, 1L, entry.getLongValue());
9797
} else if (val <= 100) {
98-
Assert.assertEquals("incorrect value for key " + val, (Integer) 2, entry.getValue());
98+
Assert.assertEquals("incorrect value for key " + val, 2L, entry.getLongValue());
9999
} else { // val > 100
100-
Assert.assertEquals("incorrect value for key " + val, (Integer) 1, entry.getValue());
100+
Assert.assertEquals("incorrect value for key " + val, 1L, entry.getLongValue());
101101
}
102102
}
103103

@@ -150,9 +150,9 @@ public void histogramAccumulateMultipleHistogramConversion() {
150150
final CalculatedValue<HistogramStatistic.HistogramSupportingData> value = merged.calculate(Collections.emptyMap());
151151
final HistogramStatistic.HistogramSupportingData supportingData = value.getData();
152152
final HistogramStatistic.HistogramSnapshot histogram = supportingData.getHistogramSnapshot();
153-
for (final Map.Entry<Double, Integer> entry : histogram.getValues()) {
153+
for (final Double2LongMap.Entry entry : histogram.getValues()) {
154154

155-
Assert.assertTrue(entry.getKey() <= 100);
155+
Assert.assertTrue(entry.getDoubleKey() <= 100.0);
156156
}
157157

158158
Assert.assertEquals(200, histogram.getEntriesCount());
@@ -168,17 +168,17 @@ public void histogramUnitConversion() {
168168
final CalculatedValue<HistogramStatistic.HistogramSupportingData> value = accumulator.calculate(Collections.emptyMap());
169169
final HistogramStatistic.HistogramSupportingData supportingData = value.getData();
170170
HistogramStatistic.HistogramSnapshot histogram = supportingData.getHistogramSnapshot();
171-
for (final Map.Entry<Double, Integer> entry : histogram.getValues()) {
172-
final int val = entry.getKey().intValue();
171+
for (final Double2LongMap.Entry entry : histogram.getValues()) {
172+
final int val = (int) entry.getDoubleKey();
173173
if (val < 990) {
174174
Assert.fail("shouldn't see a key this small");
175175
}
176176
}
177177

178178
final HistogramStatistic.HistogramSupportingData converted = supportingData.toUnit(Unit.SECOND);
179179
histogram = converted.getHistogramSnapshot();
180-
for (final Map.Entry<Double, Integer> entry : histogram.getValues()) {
181-
final int val = entry.getKey().intValue();
180+
for (final Double2LongMap.Entry entry : histogram.getValues()) {
181+
final int val = (int) entry.getDoubleKey();
182182
if (val > 100) {
183183
Assert.fail("shouldn't see a key this large after unit conversion");
184184
}

0 commit comments

Comments
 (0)