Skip to content

Commit 1dc8842

Browse files
Extended docs regarding ValueAtQuantile() and added ValuesAreEquivalent() (#39)
* [add] Extended docs regarding ValueAtQuantile() and added ValuesAreEquivalent() * [add] made New() documentation clearer * [fix] Fixes per PR review on New() * [fix] Fixed New() not to panic on numberOfSignificantValueDigits < 1 || numberOfSignificantValueDigits > 5. Adding linter check to CI * [add] Added whitebox testing for hdr.go ( specifically for New() numberOfSignificantValueDigits limits ).
1 parent 6663c35 commit 1dc8842

File tree

8 files changed

+130
-590
lines changed

8 files changed

+130
-590
lines changed

.github/workflows/unit-tests.yml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,15 @@ jobs:
1515
- name: Checkout code
1616
uses: actions/checkout@v2
1717
- name: Test
18-
run: make test
18+
run: make test
19+
lint:
20+
runs-on: ubuntu-latest
21+
steps:
22+
- name: Install Go
23+
uses: actions/setup-go@v2
24+
with:
25+
go-version: 1.15.x
26+
- name: Checkout code
27+
uses: actions/checkout@v2
28+
- name: Lint
29+
run: make lint

example_hdr_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77
)
88

99
// This latency Histogram could be used to track and analyze the counts of
10-
// observed integer values between 0 us and 30000000 us ( 30 secs )
10+
// observed integer values between 1 us and 30000000 us ( 30 secs )
1111
// while maintaining a value precision of 4 significant digits across that range,
1212
// translating to a value resolution of :
1313
// - 1 microsecond up to 10 milliseconds,

go.mod

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ module github.com/HdrHistogram/hdrhistogram-go
33
go 1.14
44

55
require (
6-
github.com/golangci/golangci-lint v1.31.0 // indirect
6+
github.com/davecgh/go-spew v1.1.1 // indirect
77
github.com/google/go-cmp v0.5.2
8+
github.com/kr/text v0.2.0 // indirect
9+
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
810
github.com/stretchr/testify v1.6.1
11+
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
12+
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
913
)

go.sum

Lines changed: 3 additions & 561 deletions
Large diffs are not rendered by default.

hdr.go

Lines changed: 78 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type Snapshot struct {
2828
// non-normally distributed data (like latency) with a high degree of accuracy
2929
// and a bounded degree of precision.
3030
type Histogram struct {
31-
lowestTrackableValue int64
31+
lowestDiscernibleValue int64
3232
highestTrackableValue int64
3333
unitMagnitude int64
3434
significantFigures int64
@@ -69,23 +69,41 @@ func (h *Histogram) SetStartTimeMs(startTimeMs int64) {
6969
h.startTimeMs = startTimeMs
7070
}
7171

72-
// New returns a new Histogram instance capable of tracking values in the given
73-
// range and with the given amount of precision.
74-
func New(minValue, maxValue int64, sigfigs int) *Histogram {
75-
if sigfigs < 1 || 5 < sigfigs {
76-
panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
77-
}
78-
79-
largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
72+
// New constructs a Histogram given the lowest and highest values to be tracked and a number of significant decimal digits.
73+
//
74+
// Providing a lowestDiscernibleValue is useful in situations where the units used for the histogram's values are
75+
// much smaller than the minimal accuracy required.
76+
// E.g. when tracking time values stated in nanosecond units, where the minimal accuracy required is a microsecond,
77+
// the proper value for lowestDiscernibleValue would be 1000.
78+
//
79+
// Note: the numberOfSignificantValueDigits must be in the range [1,5]. If lower than 1 the numberOfSignificantValueDigits will be
80+
// forced to 1, and if higher than 5 the numberOfSignificantValueDigits will be forced to 5.
81+
func New(lowestDiscernibleValue, highestTrackableValue int64, numberOfSignificantValueDigits int) *Histogram {
82+
if numberOfSignificantValueDigits < 1 {
83+
numberOfSignificantValueDigits = 1
84+
} else if numberOfSignificantValueDigits > 5 {
85+
numberOfSignificantValueDigits = 5
86+
}
87+
if lowestDiscernibleValue < 1 {
88+
lowestDiscernibleValue = 1
89+
}
90+
91+
// Given a 3 decimal point accuracy, the expectation is obviously for "+/- 1 unit at 1000". It also means that
92+
// it's "ok to be +/- 2 units at 2000". The "tricky" thing is that it is NOT ok to be +/- 2 units at 1999. Only
93+
// starting at 2000. So internally, we need to maintain single unit resolution to 2x 10^decimalPoints.
94+
largestValueWithSingleUnitResolution := 2 * math.Pow10(numberOfSignificantValueDigits)
95+
96+
// We need to maintain power-of-two subBucketCount (for clean direct indexing) that is large enough to
97+
// provide unit resolution to at least largestValueWithSingleUnitResolution. So figure out
98+
// largestValueWithSingleUnitResolution's nearest power-of-two (rounded up), and use that:
8099
subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution))))
81-
82100
subBucketHalfCountMagnitude := subBucketCountMagnitude
83101
if subBucketHalfCountMagnitude < 1 {
84102
subBucketHalfCountMagnitude = 1
85103
}
86104
subBucketHalfCountMagnitude--
87105

88-
unitMagnitude := int32(math.Floor(math.Log2(float64(minValue))))
106+
unitMagnitude := int32(math.Floor(math.Log2(float64(lowestDiscernibleValue))))
89107
if unitMagnitude < 0 {
90108
unitMagnitude = 0
91109
}
@@ -98,20 +116,16 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
98116
// determine exponent range needed to support the trackable value with no
99117
// overflow:
100118
smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude)
101-
bucketsNeeded := int32(1)
102-
for smallestUntrackableValue < maxValue {
103-
smallestUntrackableValue <<= 1
104-
bucketsNeeded++
105-
}
119+
bucketsNeeded := getBucketsNeededToCoverValue(smallestUntrackableValue, highestTrackableValue)
106120

107121
bucketCount := bucketsNeeded
108122
countsLen := (bucketCount + 1) * (subBucketCount / 2)
109123

110124
return &Histogram{
111-
lowestTrackableValue: minValue,
112-
highestTrackableValue: maxValue,
125+
lowestDiscernibleValue: lowestDiscernibleValue,
126+
highestTrackableValue: highestTrackableValue,
113127
unitMagnitude: int64(unitMagnitude),
114-
significantFigures: int64(sigfigs),
128+
significantFigures: int64(numberOfSignificantValueDigits),
115129
subBucketHalfCountMagnitude: subBucketHalfCountMagnitude,
116130
subBucketHalfCount: subBucketHalfCount,
117131
subBucketMask: subBucketMask,
@@ -126,6 +140,21 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
126140
}
127141
}
128142

143+
func getBucketsNeededToCoverValue(smallestUntrackableValue int64, maxValue int64) int32 {
144+
// always have at least 1 bucket
145+
bucketsNeeded := int32(1)
146+
for smallestUntrackableValue < maxValue {
147+
if smallestUntrackableValue > (math.MaxInt64 / 2) {
148+
// next shift will overflow, meaning that bucket could represent values up to ones greater than
149+
// math.MaxInt64, so it's the last bucket
150+
return bucketsNeeded + 1
151+
}
152+
smallestUntrackableValue <<= 1
153+
bucketsNeeded++
154+
}
155+
return bucketsNeeded
156+
}
157+
129158
// ByteSize returns an estimate of the amount of memory allocated to the
130159
// histogram in bytes.
131160
//
@@ -277,7 +306,12 @@ func (h *Histogram) setCountAtIndex(idx int, n int64) {
277306
h.totalCount += n
278307
}
279308

280-
// ValueAtQuantile returns the recorded value at the given quantile (0..100).
309+
// ValueAtQuantile returns the largest value that (100% - percentile) of the overall recorded value entries
310+
// in the histogram are either larger than or equivalent to.
311+
//
312+
// Note that two values are "equivalent" if `ValuesAreEquivalent(value1,value2)` would return true.
313+
//
314+
// Returns 0 if no recorded values exist.
281315
func (h *Histogram) ValueAtQuantile(q float64) int64 {
282316
if q > 100 {
283317
q = 100
@@ -290,13 +324,24 @@ func (h *Histogram) ValueAtQuantile(q float64) int64 {
290324
for i.next() {
291325
total += i.countAtIdx
292326
if total >= countAtPercentile {
327+
if q == 0.0 {
328+
return h.lowestEquivalentValue(i.valueFromIdx)
329+
}
293330
return h.highestEquivalentValue(i.valueFromIdx)
294331
}
295332
}
296333

297334
return 0
298335
}
299336

337+
// ValuesAreEquivalent determines whether two values are equivalent within the histogram's resolution.
338+
// Where "equivalent" means that value samples recorded for any two
339+
// equivalent values are counted in a common total count.
340+
func (h *Histogram) ValuesAreEquivalent(value1, value2 int64) (result bool) {
341+
result = h.lowestEquivalentValue(value1) == h.lowestEquivalentValue(value2)
342+
return
343+
}
344+
300345
// CumulativeDistribution returns an ordered list of brackets of the
301346
// distribution of recorded values.
302347
func (h *Histogram) CumulativeDistribution() []Bracket {
@@ -323,7 +368,7 @@ func (h *Histogram) SignificantFigures() int64 {
323368
// LowestTrackableValue returns the lower bound on values that will be added
324369
// to the histogram
325370
func (h *Histogram) LowestTrackableValue() int64 {
326-
return h.lowestTrackableValue
371+
return h.lowestDiscernibleValue
327372
}
328373

329374
// HighestTrackableValue returns the upper bound on values that will be added
@@ -361,7 +406,7 @@ func (h *Histogram) Distribution() (result []Bar) {
361406
func (h *Histogram) Equals(other *Histogram) bool {
362407
switch {
363408
case
364-
h.lowestTrackableValue != other.lowestTrackableValue,
409+
h.lowestDiscernibleValue != other.lowestDiscernibleValue,
365410
h.highestTrackableValue != other.highestTrackableValue,
366411
h.unitMagnitude != other.unitMagnitude,
367412
h.significantFigures != other.significantFigures,
@@ -387,7 +432,7 @@ func (h *Histogram) Equals(other *Histogram) bool {
387432
// Import to construct a new Histogram with the same state.
388433
func (h *Histogram) Export() *Snapshot {
389434
return &Snapshot{
390-
LowestTrackableValue: h.lowestTrackableValue,
435+
LowestTrackableValue: h.lowestDiscernibleValue,
391436
HighestTrackableValue: h.highestTrackableValue,
392437
SignificantFigures: h.significantFigures,
393438
Counts: append([]int64(nil), h.counts...), // copy
@@ -478,12 +523,21 @@ func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 {
478523
return bucketBaseIdx + offsetInBucket
479524
}
480525

526+
// getBucketIndex returns the lowest (and therefore highest precision) bucket index that can represent the value.
527+
// Calculates the number of powers of two by which the value is greater than the biggest value that fits in
528+
// bucket 0. This is the bucket index since each successive bucket can hold a value 2x greater.
481529
func (h *Histogram) getBucketIndex(v int64) int32 {
482530
pow2Ceiling := bitLen(v | h.subBucketMask)
483531
return int32(pow2Ceiling - int64(h.unitMagnitude) -
484532
int64(h.subBucketHalfCountMagnitude+1))
485533
}
486534

535+
// For bucketIndex 0, this is just value, so it may be anywhere in 0 to subBucketCount.
536+
// For other bucketIndex, this will always end up in the top half of subBucketCount: assume that for some bucket
537+
// k > 0, this calculation will yield a value in the bottom half of 0 to subBucketCount. Then, because of how
538+
// buckets overlap, it would have also been in the top half of bucket k-1, and therefore would have
539+
// returned k-1 in getBucketIndex(). Since we would then shift it one fewer bits here, it would be twice as big,
540+
// and therefore in the top half of subBucketCount.
487541
func (h *Histogram) getSubBucketIdx(v int64, idx int32) int32 {
488542
return int32(v >> uint(int64(idx)+int64(h.unitMagnitude)))
489543
}
@@ -505,11 +559,11 @@ type iterator struct {
505559
highestEquivalentValue int64
506560
}
507561

562+
// next advances the iterator to the next element and reports whether one was available.
508563
func (i *iterator) next() bool {
509564
if i.countToIdx >= i.h.totalCount {
510565
return false
511566
}
512-
513567
// increment bucket
514568
i.subBucketIdx++
515569
if i.subBucketIdx >= i.h.subBucketCount {

hdr_encoding.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func (h *Histogram) encodeIntoByteBuffer() (*bytes.Buffer, error) {
127127
if err != nil {
128128
return nil, err
129129
}
130-
err = binary.Write(toCompress, binary.BigEndian, h.lowestTrackableValue) // 16-23
130+
err = binary.Write(toCompress, binary.BigEndian, h.lowestDiscernibleValue) // 16-23
131131
if err != nil {
132132
return nil, err
133133
}

hdr_test.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ func TestValueAtQuantile(t *testing.T) {
5454
}
5555
}
5656

57-
5857
func TestMean(t *testing.T) {
5958
h := hdrhistogram.New(1, 10000000, 3)
6059
for i := 0; i < 1000000; i++ {
@@ -386,3 +385,18 @@ func TestEquals(t *testing.T) {
386385
t.Error("Expected Histograms to be equivalent")
387386
}
388387
}
388+
389+
// nolint
390+
func TestHistogram_ValuesAreEquivalent(t *testing.T) {
391+
hist := hdrhistogram.New(1476573605, 1476593605, 3)
392+
assert.True(t, hist.ValuesAreEquivalent(1476583605, 2147483647))
393+
394+
// test large histograms
395+
hist = hdrhistogram.New(20000000, 100000000, 5)
396+
hist.RecordValue(100000000)
397+
hist.RecordValue(20000000)
398+
hist.RecordValue(30000000)
399+
assert.True(t, hist.ValuesAreEquivalent(20000000, hist.ValueAtQuantile(50.0)))
400+
assert.True(t, hist.ValuesAreEquivalent(100000000, hist.ValueAtQuantile(83.34)))
401+
assert.True(t, hist.ValuesAreEquivalent(100000000, hist.ValueAtQuantile(99.0)))
402+
}

hdr_whitebox_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package hdrhistogram
2+
3+
import (
4+
"github.com/stretchr/testify/assert"
5+
"testing"
6+
)
7+
8+
func TestHistogram_New_internals(t *testing.T) {
9+
// a numberOfSignificantValueDigits higher than 5 must be forced to 5
10+
hist := New(1, 9007199254740991, 6)
11+
assert.Equal(t, int64(5), hist.significantFigures)
12+
// a numberOfSignificantValueDigits lower than 1 must be forced to 1
13+
hist = New(1, 9007199254740991, 0)
14+
assert.Equal(t, int64(1), hist.significantFigures)
15+
}

0 commit comments

Comments
 (0)