@@ -28,7 +28,7 @@ type Snapshot struct {
2828// non-normally distributed data (like latency) with a high degree of accuracy
2929// and a bounded degree of precision.
3030type Histogram struct {
31- lowestTrackableValue int64
31+ lowestDiscernibleValue int64
3232 highestTrackableValue int64
3333 unitMagnitude int64
3434 significantFigures int64
@@ -69,23 +69,41 @@ func (h *Histogram) SetStartTimeMs(startTimeMs int64) {
6969 h .startTimeMs = startTimeMs
7070}
7171
72- // New returns a new Histogram instance capable of tracking values in the given
73- // range and with the given amount of precision.
74- func New (minValue , maxValue int64 , sigfigs int ) * Histogram {
75- if sigfigs < 1 || 5 < sigfigs {
76- panic (fmt .Errorf ("sigfigs must be [1,5] (was %d)" , sigfigs ))
77- }
78-
79- largestValueWithSingleUnitResolution := 2 * math .Pow10 (sigfigs )
72+ // New constructs a Histogram given the lowest discernible and highest trackable values and a number of significant decimal digits.
73+ //
74+ // Providing a lowestDiscernibleValue is useful in situations where the units used for the histogram's values are
75+ // much smaller than the minimal accuracy required.
76+ // E.g. when tracking time values stated in nanosecond units, where the minimal accuracy required is a microsecond,
77+ // the proper value for lowestDiscernibleValue would be 1000.
78+ //
79+ // Note: the numberOfSignificantValueDigits must be [1,5]. If lower than 1 the numberOfSignificantValueDigits will be
80+ // forced to 1, and if higher than 5 the numberOfSignificantValueDigits will be forced to 5.
81+ func New (lowestDiscernibleValue , highestTrackableValue int64 , numberOfSignificantValueDigits int ) * Histogram {
82+ if numberOfSignificantValueDigits < 1 {
83+ numberOfSignificantValueDigits = 1
84+ } else if numberOfSignificantValueDigits > 5 {
85+ numberOfSignificantValueDigits = 5
86+ }
87+ if lowestDiscernibleValue < 1 {
88+ lowestDiscernibleValue = 1
89+ }
90+
91+ // Given a 3 decimal point accuracy, the expectation is obviously for "+/- 1 unit at 1000". It also means that
92+ // it's "ok to be +/- 2 units at 2000". The "tricky" thing is that it is NOT ok to be +/- 2 units at 1999. Only
93+ // starting at 2000. So internally, we need to maintain single unit resolution to 2x 10^decimalPoints.
94+ largestValueWithSingleUnitResolution := 2 * math .Pow10 (numberOfSignificantValueDigits )
95+
96+ // We need to maintain power-of-two subBucketCount (for clean direct indexing) that is large enough to
97+ // provide unit resolution to at least largestValueWithSingleUnitResolution. So figure out
98+ // largestValueWithSingleUnitResolution's nearest power-of-two (rounded up), and use that:
8099 subBucketCountMagnitude := int32 (math .Ceil (math .Log2 (float64 (largestValueWithSingleUnitResolution ))))
81-
82100 subBucketHalfCountMagnitude := subBucketCountMagnitude
83101 if subBucketHalfCountMagnitude < 1 {
84102 subBucketHalfCountMagnitude = 1
85103 }
86104 subBucketHalfCountMagnitude --
87105
88- unitMagnitude := int32 (math .Floor (math .Log2 (float64 (minValue ))))
106+ unitMagnitude := int32 (math .Floor (math .Log2 (float64 (lowestDiscernibleValue ))))
89107 if unitMagnitude < 0 {
90108 unitMagnitude = 0
91109 }
@@ -98,20 +116,16 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
98116 // determine exponent range needed to support the trackable value with no
99117 // overflow:
100118 smallestUntrackableValue := int64 (subBucketCount ) << uint (unitMagnitude )
101- bucketsNeeded := int32 (1 )
102- for smallestUntrackableValue < maxValue {
103- smallestUntrackableValue <<= 1
104- bucketsNeeded ++
105- }
119+ bucketsNeeded := getBucketsNeededToCoverValue (smallestUntrackableValue , highestTrackableValue )
106120
107121 bucketCount := bucketsNeeded
108122 countsLen := (bucketCount + 1 ) * (subBucketCount / 2 )
109123
110124 return & Histogram {
111- lowestTrackableValue : minValue ,
112- highestTrackableValue : maxValue ,
125+ lowestDiscernibleValue : lowestDiscernibleValue ,
126+ highestTrackableValue : highestTrackableValue ,
113127 unitMagnitude : int64 (unitMagnitude ),
114- significantFigures : int64 (sigfigs ),
128+ significantFigures : int64 (numberOfSignificantValueDigits ),
115129 subBucketHalfCountMagnitude : subBucketHalfCountMagnitude ,
116130 subBucketHalfCount : subBucketHalfCount ,
117131 subBucketMask : subBucketMask ,
@@ -126,6 +140,21 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram {
126140 }
127141}
128142
// getBucketsNeededToCoverValue returns how many buckets are required so that
// maxValue can be tracked.
//
// smallestUntrackableValue is the first value that does NOT fit in the buckets
// counted so far; every additional bucket doubles it. The result is always at
// least 1.
func getBucketsNeededToCoverValue(smallestUntrackableValue int64, maxValue int64) int32 {
	// always have at least 1 bucket
	bucketsNeeded := int32(1)
	if smallestUntrackableValue <= 0 {
		// Doubling a non-positive bound never makes progress, so the loop below
		// would spin forever. Such a bound can only come from an overflowed
		// shift in the caller; fall back to the minimum bucket count.
		return bucketsNeeded
	}
	// Use <= rather than <: a maxValue equal to the smallest untrackable value
	// is itself untrackable and therefore needs one more bucket.
	for smallestUntrackableValue <= maxValue {
		if smallestUntrackableValue > (math.MaxInt64 / 2) {
			// next shift will overflow, meaning that bucket could represent values up to ones greater than
			// math.MaxInt64, so it's the last bucket
			return bucketsNeeded + 1
		}
		smallestUntrackableValue <<= 1
		bucketsNeeded++
	}
	return bucketsNeeded
}
157+
129158// ByteSize returns an estimate of the amount of memory allocated to the
130159// histogram in bytes.
131160//
@@ -277,7 +306,12 @@ func (h *Histogram) setCountAtIndex(idx int, n int64) {
277306 h .totalCount += n
278307}
279308
280- // ValueAtQuantile returns the recorded value at the given quantile (0..100).
309+ // ValueAtQuantile returns the largest value that (100% - q) of the overall recorded value entries
310+ // in the histogram are either larger than or equivalent to.
311+ //
312+ // Note that two values are "equivalent" if `ValuesAreEquivalent(value1,value2)` would return true.
313+ //
314+ // Returns 0 if no recorded values exist.
281315func (h * Histogram ) ValueAtQuantile (q float64 ) int64 {
282316 if q > 100 {
283317 q = 100
@@ -290,13 +324,24 @@ func (h *Histogram) ValueAtQuantile(q float64) int64 {
290324 for i .next () {
291325 total += i .countAtIdx
292326 if total >= countAtPercentile {
327+ if q == 0.0 {
328+ return h .lowestEquivalentValue (i .valueFromIdx )
329+ }
293330 return h .highestEquivalentValue (i .valueFromIdx )
294331 }
295332 }
296333
297334 return 0
298335}
299336
337+ // Determine if two values are equivalent with the histogram's resolution.
338+ // Where "equivalent" means that value samples recorded for any two
339+ // equivalent values are counted in a common total count.
340+ func (h * Histogram ) ValuesAreEquivalent (value1 , value2 int64 ) (result bool ) {
341+ result = h .lowestEquivalentValue (value1 ) == h .lowestEquivalentValue (value2 )
342+ return
343+ }
344+
300345// CumulativeDistribution returns an ordered list of brackets of the
301346// distribution of recorded values.
302347func (h * Histogram ) CumulativeDistribution () []Bracket {
@@ -323,7 +368,7 @@ func (h *Histogram) SignificantFigures() int64 {
323368// LowestTrackableValue returns the lower bound on values that will be added
324369// to the histogram, as held in the histogram's lowest-value field.
325370func (h * Histogram ) LowestTrackableValue () int64 {
326- return h .lowestTrackableValue
371+ return h .lowestDiscernibleValue
327372}
328373
329374// HighestTrackableValue returns the upper bound on values that will be added
@@ -361,7 +406,7 @@ func (h *Histogram) Distribution() (result []Bar) {
361406func (h * Histogram ) Equals (other * Histogram ) bool {
362407 switch {
363408 case
364- h .lowestTrackableValue != other .lowestTrackableValue ,
409+ h .lowestDiscernibleValue != other .lowestDiscernibleValue ,
365410 h .highestTrackableValue != other .highestTrackableValue ,
366411 h .unitMagnitude != other .unitMagnitude ,
367412 h .significantFigures != other .significantFigures ,
@@ -387,7 +432,7 @@ func (h *Histogram) Equals(other *Histogram) bool {
387432// Import to construct a new Histogram with the same state.
388433func (h * Histogram ) Export () * Snapshot {
389434 return & Snapshot {
390- LowestTrackableValue : h .lowestTrackableValue ,
435+ LowestTrackableValue : h .lowestDiscernibleValue ,
391436 HighestTrackableValue : h .highestTrackableValue ,
392437 SignificantFigures : h .significantFigures ,
393438 Counts : append ([]int64 (nil ), h .counts ... ), // copy
@@ -478,12 +523,21 @@ func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 {
478523 return bucketBaseIdx + offsetInBucket
479524}
480525
526+ // return the lowest (and therefore highest precision) bucket index that can represent the value
527+ // Calculates the number of powers of two by which the value is greater than the biggest value that fits in
528+ // bucket 0. This is the bucket index since each successive bucket can hold a value 2x greater.
481529func (h * Histogram ) getBucketIndex (v int64 ) int32 {
482530 pow2Ceiling := bitLen (v | h .subBucketMask )
483531 return int32 (pow2Ceiling - int64 (h .unitMagnitude ) -
484532 int64 (h .subBucketHalfCountMagnitude + 1 ))
485533}
486534
535+ // For bucketIndex 0, this is just value, so it may be anywhere in 0 to subBucketCount.
536+ // For other bucketIndex, this will always end up in the top half of subBucketCount: assume that for some bucket
537+ // k > 0, this calculation will yield a value in the bottom half of 0 to subBucketCount. Then, because of how
538+ // buckets overlap, it would have also been in the top half of bucket k-1, and therefore would have
539+ // returned k-1 in getBucketIndex(). Since we would then shift it one fewer bits here, it would be twice as big,
540+ // and therefore in the top half of subBucketCount.
487541func (h * Histogram ) getSubBucketIdx (v int64 , idx int32 ) int32 {
488542 return int32 (v >> uint (int64 (idx )+ int64 (h .unitMagnitude )))
489543}
@@ -505,11 +559,11 @@ type iterator struct {
505559 highestEquivalentValue int64
506560}
507561
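562+ // next advances the iterator and reports whether another element is available; it returns false once countToIdx has reached the histogram's totalCount.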
508563func (i * iterator ) next () bool {
509564 if i .countToIdx >= i .h .totalCount {
510565 return false
511566 }
512-
513567 // increment bucket
514568 i .subBucketIdx ++
515569 if i .subBucketIdx >= i .h .subBucketCount {
0 commit comments