Skip to content

Commit bec7251

Browse files
szegedinsavoire
authored andcommitted
Ensure advancement of sampling instant never takes much time (#6172)
* Ensure advancement of sampling instant never takes much time * Extract the poisson filter so it is unit-testable * Update packages/dd-trace/src/profiling/profilers/events.js Co-authored-by: Nicolas Savoire <[email protected]> * Fix another place where there's typo --------- Co-authored-by: Nicolas Savoire <[email protected]>
1 parent ae93326 commit bec7251

File tree

3 files changed

+297
-29
lines changed

3 files changed

+297
-29
lines changed

packages/dd-trace/src/profiling/profilers/events.js

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
const { performance, constants, PerformanceObserver } = require('perf_hooks')
44
const { END_TIMESTAMP_LABEL, SPAN_ID_LABEL, LOCAL_ROOT_SPAN_ID_LABEL, encodeProfileAsync } = require('./shared')
55
const { Function, Label, Line, Location, Profile, Sample, StringTable, ValueType } = require('pprof-format')
6+
const PoissonProcessSamplingFilter = require('./poisson')
67
const { availableParallelism, effectiveLibuvThreadCount } = require('../libuv-size')
78
// perf_hooks uses millis, with fractional part representing nanos. We emit nanos into the pprof file.
89
const MS_TO_NS = 1_000_000
10+
// The number of sampling intervals that need to pass before we reset the Poisson process sampling instant.
11+
const POISSON_RESET_FACTOR = 2
912

1013
// While this is an "events profiler", meaning it emits a pprof file based on events observed as
1114
// perf_hooks events, the emitted pprof file uses the type "timeline".
@@ -371,35 +374,12 @@ class DatadogInstrumentationEventSource {
371374
}
372375

373376
function createPoissonProcessSamplingFilter (samplingIntervalMillis) {
374-
let nextSamplingInstant = performance.now()
375-
let currentSamplingInstant = 0
376-
setNextSamplingInstant()
377-
378-
return event => {
379-
const endTime = event.startTime + event.duration
380-
while (endTime >= nextSamplingInstant) {
381-
setNextSamplingInstant()
382-
}
383-
// An event is sampled if it started before, and ended on or after a sampling instant. The above
384-
// while loop will ensure that the ending invariant is always true for the current sampling
385-
// instant so we don't have to test for it below. Across calls, the invariant also holds as long
386-
// as the events arrive in endTime order. This is true for events coming from
387-
// DatadogInstrumentationEventSource; they will be ordered by endTime by virtue of this method
388-
// being invoked synchronously with the plugins' finish() handler which evaluates
389-
// performance.now(). OTOH, events coming from NodeAPIEventSource (GC in typical setup) might be
390-
// somewhat delayed as they are queued by Node, so they can arrive out of order with regard to
391-
// events coming from the non-queued source. By omitting the endTime check, we will pass through
392-
// some short events that started and ended before the current sampling instant. OTOH, if we
393-
// were to check for this.currentSamplingInstant <= endTime, we would discard some long events
394-
// that also ended before the current sampling instant. We'd rather err on the side of including
395-
// some short events than excluding some long events.
396-
return event.startTime < currentSamplingInstant
397-
}
398-
399-
function setNextSamplingInstant () {
400-
currentSamplingInstant = nextSamplingInstant
401-
nextSamplingInstant -= Math.log(1 - Math.random()) * samplingIntervalMillis
402-
}
377+
const poissonFilter = new PoissonProcessSamplingFilter({
378+
samplingInterval: samplingIntervalMillis,
379+
resetInterval: samplingIntervalMillis * POISSON_RESET_FACTOR,
380+
now: performance.now.bind(performance)
381+
})
382+
return poissonFilter.filter.bind(poissonFilter)
403383
}
404384

405385
/**
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
'use strict'
2+
3+
class PoissonProcessSamplingFilter {
4+
#currentSamplingInstant = 0
5+
#nextSamplingInstant
6+
#samplingInterval
7+
#resetInterval
8+
#now
9+
#lastNow = Number.NEGATIVE_INFINITY
10+
#samplingInstantCount = 0
11+
12+
constructor ({ samplingInterval, now, resetInterval }) {
13+
if (samplingInterval <= 0) {
14+
throw new RangeError(`samplingInterval (${samplingInterval}) must be greater than 0`)
15+
}
16+
if (resetInterval < samplingInterval) {
17+
throw new RangeError(
18+
`resetInterval (${resetInterval}) must be greater than samplingInterval (${samplingInterval})`
19+
)
20+
}
21+
if (typeof now !== 'function') {
22+
throw new TypeError('now must be a function')
23+
}
24+
this.#samplingInterval = samplingInterval
25+
this.#resetInterval = resetInterval
26+
this.#now = now
27+
this.#nextSamplingInstant = this.#callNow()
28+
this.#setNextSamplingInstant()
29+
}
30+
31+
get currentSamplingInstant () {
32+
return this.#currentSamplingInstant
33+
}
34+
35+
get nextSamplingInstant () {
36+
return this.#nextSamplingInstant
37+
}
38+
39+
get samplingInstantCount () {
40+
return this.#samplingInstantCount
41+
}
42+
43+
#callNow () {
44+
const nowValue = this.#now()
45+
if (typeof nowValue !== 'number') {
46+
throw new TypeError('now() must return a number')
47+
}
48+
if (nowValue < this.#lastNow) {
49+
throw new RangeError('now() must return a value greater than or equal to the last returned value')
50+
}
51+
this.#lastNow = nowValue
52+
return nowValue
53+
}
54+
55+
filter (event) {
56+
const endTime = event.startTime + event.duration
57+
// We're using the end times of events as an approximation of current time as events are
58+
// expected to be reported close to where they ended. If the end time (and thus, presumably, the
59+
// current time) is past the next sampling instant, we make it the current sampling instant and
60+
// compute the next sampling instant in its future.
61+
if (endTime >= this.#nextSamplingInstant) {
62+
// All observed events are supposed to have happened in the past. For purposes of advancing
63+
// the next sampling instant, we cap endTime to now(). This protects us from advancing it far
64+
// into future if we receive an event with erroneously long duration, which would also take
65+
// many iterations of the below "while" loop.
66+
const cappedEndTime = Math.min(endTime, this.#callNow())
67+
68+
// If nextSamplingInstant is far in cappedEndTime's past, first advance it close to it. This
69+
// can happen if we didn't receive any events for a while. Since a Poisson process has no
70+
// memory, we can reset it anytime. This will ensure that the "while" loop below runs at most
71+
// few iterations.
72+
const earliestContinuousPast = cappedEndTime - this.#resetInterval
73+
if (this.#nextSamplingInstant < earliestContinuousPast) {
74+
this.#nextSamplingInstant = earliestContinuousPast
75+
}
76+
77+
// Advance the next sampling instant until it is in cappedEndTime's future.
78+
while (cappedEndTime >= this.#nextSamplingInstant) {
79+
this.#setNextSamplingInstant()
80+
}
81+
}
82+
// An event is sampled if it started before, and ended on or after a sampling instant. The above
83+
// while loop will ensure that the ending invariant is always true for the current sampling
84+
// instant so we don't have to test for it below. Across calls, the invariant also holds as long
85+
// as the events arrive in endTime order. This is true for events coming from
86+
// DatadogInstrumentationEventSource; they will be ordered by endTime by virtue of this method
87+
// being invoked synchronously with the plugins' finish() handler which evaluates
88+
// performance.now(). OTOH, events coming from NodeAPIEventSource (GC in typical setup) might be
89+
// somewhat delayed as they are queued by Node, so they can arrive out of order with regard to
90+
// events coming from the non-queued source. By omitting the endTime check, we will pass through
91+
// some short events that started and ended before the current sampling instant. OTOH, if we
92+
// were to check for this.currentSamplingInstant <= endTime, we would discard some long events
93+
// that also ended before the current sampling instant. We'd rather err on the side of including
94+
// some short events than excluding some long events.
95+
return event.startTime < this.#currentSamplingInstant
96+
}
97+
98+
#setNextSamplingInstant () {
99+
this.#currentSamplingInstant = this.#nextSamplingInstant
100+
this.#nextSamplingInstant -= Math.log(1 - Math.random()) * this.#samplingInterval
101+
this.#samplingInstantCount++
102+
}
103+
}
104+
105+
module.exports = PoissonProcessSamplingFilter
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
'use strict'
2+
3+
require('../../setup/tap')
4+
const assert = require('assert')
5+
6+
const PoissonProcessSamplingFilter = require('../../../src/profiling/profilers/poisson')
7+
8+
describe('PoissonProcessSamplingFilter', () => {
9+
let nowValue
10+
const now = () => nowValue
11+
12+
beforeEach(() => {
13+
nowValue = 0
14+
})
15+
16+
it('should throw if resetInterval < samplingInterval', () => {
17+
assert.throws(() => new PoissonProcessSamplingFilter({
18+
samplingInterval: 100,
19+
resetInterval: 50,
20+
now
21+
}), RangeError)
22+
})
23+
24+
it('should throw if now is not a function', () => {
25+
assert.throws(() => new PoissonProcessSamplingFilter({
26+
samplingInterval: 100,
27+
resetInterval: 200,
28+
now: 123
29+
}), (err) => err instanceof TypeError && err.message === 'now must be a function')
30+
})
31+
32+
it('should throw if now() returns a non-number', () => {
33+
const badNow = () => 'not-a-number'
34+
assert.throws(() => new PoissonProcessSamplingFilter({
35+
samplingInterval: 100,
36+
resetInterval: 200,
37+
now: badNow
38+
}), (err) => err instanceof TypeError && err.message === 'now() must return a number')
39+
})
40+
41+
it('should throw if now() returns a decreasing value', () => {
42+
let callCount = 0
43+
const decreasingNow = () => {
44+
callCount++
45+
return callCount === 1 ? 100 : 50
46+
}
47+
const filter = new PoissonProcessSamplingFilter({
48+
samplingInterval: 10,
49+
resetInterval: 20,
50+
now: decreasingNow
51+
})
52+
const event = { startTime: 0, duration: filter.nextSamplingInstant + 1 }
53+
assert.throws(() => filter.filter(event), (err) => err instanceof RangeError &&
54+
err.message === 'now() must return a value greater than or equal to the last returned value')
55+
})
56+
57+
it('should allow now() to return the same value as last time', () => {
58+
let callCount = 0
59+
const constantNow = () => {
60+
callCount++
61+
return 42
62+
}
63+
const filter = new PoissonProcessSamplingFilter({
64+
samplingInterval: 10,
65+
resetInterval: 20,
66+
now: constantNow
67+
})
68+
const event = { startTime: 0, duration: 100 }
69+
assert.doesNotThrow(() => filter.filter(event))
70+
assert.strictEqual(callCount, 2)
71+
})
72+
73+
it('should initialize with correct properties', () => {
74+
const filter = new PoissonProcessSamplingFilter({
75+
samplingInterval: 100,
76+
resetInterval: 200,
77+
now
78+
})
79+
assert.strictEqual(typeof filter.currentSamplingInstant, 'number')
80+
assert.strictEqual(filter.currentSamplingInstant, 0)
81+
assert.strictEqual(typeof filter.nextSamplingInstant, 'number')
82+
assert.ok(filter.nextSamplingInstant > 0)
83+
assert.strictEqual(filter.samplingInstantCount, 1)
84+
})
85+
86+
it('should advance sampling instant when event endTime >= nextSamplingInstant', () => {
87+
const filter = new PoissonProcessSamplingFilter({
88+
samplingInterval: 100,
89+
resetInterval: 200,
90+
now
91+
})
92+
const prevNextSamplingInstant = filter.nextSamplingInstant
93+
const event = {
94+
startTime: -10,
95+
duration: prevNextSamplingInstant + 15
96+
}
97+
assert.strictEqual(filter.currentSamplingInstant, 0)
98+
nowValue = prevNextSamplingInstant + 15
99+
filter.filter(event)
100+
assert.ok(filter.nextSamplingInstant > prevNextSamplingInstant)
101+
assert.ok(filter.currentSamplingInstant > 0)
102+
assert.ok(filter.samplingInstantCount > 1)
103+
})
104+
105+
it('should not advance sampling instant if event endTime < nextSamplingInstant', () => {
106+
const filter = new PoissonProcessSamplingFilter({
107+
samplingInterval: 100,
108+
resetInterval: 200,
109+
now
110+
})
111+
const prevSamplingInstant = filter.currentSamplingInstant
112+
const prevNextSamplingInstant = filter.nextSamplingInstant
113+
const event = { startTime: prevSamplingInstant - 10, duration: 1 }
114+
filter.filter(event)
115+
assert.strictEqual(filter.currentSamplingInstant, prevSamplingInstant)
116+
assert.strictEqual(filter.nextSamplingInstant, prevNextSamplingInstant)
117+
assert.strictEqual(filter.samplingInstantCount, 1)
118+
})
119+
120+
it('should cap endTime to now() if event endTime is in the future', () => {
121+
const filter = new PoissonProcessSamplingFilter({
122+
samplingInterval: 100,
123+
resetInterval: 200,
124+
now
125+
})
126+
const prevNextSamplingInstant = filter.nextSamplingInstant
127+
nowValue = 1000
128+
const event = { startTime: 0, duration: 1e6 }
129+
filter.filter(event)
130+
assert.ok(filter.currentSamplingInstant >= prevNextSamplingInstant)
131+
assert.strictEqual(typeof filter.nextSamplingInstant, 'number')
132+
assert.ok(filter.nextSamplingInstant < 500000)
133+
assert.ok(filter.samplingInstantCount < 10)
134+
})
135+
136+
it('should reset nextSamplingInstant if it is too far in the past', () => {
137+
const filter = new PoissonProcessSamplingFilter({
138+
samplingInterval: 100,
139+
resetInterval: 200,
140+
now
141+
})
142+
const event = { startTime: 100000, duration: 100 }
143+
nowValue = event.startTime + event.duration
144+
filter.filter(event)
145+
assert.ok(filter.nextSamplingInstant > nowValue)
146+
assert.ok(filter.samplingInstantCount < 10)
147+
})
148+
149+
it('should return true if event.startTime < currentSamplingInstant', () => {
150+
const filter = new PoissonProcessSamplingFilter({
151+
samplingInterval: 100,
152+
resetInterval: 200,
153+
now
154+
})
155+
const event = { startTime: filter.currentSamplingInstant - 1, duration: 1 }
156+
assert.strictEqual(filter.filter(event), true)
157+
})
158+
159+
it('should return false if event.startTime >= currentSamplingInstant', () => {
160+
const filter = new PoissonProcessSamplingFilter({
161+
samplingInterval: 100,
162+
resetInterval: 200,
163+
now
164+
})
165+
const event = { startTime: filter.currentSamplingInstant, duration: 1 }
166+
assert.strictEqual(filter.filter(event), false)
167+
})
168+
169+
it('should increment samplingInstantCount on each sampling instant', () => {
170+
const filter = new PoissonProcessSamplingFilter({
171+
samplingInterval: 10,
172+
resetInterval: 100,
173+
now
174+
})
175+
const initialCount = filter.samplingInstantCount
176+
for (let i = 0; i < 5; i++) {
177+
nowValue += 20
178+
const event = { startTime: 0, duration: filter.nextSamplingInstant - 0 }
179+
filter.filter(event)
180+
}
181+
assert.ok(filter.samplingInstantCount > initialCount)
182+
})
183+
})

0 commit comments

Comments
 (0)