DataDog · mormubis · Aug 29, 2025 · Aug 29, 2025 · Sep 1, 2025 · Jan 9, 2026
@@ -73,6 +73,8 @@ scripts/             # Build, deploy, release automation
 - Spec files co-located with implementation: `feature.ts` → `feature.spec.ts`
 - Use `registerCleanupTask()` for cleanup, NOT `afterEach()`
 - Test framework: Jasmine + Karma
+- Prefer `collectAsyncCalls(spy, n)` over `waitFor(() => spy.calls.count() > 0)` for waiting on spy calls
+- Don't destructure methods from `spy.calls` (e.g., `argsFor`, `mostRecent`) - use `calls.argsFor()` to avoid `@typescript-eslint/unbound-method` errors
 
 ## Commit Messages
 

@@ -16,7 +16,7 @@ describe('user context', () => {
     findTrackedSession: () =>
       ({
         anonymousId: 'device-123',
-      }) as SessionContext<string>,
+      }) as SessionContext,
   }
 
   beforeEach(() => {

@@ -0,0 +1,86 @@
+import { isSampled, resetSampleDecisionCache, sampleUsingKnuthFactor } from './sampler'
+
+// UUID whose last group (the part `isSampled` hashes) yields a low hash value using the Knuth
+// formula, making it more likely to be sampled. The specs below rely on it being sampled at rates
+// as low as 0.0001%.
+const LOW_HASH_UUID = '29a4b5e3-9859-4290-99fa-4bc4a1a348b9'
+// UUID whose last group yields a high hash value using the Knuth formula, making it less likely
+// to be sampled. The specs below rely on it not being sampled at rates as high as 99.9999%.
+const HIGH_HASH_UUID = '5321b54a-d6ec-4b24-996d-dd70c617e09a'
+
+// UUID chosen arbitrarily, to be used when the test doesn't actually depend on its hash value.
+const ARBITRARY_UUID = '1ff81c8c-6e32-473b-869b-55af08048323'
+
+describe('isSampled', () => {
+  // Every spec starts from an empty decision cache.
+  beforeEach(() => {
+    resetSampleDecisionCache()
+  })
+
+  it('returns true when sampleRate is 100', () => {
+    expect(isSampled(ARBITRARY_UUID, 100)).toBeTrue()
+  })
+
+  it('returns false when sampleRate is 0', () => {
+    expect(isSampled(ARBITRARY_UUID, 0)).toBeFalse()
+  })
+
+  describe('deterministic sampling', () => {
+    it('a session id with a low hash value should be sampled with a rate close to 0%', () => {
+      const sampledRates = [0.1, 0.01, 0.001, 0.0001]
+      for (const sampleRate of sampledRates) {
+        expect(isSampled(LOW_HASH_UUID, sampleRate)).toBeTrue()
+        resetSampleDecisionCache()
+      }
+      // Below some threshold the rate is too low for the session to be sampled, even with a low
+      // hash. This is expected: a UUID with an even lower hash could probably be found.
+      expect(isSampled(LOW_HASH_UUID, 0.0000000001)).toBeFalse()
+    })
+
+    it('a session id with a high hash value should not be sampled even if the rate is close to 100%', () => {
+      const unsampledRates = [99.9, 99.99, 99.999, 99.9999]
+      for (const sampleRate of unsampledRates) {
+        expect(isSampled(HIGH_HASH_UUID, sampleRate)).toBeFalse()
+        resetSampleDecisionCache()
+      }
+      // Above some threshold the rate is high enough for the session to be sampled, even with a
+      // high hash. This is expected: a UUID with an even higher hash could probably be found.
+      expect(isSampled(HIGH_HASH_UUID, 99.9999999999)).toBeTrue()
+    })
+  })
+})
+
+describe('sampleUsingKnuthFactor', () => {
+  it('sampling should be based on the trace id', () => {
+    // Generated using the dd-trace-go implementation with the following program: https://go.dev/play/p/CUrDJtze8E_e
+    // Each entry is [identifier, sample rate in percent, expected decision].
+    const inputs: Array<[bigint, number, boolean]> = [
+      [BigInt('5577006791947779410'), 94.0509, true],
+      [BigInt('15352856648520921629'), 43.7714, true],
+      [BigInt('3916589616287113937'), 68.6823, true],
+      [BigInt('894385949183117216'), 30.0912, true],
+      [BigInt('12156940908066221323'), 46.889, true],
+
+      [BigInt('9828766684487745566'), 15.6519, false],
+      [BigInt('4751997750760398084'), 81.364, false],
+      [BigInt('11199607447739267382'), 38.0657, false],
+      [BigInt('6263450610539110790'), 21.8553, false],
+      [BigInt('1874068156324778273'), 36.0871, false],
+    ]
+
+    for (const [identifier, sampleRate, expected] of inputs) {
+      expect(sampleUsingKnuthFactor(identifier, sampleRate))
+        .withContext(`identifier=${identifier}, sampleRate=${sampleRate}`)
+        .toBe(expected)
+    }
+  })
+})
+
+// This spec exercises `isSampled` and its cache, so it lives in its own suite (with a cache reset)
+// rather than under the `sampleUsingKnuthFactor` suite.
+describe('isSampled decision cache', () => {
+  beforeEach(() => {
+    resetSampleDecisionCache()
+  })
+
+  it('should cache sampling decision per sampling rate', () => {
+    // For the same session id, the sampling decision can differ between sample rates (e.g. trace
+    // and profiling): a decision cached for one rate must not be reused for another rate.
+    expect(isSampled(HIGH_HASH_UUID, 99.9999999999)).toBeTrue()
+    expect(isSampled(HIGH_HASH_UUID, 0.0000001)).toBeFalse()
+    expect(isSampled(HIGH_HASH_UUID, 99.9999999999)).toBeTrue()
+  })
+})
@@ -0,0 +1,60 @@
+// Cache of sampling decisions, keyed by sample rate. Each rate remembers the decision for a single
+// session id only: storing a decision for another session id at the same rate overwrites it.
+const sampleDecisionCache: Map<number, { sessionId: string; decision: boolean }> = new Map()
+
+/**
+ * Tell whether a session is sampled for the given sample rate.
+ *
+ * The decision is deterministic for a given session id and rate, and is cached per sample rate
+ * for the session id it was last computed for.
+ *
+ * @param sessionId - The session id. Its fifth dash-separated group is parsed as a hexadecimal
+ * integer and used as the sampling identifier (presumably a UUID; a value without such a group
+ * makes `BigInt` throw).
+ * @param sampleRate - The sample rate in percentage between 0 and 100.
+ * @returns `true` when the session is sampled.
+ */
+export function isSampled(sessionId: string, sampleRate: number) {
+  // Fast path for the two extreme rates. Not strictly required, but it skips the cache lookup and
+  // the BigInt arithmetic for customers ingesting everything (or nothing).
+  if (sampleRate === 0 || sampleRate === 100) {
+    return sampleRate === 100
+  }
+
+  const cacheEntry = sampleDecisionCache.get(sampleRate)
+  if (cacheEntry !== undefined && cacheEntry.sessionId === sessionId) {
+    return cacheEntry.decision
+  }
+
+  // Only the last group of the session id is used as the sampling identifier.
+  const lastGroup = sessionId.split('-')[4]
+  const identifier = BigInt(`0x${lastGroup}`)
+  const sampled = sampleUsingKnuthFactor(identifier, sampleRate)
+  sampleDecisionCache.set(sampleRate, { sessionId, decision: sampled })
+  return sampled
+}
+
+/**
+ * Empty the sample decision cache, so that later `isSampled()` calls recompute their decision
+ * instead of reusing a cached one.
+ *
+ * Exported for tests.
+ */
+export function resetSampleDecisionCache() {
+  sampleDecisionCache.clear()
+}
+
+/**
+ * Perform sampling using the Knuth factor method. This method offers a consistent sampling
+ * result based on the provided identifier.
+ *
+ * @param identifier - The identifier to use for sampling. It is interpreted modulo 2^64, i.e. as
+ * an unsigned 64-bit integer: negative values wrap around instead of being always sampled.
+ * @param sampleRate - The sample rate in percentage between 0 and 100.
+ * @returns `true` when the identifier is sampled at the given rate.
+ */
+export function sampleUsingKnuthFactor(identifier: bigint, sampleRate: number): boolean {
+  // A rate of 0% (or below) never samples anything. Without this guard, an identifier hashing to
+  // 0 would pass the inclusive comparison below, since `0 <= 0`.
+  if (sampleRate <= 0) {
+    return false
+  }
+
+  // The formula is:
+  //
+  //   (identifier * knuthFactor) % 2^64 <= (sampleRate / 100) * 2^64
+  //
+  // Because JavaScript numbers are 64-bit floats, we can't represent 64-bit integers, and the
+  // modulo would be incorrect. Thus, we are using BigInts here.
+  //
+  // Implementation in other languages:
+  // * Go     https://github.com/DataDog/dd-trace-go/blob/ec6fbb1f2d517b7b8e69961052adf7136f3af773/ddtrace/tracer/sampler.go#L86-L91
+  // * Python https://github.com/DataDog/dd-trace-py/blob/0cee2f066fb6e79aa15947c1514c0f406dea47c5/ddtrace/sampling_rule.py#L197
+  // * Ruby   https://github.com/DataDog/dd-trace-rb/blob/1a6e255cdcb7e7e22235ea5955f90f6dfa91045d/lib/datadog/tracing/sampling/rate_sampler.rb#L42
+  // * C++    https://github.com/DataDog/dd-trace-cpp/blob/159629edc438ae45f2bb318eb7bd51abd05e94b5/src/datadog/trace_sampler.cpp#L58
+  // * Java   https://github.com/DataDog/dd-trace-java/blob/896dd6b380533216e0bdee59614606c8272d313e/dd-trace-core/src/main/java/datadog/trace/common/sampling/DeterministicSampler.java#L48
+  //
+  // Note: All implementations have slight variations. Some of them use '<' instead of '<=', and
+  // use `sampleRate * 2^64 - 1` instead of `sampleRate * 2^64`. The following implementation
+  // should adhere to the spec and is a bit simpler than using a 2^64-1 limit as there is less
+  // BigInt arithmetic to write. In practice this does not matter, as we are using floating point
+  // numbers in the end, and Number(2n**64n-1n) === Number(2n**64n).
+  const knuthFactor = BigInt('1111111111111111111')
+  // Wrap the product to an unsigned 64-bit integer. Unlike `% 2^64`, whose result takes the sign
+  // of the dividend, this never yields a negative hash for a negative identifier.
+  const hash = BigInt.asUintN(64, identifier * knuthFactor)
+  const twoPow64 = 2 ** 64 // exactly representable as a float
+  return Number(hash) <= (sampleRate / 100) * twoPow64
+}