perf: improve span id generation (#1378)

Kyle-Verhoog · jd · web-flow · commit 258788496717 · 2020-05-14T13:09:29.000+02:00
* add random number generators; use for span id

* make threadsafe; add interval generator

* experiment with cython

* explicitly call __next__ on generators

* add build to setup.py

* reorganize to use only for python 2

* remove unnecessary thread-safety

* assume c-extension

* use gil directly for threadsafety

* move test to own file

* remove benchmark.py from black excludes

* use def instead of cpdef

* black test_rand.py

* fix copy paste error

Co-authored-by: Julien Danjou <julien@danjou.info>
diff --git a/ddtrace/compat.py b/ddtrace/compat.py
@@ -1,4 +1,5 @@
 import platform
+import random
 import re
 import sys
 import textwrap
@@ -93,6 +94,12 @@ def process_time_ns():
         return int(_process_time() * 1e9)
 
 
+if sys.version_info.major < 3:
+    getrandbits = random.SystemRandom().getrandbits
+else:
+    getrandbits = random.getrandbits
+
+
 if PYTHON_VERSION_INFO[0:2] >= (3, 4):
     from asyncio import iscoroutinefunction
 
diff --git a/ddtrace/internal/_rand.pyx b/ddtrace/internal/_rand.pyx
@@ -0,0 +1,55 @@
+"""Generator for pseudorandom 64-bit integers.
+
+Implements the xorshift* algorithm with a non-linear transformation
+(multiplication) applied to the result.
+
+This implementation uses the recommended constants from Numerical Recipes
+Chapter 7 (Ranq1 algorithm).
+
+According to TPV, the period is approx. 1.8 x 10^19. So it should not be used
+by an application that makes more than 10^12 calls.
+
+To put this into perspective: we cap the max number of traces at 1k/s. Let's be
+conservative and say each trace contains 100 spans.
+
+That's 100k spans/second, or ~101k calls to this fn per second with trace ids.
+
+That's roughly 10,000,000 seconds until we reach 10^12 calls. That's 115 days
+of 100k spans/second (with no application restart) before that limit is hit.
+
+
+rand64bits() is thread-safe as it is written in C and is interfaced with via
+a single Python step. This is the same mechanism by which CPython achieves
+thread-safety:
+https://github.com/python/cpython/blob/8d21aa21f2cbc6d50aab3f420bb23be1d081dac4/Lib/random.py#L37-L38
+
+
+Python 2.7:
+Name (time in ns)                         Min                   Max                  Mean              StdDev                Median                 IQR            Outliers  OPS (Kops/s)            Rounds  Iterations
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+rand64bits                           144.1789 (1.01)       221.2596 (1.0)        155.5800 (1.0)       15.4198 (1.0)        151.3100 (1.00)       7.6687 (1.0)           4;6    6,427.5628 (1.0)          61      100000
+random.SystemRandom().getrandbits  1,626.8015 (11.37)    2,178.9074 (9.85)     1,766.1762 (11.35)    133.8990 (8.68)     1,714.4561 (11.35)    113.9164 (14.85)        11;8      566.1949 (0.09)         60       10000
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+
+Python 3.7:
+Name (time in ns)                       Min                 Max                Mean             StdDev              Median                IQR            Outliers  OPS (Mops/s)            Rounds  Iterations
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+rand64bits                         167.5956 (1.0)      211.3155 (1.0)      190.2803 (1.0)       9.5815 (1.0)      187.7187 (1.0)      11.4513 (1.0)          15;1        5.2554 (1.0)          52      100000
+random.getrandbits                 222.7103 (1.33)     367.4459 (1.74)     250.2699 (1.32)     26.5930 (2.78)     242.1607 (1.29)     26.4550 (2.31)          6;1        3.9957 (0.76)         36      100000
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+"""
+from libc.stdint cimport uint64_t
+
+from ddtrace import compat
+
+
+cdef uint64_t x = compat.getrandbits(64) ^ 4101842887655102017
+
+
+def rand64bits():
+    global x
+    x ^= x >> 21
+    x ^= x << 35
+    x ^= x >> 4
+    return x * <uint64_t>2685821657736338717
diff --git a/ddtrace/span.py b/ddtrace/span.py
@@ -1,5 +1,4 @@
 import math
-import random
 import sys
 import traceback
 
@@ -16,17 +15,11 @@
 )
 from .ext import SpanTypes, errors, priority, net, http
 from .internal.logger import get_logger
-
+from .internal import _rand
 
 log = get_logger(__name__)
 
 
-if sys.version_info.major < 3:
-    _getrandbits = random.SystemRandom().getrandbits
-else:
-    _getrandbits = random.getrandbits
-
-
 class Span(object):
 
     __slots__ = [
@@ -90,7 +83,7 @@ def __init__(
         self.resource = resource or name
         self.span_type = span_type.value if isinstance(span_type, SpanTypes) else span_type
 
-        # tags / metatdata
+        # tags / metadata
         self.meta = {}
         self.error = 0
         self.metrics = {}
@@ -100,8 +93,8 @@ def __init__(
         self.duration_ns = None
 
         # tracing
-        self.trace_id = trace_id or _new_id()
-        self.span_id = span_id or _new_id()
+        self.trace_id = trace_id or _rand.rand64bits()
+        self.span_id = span_id or _rand.rand64bits()
         self.parent_id = parent_id
         self.tracer = tracer
 
@@ -426,8 +419,3 @@ def __repr__(self):
             self.parent_id,
             self.name,
         )
-
-
-def _new_id():
-    """Generate a random trace_id or span_id"""
-    return _getrandbits(64)
diff --git a/pyproject.toml b/pyproject.toml
@@ -91,7 +91,6 @@ exclude = '''
   | tests/
   (
     base
-    | benchmark.py
     | commands
     | contrib/
     (
diff --git a/setup.py b/setup.py
@@ -149,6 +149,9 @@ def get_exts_for(name):
         setup_requires=["setuptools_scm", "cython"],
         ext_modules=cythonize(
             [
+                Cython.Distutils.Extension(
+                    "ddtrace.internal._rand", sources=["ddtrace/internal/_rand.pyx"], language="c",
+                ),
                 Cython.Distutils.Extension(
                     "ddtrace.profiling.collector.stack",
                     sources=["ddtrace/profiling/collector/stack.pyx"],
diff --git a/tests/benchmark.py b/tests/benchmark.py
@@ -13,7 +13,7 @@ def tracer():
 
 def test_tracer_context(benchmark, tracer):
     def func(tracer):
-        with tracer.trace('a', service='s', resource='r', span_type='t'):
+        with tracer.trace("a", service="s", resource="r", span_type="t"):
             pass
 
     benchmark(func, tracer)
@@ -51,24 +51,20 @@ def func(self):
     benchmark(f.func)
 
 
-def test_tracer_start_span(benchmark, tracer):
-    benchmark(tracer.start_span, 'benchmark')
-
-
 def test_tracer_start_finish_span(benchmark, tracer):
     def func(tracer):
-        s = tracer.start_span('benchmark')
+        s = tracer.start_span("benchmark")
         s.finish()
 
     benchmark(func, tracer)
 
 
 def test_trace_simple_trace(benchmark, tracer):
     def func(tracer):
-        with tracer.trace('parent'):
+        with tracer.trace("parent"):
             for i in range(5):
-                with tracer.trace('child') as c:
-                    c.set_tag('i', i)
+                with tracer.trace("child") as c:
+                    c.set_tag("i", i)
 
     benchmark(func, tracer)
 
@@ -83,10 +79,24 @@ def func(tracer, level=0):
 
         # do some work
         num = random.randint(1, 10)
-        span.set_tag('num', num)
+        span.set_tag("num", num)
 
         if level < 10:
             func(tracer, level + 1)
             func(tracer, level + 1)
 
     benchmark(func, tracer)
+
+
+def test_tracer_start_span(benchmark, tracer):
+    benchmark(tracer.start_span, "benchmark")
+
+
+@pytest.mark.benchmark(group="span-id", min_time=0.005)
+def test_span_id_rand64bits(benchmark):
+    from ddtrace.internal import _rand
+
+    @benchmark
+    def f():
+        _ = _rand.rand64bits()
+        _ = _rand.rand64bits()
diff --git a/tests/test_rand.py b/tests/test_rand.py
@@ -0,0 +1,10 @@
+from ddtrace.internal import _rand
+
+
+def test_random():
+    m = set()
+    for i in range(0, 2 ** 16):
+        n = _rand.rand64bits()
+        assert 0 <= n <= 2 ** 64 - 1
+        assert n not in m
+        m.add(n)
diff --git a/tox.ini b/tox.ini
@@ -158,7 +158,7 @@ isolated_build = true
 #      meaning running on py3.x will fail
 #      https://stackoverflow.com/questions/57459123/why-do-i-need-to-run-tox-twice-to-test-a-python-package-with-c-extension
 whitelist_externals=rm
-commands_pre=rm -f ddtrace/profiling/_build.c ddtrace/profiling/collector/stack.c ddtrace/profiling/collector/_traceback.c
+commands_pre=rm -f ddtrace/profiling/_build.c ddtrace/profiling/collector/stack.c ddtrace/profiling/collector/_traceback.c ddtrace/internal/_rand.c
              {envpython} {toxinidir}/setup.py develop
 usedevelop =
   # do not use develop mode with celery as running multiple python versions within

Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,6 @@ exclude = '''`
`91`	`91`	`\| tests/`
`92`	`92`	`(`
`93`	`93`	`base`
`94`		`- \| benchmark.py`
`95`	`94`	`\| commands`
`96`	`95`	`\| contrib/`
`97`	`96`	`(`