perf: improve performance of internal RateLimiter class

brettlangdon · web-flow · commit fc8eb2801560 · 2021-12-16T20:51:49.000Z
The main improvement comes from no longer calling `compat.monotonic()` inside the rate limiter; instead, the caller passes `span.start_ns` to `RateLimiter.is_allowed`. This change removes up to 2 calls to `compat.monotonic()` per `is_allowed` call (one in `_update_rate_counts` and one in `_replenish`) and drastically improves the performance of the rate limiter. See #3041 for more details.
diff --git a/ddtrace/internal/rate_limiter.py b/ddtrace/internal/rate_limiter.py
@@ -13,8 +13,8 @@ class RateLimiter(object):
 
     __slots__ = (
         "_lock",
-        "current_window",
-        "last_update",
+        "current_window_ns",
+        "last_update_ns",
         "max_tokens",
         "prev_window_rate",
         "rate_limit",
@@ -38,54 +38,54 @@ def __init__(self, rate_limit):
         self.tokens = rate_limit  # type: float
         self.max_tokens = rate_limit
 
-        self.last_update = compat.monotonic()
+        self.last_update_ns = compat.monotonic_ns()
 
-        self.current_window = 0  # type: float
+        self.current_window_ns = 0  # type: int
         self.tokens_allowed = 0
         self.tokens_total = 0
         self.prev_window_rate = None  # type: Optional[float]
 
         self._lock = threading.Lock()
 
-    def is_allowed(self):
-        # type: () -> bool
+    def is_allowed(self, timestamp_ns):
+        # type: (int) -> bool
         """
         Check whether the current request is allowed or not
 
         This method will also reduce the number of available tokens by 1
 
+        :param int timestamp_ns: timestamp in nanoseconds for the current request.
         :returns: Whether the current request is allowed or not
         :rtype: :obj:`bool`
         """
         # Determine if it is allowed
-        allowed = self._is_allowed()
+        allowed = self._is_allowed(timestamp_ns)
         # Update counts used to determine effective rate
-        self._update_rate_counts(allowed)
+        self._update_rate_counts(allowed, timestamp_ns)
         return allowed
 
-    def _update_rate_counts(self, allowed):
-        # type: (bool) -> None
-        now = compat.monotonic()
-
+    def _update_rate_counts(self, allowed, timestamp_ns):
+        # type: (bool, int) -> None
         # No tokens have been seen yet, start a new window
-        if not self.current_window:
-            self.current_window = now
+        if not self.current_window_ns:
+            self.current_window_ns = timestamp_ns
 
         # If more than 1 second has passed since last window, reset
-        elif now - self.current_window >= 1.0:
+        # DEV: We are comparing nanoseconds, so 1e9 is 1 second
+        elif timestamp_ns - self.current_window_ns >= 1e9:
             # Store previous window's rate to average with current for `.effective_rate`
             self.prev_window_rate = self._current_window_rate()
             self.tokens_allowed = 0
             self.tokens_total = 0
-            self.current_window = now
+            self.current_window_ns = timestamp_ns
 
         # Keep track of total tokens seen vs allowed
         if allowed:
             self.tokens_allowed += 1
         self.tokens_total += 1
 
-    def _is_allowed(self):
-        # type: () -> bool
+    def _is_allowed(self, timestamp_ns):
+        # type: (int) -> bool
         # Rate limit of 0 blocks everything
         if self.rate_limit == 0:
             return False
@@ -96,24 +96,24 @@ def _is_allowed(self):
 
         # Lock, we need this to be thread safe, it should be shared by all threads
         with self._lock:
-            self._replenish()
+            self._replenish(timestamp_ns)
 
             if self.tokens >= 1:
                 self.tokens -= 1
                 return True
 
             return False
 
-    def _replenish(self):
-        # type: () -> None
+    def _replenish(self, timestamp_ns):
+        # type: (int) -> None
         # If we are at the max, we do not need to add any more
         if self.tokens == self.max_tokens:
             return
 
         # Add more available tokens based on how much time has passed
-        now = compat.monotonic()
-        elapsed = now - self.last_update
-        self.last_update = now
+        # DEV: We store as nanoseconds, convert to seconds
+        elapsed = (timestamp_ns - self.last_update_ns) / 1e9
+        self.last_update_ns = timestamp_ns
 
         # Update the number of available tokens, but ensure we do not exceed the max
         self.tokens = min(
@@ -147,11 +147,11 @@ def effective_rate(self):
         return (self._current_window_rate() + self.prev_window_rate) / 2.0
 
     def __repr__(self):
-        return "{}(rate_limit={!r}, tokens={!r}, last_update={!r}, effective_rate={!r})".format(
+        return "{}(rate_limit={!r}, tokens={!r}, last_update_ns={!r}, effective_rate={!r})".format(
             self.__class__.__name__,
             self.rate_limit,
             self.tokens,
-            self.last_update,
+            self.last_update_ns,
             self.effective_rate,
         )
 
diff --git a/ddtrace/sampler.py b/ddtrace/sampler.py
@@ -290,7 +290,7 @@ def sample(self, span):
             self._set_priority(span, USER_KEEP)
 
         # Ensure all allowed traces adhere to the global rate limit
-        allowed = self.limiter.is_allowed()
+        allowed = self.limiter.is_allowed(span.start_ns)
         # Always set the sample rate metric whether it was allowed or not
         # DEV: Setting this allows us to properly compute metrics and debug the
         #      various sample rates that are getting applied to this span
diff --git a/ddtrace/span.py b/ddtrace/span.py
@@ -140,7 +140,7 @@ def __init__(
         self.metrics = {}  # type: _MetricDictType
 
         # timing
-        self.start_ns = time_ns() if start is None else int(start * 1e9)
+        self.start_ns = time_ns() if start is None else int(start * 1e9)  # type: int
         self.duration_ns = None  # type: Optional[int]
 
         # tracing
diff --git a/tests/tracer/test_rate_limiter.py b/tests/tracer/test_rate_limiter.py