Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class CompressionAlgo(Enum):
from typing import Any
from typing import Sequence
from typing import Tuple
from typing_extensions import Literal
from typing_extensions import TypedDict

from sentry_sdk._types import (
Expand Down Expand Up @@ -528,6 +529,7 @@ def __init__(
profiles_sample_rate=None, # type: Optional[float]
profiles_sampler=None, # type: Optional[TracesSampler]
profiler_mode=None, # type: Optional[ProfilerMode]
profile_lifecycle="manual", # type: Literal["manual", "auto"]
profile_session_sample_rate=None, # type: Optional[float]
auto_enabling_integrations=True, # type: bool
disabled_integrations=None, # type: Optional[Sequence[sentry_sdk.integrations.Integration]]
Expand Down
159 changes: 133 additions & 26 deletions sentry_sdk/profiler/continuous_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import threading
import time
import uuid
from collections import deque
from datetime import datetime, timezone

from sentry_sdk.consts import VERSION
Expand All @@ -27,6 +28,7 @@
if TYPE_CHECKING:
from typing import Any
from typing import Callable
from typing import Deque
from typing import Dict
from typing import List
from typing import Optional
Expand Down Expand Up @@ -120,6 +122,9 @@ def setup_continuous_profiler(options, sdk_info, capture_func):

def try_autostart_continuous_profiler():
# type: () -> None

# TODO: deprecate this as it'll be replaced by the auto lifecycle option

if _scheduler is None:
return

Expand All @@ -129,6 +134,22 @@ def try_autostart_continuous_profiler():
_scheduler.manual_start()


def try_profile_lifecycle_auto_start():
    # type: () -> bool
    """
    Ask the module-level scheduler to auto start the profiler.

    Returns the result of the scheduler's ``auto_start`` call, or ``False``
    when no scheduler has been set up.
    """
    return False if _scheduler is None else _scheduler.auto_start()


def try_profile_lifecycle_auto_stop():
    # type: () -> None
    """
    Ask the module-level scheduler to auto stop the profiler.

    Does nothing when no scheduler has been set up.
    """
    if _scheduler is not None:
        _scheduler.auto_stop()


def start_profiler():
# type: () -> None
if _scheduler is None:
Expand Down Expand Up @@ -179,16 +200,22 @@ def __init__(self, frequency, options, sdk_info, capture_func):
self.options = options
self.sdk_info = sdk_info
self.capture_func = capture_func

self.lifecycle = self.options.get("profile_lifecycle")
profile_session_sample_rate = self.options.get("profile_session_sample_rate")
self.sampled = determine_profile_session_sampling_decision(
profile_session_sample_rate
)

self.sampler = self.make_sampler()
self.buffer = None # type: Optional[ProfileBuffer]
self.pid = None # type: Optional[int]

self.running = False

profile_session_sample_rate = self.options.get("profile_session_sample_rate")
self.sampled = determine_profile_session_sampling_decision(
profile_session_sample_rate
)
self.active_spans = 0
self.started_spans = deque(maxlen=128) # type: Deque[None]
self.finished_spans = deque(maxlen=128) # type: Deque[None]
Copy link
Contributor

@sentrivana sentrivana Feb 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the thought behind having these be deques? Is it so that the counters are thread safe?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. We need to count the number of active spans (just transactions here) in order to determine whether the profiler should be running. This is tracked across all threads, so we need a thread-safe way of counting them. To avoid using a Python lock, which can have a significant performance impact, we work around it with a deque.

I'm also updating it to mirror the implementation in transaction_profiler.py more closely, as I realized the current implementation can run into issues when the deque overflows, whereas the other implementation handles that case better.


def is_auto_start_enabled(self):
# type: () -> bool
Expand All @@ -207,15 +234,45 @@ def is_auto_start_enabled(self):

return experiments.get("continuous_profiling_auto_start")

def auto_start(self):
    # type: () -> bool
    """
    Register a started span and make sure the profiler is running.

    Only takes effect when this session was sampled and the configured
    lifecycle is "auto".

    Returns True when the start was registered, False otherwise.
    """
    if not (self.sampled and self.lifecycle == "auto"):
        return False

    logger.debug("[Profiling] Auto starting profiler")

    # Record the start event first, then bring the profiler up.
    self.started_spans.append(None)
    self.ensure_running()

    return True

def auto_stop(self):
    # type: () -> None
    """
    Register a finished span when the configured lifecycle is "auto".

    This only records the event on ``finished_spans``; nothing is torn
    down here.
    """
    if self.lifecycle == "auto":
        logger.debug("[Profiling] Auto stopping profiler")

        self.finished_spans.append(None)

def manual_start(self):
    # type: () -> None
    """
    Start the profiler on explicit request.

    Only takes effect when this session was sampled and the configured
    lifecycle is "manual"; otherwise the call is ignored.
    """
    if self.sampled and self.lifecycle == "manual":
        self.ensure_running()

def manual_stop(self):
    # type: () -> None
    """
    Stop the profiler on explicit request.

    The call is ignored unless the configured lifecycle is "manual".
    """
    if self.lifecycle == "manual":
        self.teardown()

def ensure_running(self):
Expand Down Expand Up @@ -249,28 +306,77 @@ def make_sampler(self):

cache = LRUCache(max_size=256)

def _sample_stack(*args, **kwargs):
# type: (*Any, **Any) -> None
"""
Take a sample of the stack on all the threads in the process.
This should be called at a regular interval to collect samples.
"""

ts = now()

try:
sample = [
(str(tid), extract_stack(frame, cache, cwd))
for tid, frame in sys._current_frames().items()
]
except AttributeError:
# For some reason, the frame we get doesn't have certain attributes.
# When this happens, we abandon the current sample as it's bad.
capture_internal_exception(sys.exc_info())
return

if self.buffer is not None:
self.buffer.write(ts, sample)
if self.lifecycle == "auto":

def _sample_stack(*args, **kwargs):
# type: (*Any, **Any) -> None
"""
Take a sample of the stack on all the threads in the process.
This should be called at a regular interval to collect samples.
"""

if (
not self.active_spans
and not self.started_spans
and not self.finished_spans
):
self.running = False
return

started_spans = len(self.started_spans)
finished_spans = len(self.finished_spans)

ts = now()

try:
sample = [
(str(tid), extract_stack(frame, cache, cwd))
for tid, frame in sys._current_frames().items()
]
except AttributeError:
# For some reason, the frame we get doesn't have certain attributes.
# When this happens, we abandon the current sample as it's bad.
capture_internal_exception(sys.exc_info())
return

for _ in range(started_spans):
self.started_spans.popleft()

for _ in range(finished_spans):
self.finished_spans.popleft()

self.active_spans = self.active_spans + started_spans - finished_spans

if self.buffer is None:
self.reset_buffer()

if self.buffer is not None:
self.buffer.write(ts, sample)

else:

def _sample_stack(*args, **kwargs):
# type: (*Any, **Any) -> None
"""
Take a sample of the stack on all the threads in the process.
This should be called at a regular interval to collect samples.
"""

ts = now()

try:
sample = [
(str(tid), extract_stack(frame, cache, cwd))
for tid, frame in sys._current_frames().items()
]
except AttributeError:
# For some reason, the frame we get doesn't have certain attributes.
# When this happens, we abandon the current sample as it's bad.
capture_internal_exception(sys.exc_info())
return

if self.buffer is not None:
self.buffer.write(ts, sample)

return _sample_stack

Expand All @@ -294,6 +400,7 @@ def run(self):

if self.buffer is not None:
self.buffer.flush()
self.buffer = None


class ThreadContinuousScheduler(ContinuousScheduler):
Expand Down
14 changes: 13 additions & 1 deletion sentry_sdk/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@
from sentry_sdk.attachments import Attachment
from sentry_sdk.consts import DEFAULT_MAX_BREADCRUMBS, FALSE_VALUES, INSTRUMENTER
from sentry_sdk.feature_flags import FlagBuffer, DEFAULT_FLAG_CAPACITY
from sentry_sdk.profiler.continuous_profiler import try_autostart_continuous_profiler
from sentry_sdk.profiler.continuous_profiler import (
get_profiler_id,
try_autostart_continuous_profiler,
try_profile_lifecycle_auto_start,
)
from sentry_sdk.profiler.transaction_profiler import Profile
from sentry_sdk.session import Session
from sentry_sdk.tracing_utils import (
Expand Down Expand Up @@ -1051,6 +1055,14 @@ def start_transaction(

transaction._profile = profile

transaction._started_profile_lifecycle = try_profile_lifecycle_auto_start()

# Typically, the profiler is set when the transaction is created. But when
# using the auto lifecycle, the profiler isn't running when the first
# transaction is started. So make sure we update the profiler id on it.
if transaction._started_profile_lifecycle:
transaction.set_profiler_id(get_profiler_id())

# we don't bother to keep spans if we already know we're not going to
# send the transaction
max_spans = (client.options["_experiments"].get("max_spans")) or 1000
Expand Down
10 changes: 9 additions & 1 deletion sentry_sdk/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

import sentry_sdk
from sentry_sdk.consts import INSTRUMENTER, SPANSTATUS, SPANDATA
from sentry_sdk.profiler.continuous_profiler import get_profiler_id
from sentry_sdk.profiler.continuous_profiler import (
get_profiler_id,
try_profile_lifecycle_auto_stop,
)
from sentry_sdk.utils import (
get_current_thread_meta,
is_valid_sample_rate,
Expand Down Expand Up @@ -268,6 +271,7 @@ class Span:
"scope",
"origin",
"name",
"_started_profile_lifecycle",
)

def __init__(
Expand Down Expand Up @@ -790,6 +794,7 @@ def __init__( # type: ignore[misc]
self._profile = (
None
) # type: Optional[sentry_sdk.profiler.transaction_profiler.Profile]
self._started_profile_lifecycle = False # type: bool
self._baggage = baggage

def __repr__(self):
Expand Down Expand Up @@ -842,6 +847,9 @@ def __exit__(self, ty, value, tb):
if self._profile is not None:
self._profile.__exit__(ty, value, tb)

if self._started_profile_lifecycle:
try_profile_lifecycle_auto_stop()

super().__exit__(ty, value, tb)

@property
Expand Down
Loading
Loading