feat(aci): Separate Buffer for Workflows #97549
base: master
Diff: delayed processing module

@@ -9,11 +9,11 @@
 from celery import Task

-from sentry import buffer, options
-from sentry.buffer.base import BufferField
-from sentry.buffer.redis import BufferHookEvent, redis_buffer_registry
+from sentry import options
+from sentry.buffer.base import Buffer, BufferField
 from sentry.db import models
 from sentry.utils import metrics
+from sentry.utils.lazy_service_wrapper import LazyServiceWrapper
 from sentry.utils.registry import NoRegistrationExistsError, Registry

 logger = logging.getLogger("sentry.delayed_processing")
@@ -56,12 +56,19 @@ def get_buffer_keys(cls) -> list[str]:
             for shard in range(cls.buffer_shards)
         ]

+    @staticmethod
+    def buffer_backend() -> LazyServiceWrapper[Buffer]:
+        raise NotImplementedError
+

 delayed_processing_registry = Registry[type[DelayedProcessingBase]]()


 def fetch_group_to_event_data(
-    project_id: int, model: type[models.Model], batch_key: str | None = None
+    buffer: LazyServiceWrapper[Buffer],
+    project_id: int,
+    model: type[models.Model],
+    batch_key: str | None = None,
 ) -> dict[str, str]:
     field: dict[str, models.Model | int | str] = {
         "project_id": project_id,
@@ -70,7 +77,7 @@ def fetch_group_to_event_data(
     if batch_key:
         field["batch_key"] = batch_key

-    return buffer.backend.get_hash(model=model, field=field)
+    return buffer.get_hash(model=model, field=field)


 def bucket_num_groups(num_groups: int) -> str:
@@ -80,7 +87,9 @@ def bucket_num_groups(num_groups: int) -> str:
     return "1"


-def process_in_batches(project_id: int, processing_type: str) -> None:
+def process_in_batches(
+    buffer: LazyServiceWrapper[Buffer], project_id: int, processing_type: str
+) -> None:
     """
     This will check the number of alertgroup_to_event_data items in the Redis buffer for a project.
@@ -109,7 +118,7 @@ def process_in_batches(project_id: int, processing_type: str) -> None:
     task = processing_info.processing_task
     filters: dict[str, BufferField] = asdict(hash_args.filters)

-    event_count = buffer.backend.get_hash_length(model=hash_args.model, field=filters)
+    event_count = buffer.get_hash_length(model=hash_args.model, field=filters)
     metrics.incr(
         f"{processing_type}.num_groups", tags={"num_groups": bucket_num_groups(event_count)}
     )
@@ -127,22 +136,22 @@ def process_in_batches(project_id: int, processing_type: str) -> None:
     )

     # if the dictionary is large, get the items and chunk them.
-    alertgroup_to_event_data = fetch_group_to_event_data(project_id, hash_args.model)
+    alertgroup_to_event_data = fetch_group_to_event_data(buffer, project_id, hash_args.model)

     with metrics.timer(f"{processing_type}.process_batch.duration"):
         items = iter(alertgroup_to_event_data.items())

         while batch := dict(islice(items, batch_size)):
             batch_key = str(uuid.uuid4())

-            buffer.backend.push_to_hash_bulk(
+            buffer.push_to_hash_bulk(
                 model=hash_args.model,
                 filters={**filters, "batch_key": batch_key},
                 data=batch,
             )

             # remove the batched items from the project alertgroup_to_event_data
-            buffer.backend.delete_hash(**asdict(hash_args), fields=list(batch.keys()))
+            buffer.delete_hash(**asdict(hash_args), fields=list(batch.keys()))

             task.apply_async(
                 kwargs={"project_id": project_id, "batch_key": batch_key},
@@ -159,6 +168,8 @@ def process_buffer() -> None:
             logger.info(log_name, extra={"option": handler.option})
             continue

+        buffer = handler.buffer_backend()
+
         with metrics.timer(f"{processing_type}.process_all_conditions.duration"):
             # We need to use a very fresh timestamp here; project scores (timestamps) are
             # updated with each relevant event, and some can be updated every few milliseconds.
@@ -167,7 +178,7 @@ def process_buffer() -> None:
             # retrieved and processed here.
             fetch_time = datetime.now(tz=timezone.utc).timestamp()
             buffer_keys = handler.get_buffer_keys()
-            all_project_ids_and_timestamps = buffer.backend.bulk_get_sorted_set(
+            all_project_ids_and_timestamps = buffer.bulk_get_sorted_set(
                 buffer_keys,
                 min=0,
                 max=fetch_time,
@@ -183,14 +194,10 @@ def process_buffer() -> None:

             project_ids = list(all_project_ids_and_timestamps.keys())
             for project_id in project_ids:
-                process_in_batches(project_id, processing_type)
+                process_in_batches(buffer, project_id, processing_type)

-            buffer.backend.delete_keys(
+            buffer.delete_keys(
                 buffer_keys,
                 min=0,
                 max=fetch_time,
             )
-
-
-if not redis_buffer_registry.has(BufferHookEvent.FLUSH):
-    redis_buffer_registry.add_handler(BufferHookEvent.FLUSH, process_buffer)

Comment (on the removed flush-hook registration): 🎉 - i never liked this code. just curious, if we want to add another handler when the buffer is flushed, would we have to manually add it to …?

Reply: I think we've made it so there isn't just One buffer anymore, and the dispatching to the appropriate buffer is now handled by the config in …
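With the flush-hook registration gone, `process_buffer` now asks each registered handler for its buffer via `buffer_backend()` instead of reaching for the global `sentry.buffer`. A minimal sketch of a handler under that contract; the class name and registry key are illustrative, `DelayedProcessingBase` and `delayed_processing_registry` are assumed to be importable from the patched module above, and the other required class members are omitted:

    # Hypothetical sketch, not from this PR. Only the new buffer_backend() hook
    # is shown; option, hash args, processing task, etc. are omitted.
    from sentry import buffer as default_buffer
    from sentry.buffer.base import Buffer
    from sentry.utils.lazy_service_wrapper import LazyServiceWrapper


    @delayed_processing_registry.register("delayed_rules")  # illustrative key
    class MyDelayedProcessing(DelayedProcessingBase):
        @staticmethod
        def buffer_backend() -> LazyServiceWrapper[Buffer]:
            # Existing rule processing keeps using the legacy global buffer.
            return default_buffer.backend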
New file: workflows-specific buffer module

@@ -0,0 +1,21 @@
+from django.conf import settings
+
+import sentry.buffer as old_buffer
+from sentry import options
+from sentry.buffer.base import Buffer
+from sentry.utils.services import LazyServiceWrapper
+
+# Workflows-specific Buffer that can be configured separately from the default Buffer.
+_backend = LazyServiceWrapper(
+    Buffer, settings.SENTRY_WORKFLOW_BUFFER, settings.SENTRY_WORKFLOW_BUFFER_OPTIONS
+)
+
+
+def get_backend() -> LazyServiceWrapper[Buffer]:
+    """
+    Retrieve the appropriate Buffer to use for the workflow engine.
+    """
+    if options.get("workflow_engine.buffer.use_new_buffer"):
+        return _backend
+    else:
+        return old_buffer.backend
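For illustration, a sketch of how workflow-engine code might consume `get_backend()`; the import path and the `enqueue_workflow_event_data` helper are assumptions, while the option name and the `push_to_hash_bulk` signature come from the diffs above:

    # Illustrative usage only; the module path is an assumption about where the
    # new file lives, and enqueue_workflow_event_data is a made-up helper.
    from sentry.workflow_engine import buffer as workflow_buffer


    def enqueue_workflow_event_data(model, filters, data) -> None:
        # get_backend() resolves to the separate workflows buffer when the
        # "workflow_engine.buffer.use_new_buffer" option is enabled, otherwise
        # to the legacy global buffer; callers don't need to know which.
        buf = workflow_buffer.get_backend()
        buf.push_to_hash_bulk(model=model, filters=filters, data=data)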
Comment (on the new module): can we remove this yet? i'm mostly concerned about the legacy delayed_processor code.

Reply: I think this should be okay, though it is hard to reason about. The task now calls process_buffer directly, and process_buffer uses the registry to pick the right backend for the right delayed processor. Though, I didn't think too hard about what the registry was trying to accomplish, so I may be missing something. 😬
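Following that description, a sketch of how a workflow-specific handler could plug the new module into the registry dispatch; the class name and registry key are assumptions, not taken from this PR, and the imports are assumed as in the earlier sketches:

    # Hypothetical wiring: DelayedProcessingBase / delayed_processing_registry
    # come from the patched delayed-processing module, workflow_buffer from the
    # new module above (its import path is assumed).
    @delayed_processing_registry.register("delayed_workflows")  # key is an assumption
    class DelayedWorkflow(DelayedProcessingBase):
        @staticmethod
        def buffer_backend() -> LazyServiceWrapper[Buffer]:
            # process_buffer() calls this per handler, so workflow batches are
            # routed through the optionally separate workflows buffer.
            return workflow_buffer.get_backend()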