Skip to content

Commit a333ae5

Browse files
armenzgcursoragentgetsantry[bot]
authored
feat(code_review): Validate before scheduling (#108545)
<!-- Describe your PR here. --> Adds pre-scheduling validation for `event_payload` within `schedule_task` to prevent invalid tasks from being queued. This addresses issues where model changes could lead to tasks being scheduled with incompatible payloads when task brokers are updated before Sentry workers, causing failures downstream. Validation now occurs before the Celery task is dispatched, catching schema mismatches early and improving system robustness. --- Linear Issue: [CW-837](https://linear.app/getsentry/issue/CW-837/validate-the-event-payload-before-scheduling-as-a-task) <p><a href="https://cursor.com/background-agent?bcId=bc-888aa40a-d917-4614-bde4-31cbca092587"><picture><source media="(prefers-color-scheme: dark)" srcset="https://cursor.com/assets/images/open-in-cursor-dark.png"><source media="(prefers-color-scheme: light)" srcset="https://cursor.com/assets/images/open-in-cursor-light.png"><img alt="Open in Cursor" width="131" height="28" src="https://cursor.com/assets/images/open-in-cursor-dark.png"></picture></a>&nbsp;<a href="https://cursor.com/agents?id=bc-888aa40a-d917-4614-bde4-31cbca092587"><picture><source media="(prefers-color-scheme: dark)" srcset="https://cursor.com/assets/images/open-in-web-dark.png"><source media="(prefers-color-scheme: light)" srcset="https://cursor.com/assets/images/open-in-web-light.png"><img alt="Open in Web" width="114" height="28" src="https://cursor.com/assets/images/open-in-web-dark.png"></picture></a></p> --------- Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: Armen Zambrano G. <armenzg@users.noreply.github.com> Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
1 parent 9d5491e commit a333ae5

File tree

3 files changed

+86
-121
lines changed

3 files changed

+86
-121
lines changed

src/sentry/seer/code_review/webhooks/task.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,34 @@ def schedule_task(
6767
)
6868
return
6969

70+
# Validate payload before scheduling to catch schema mismatches early
71+
from pydantic import ValidationError
72+
73+
try:
74+
request_type = transformed_event.get("request_type")
75+
validated_payload: (
76+
SeerCodeReviewTaskRequestForPrClosed | SeerCodeReviewTaskRequestForPrReview
77+
)
78+
if request_type == "pr-closed":
79+
validated_payload = SeerCodeReviewTaskRequestForPrClosed.parse_obj(transformed_event)
80+
else:
81+
validated_payload = SeerCodeReviewTaskRequestForPrReview.parse_obj(transformed_event)
82+
# Convert to dict and handle enum keys (Pydantic v1 converts string keys to enums,
83+
# but JSON requires string keys, so we need to convert them back)
84+
payload = convert_enum_keys_to_strings(validated_payload.dict())
85+
# When upgrading to Pydantic v2, we can remove the convert_enum_keys_to_strings call.
86+
# Pydantic v2 will automatically convert enum keys to strings.
87+
# payload = validated_payload.model_dump(mode="json")
88+
except ValidationError:
89+
logger.warning("%s.validation_failed_before_scheduling", PREFIX)
90+
record_webhook_filtered(
91+
github_event, github_event_action, WebhookFilteredReason.INVALID_PAYLOAD
92+
)
93+
return
94+
7095
process_github_webhook_event.delay(
7196
github_event=github_event.value,
72-
event_payload=transformed_event,
97+
event_payload=payload,
7398
enqueued_at_str=datetime.now(timezone.utc).isoformat(),
7499
tags=tags,
75100
)
@@ -96,7 +121,7 @@ def process_github_webhook_event(
96121
Args:
97122
enqueued_at_str: The timestamp when the task was enqueued
98123
github_event: The GitHub webhook event type from X-GitHub-Event header (e.g., "check_run", "pull_request")
99-
event_payload: The payload of the webhook event
124+
event_payload: The payload of the webhook event (already validated before scheduling)
100125
tags: Sentry SDK tags to set on this task's scope for error correlation
101126
**kwargs: Parameters to pass to webhook handler functions
102127
"""
@@ -106,29 +131,7 @@ def process_github_webhook_event(
106131
if tags:
107132
sentry_sdk.set_tags(tags)
108133
path = get_seer_endpoint_for_event(github_event).value
109-
110-
# Validate payload with Pydantic (except for CHECK_RUN events which use minimal payload)
111-
if github_event != GithubWebhookType.CHECK_RUN:
112-
# Parse with appropriate model based on request type to enforce
113-
# organization_id and integration_id requirements for PR closed
114-
request_type = event_payload.get("request_type")
115-
validated_payload: (
116-
SeerCodeReviewTaskRequestForPrClosed | SeerCodeReviewTaskRequestForPrReview
117-
)
118-
if request_type == "pr-closed":
119-
validated_payload = SeerCodeReviewTaskRequestForPrClosed.parse_obj(event_payload)
120-
else:
121-
validated_payload = SeerCodeReviewTaskRequestForPrReview.parse_obj(event_payload)
122-
# Convert to dict and handle enum keys (Pydantic v1 converts string keys to enums,
123-
# but JSON requires string keys, so we need to convert them back)
124-
payload = convert_enum_keys_to_strings(validated_payload.dict())
125-
# When upgrading to Pydantic v2, we can remove the convert_enum_keys_to_strings call.
126-
# Pydantic v2 will automatically convert enum keys to strings.
127-
# payload = validated_payload.model_dump(mode="json")
128-
else:
129-
payload = event_payload
130-
131-
make_seer_request(path=path, payload=payload)
134+
make_seer_request(path=path, payload=event_payload)
132135
except Exception as e:
133136
status = e.__class__.__name__
134137
# Retryable errors are automatically retried by taskworker.

tests/sentry/seer/code_review/test_webhooks.py

Lines changed: 0 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -581,102 +581,6 @@ def test_pr_review_validation_passes_without_organization_id(
581581

582582
assert mock_request.call_count == 1
583583

584-
@patch("sentry.seer.code_review.utils.make_signed_seer_api_request")
585-
def test_pr_closed_validation_fails_without_organization_id(
586-
self, mock_request: MagicMock
587-
) -> None:
588-
"""Test that PR closed validation fails without organization_id (it's required)."""
589-
mock_request.return_value = self._mock_response(200, b"{}")
590-
591-
event_payload = {
592-
"request_type": "pr-closed",
593-
"external_owner_id": "456",
594-
"data": {
595-
"repo": {
596-
"provider": "github",
597-
"owner": "test-owner",
598-
"name": "test-repo",
599-
"external_id": "123456",
600-
"base_commit_sha": "abc123",
601-
"integration_id": "99999",
602-
# organization_id intentionally omitted
603-
},
604-
"pr_id": 123,
605-
"bug_prediction_specific_information": {
606-
"organization_id": 789,
607-
"organization_slug": "test-org",
608-
},
609-
"config": {
610-
"features": {"bug_prediction": True},
611-
"trigger": "on_new_commit",
612-
"trigger_at": "2024-01-15T10:30:00Z",
613-
"sentry_received_trigger_at": "2024-01-15T10:30:00Z",
614-
},
615-
},
616-
}
617-
618-
# Should raise validation error
619-
from pydantic import ValidationError
620-
621-
with pytest.raises(ValidationError) as exc_info:
622-
process_github_webhook_event._func(
623-
github_event=GithubWebhookType.PULL_REQUEST,
624-
event_payload=event_payload,
625-
enqueued_at_str=self.enqueued_at_str,
626-
)
627-
628-
# Verify the error is about organization_id
629-
errors = exc_info.value.errors()
630-
assert any("organization_id" in str(error) for error in errors)
631-
632-
@patch("sentry.seer.code_review.utils.make_signed_seer_api_request")
633-
def test_pr_closed_validation_fails_without_integration_id(
634-
self, mock_request: MagicMock
635-
) -> None:
636-
"""Test that PR closed validation fails without integration_id (it's required)."""
637-
mock_request.return_value = self._mock_response(200, b"{}")
638-
639-
event_payload = {
640-
"request_type": "pr-closed",
641-
"external_owner_id": "456",
642-
"data": {
643-
"repo": {
644-
"provider": "github",
645-
"owner": "test-owner",
646-
"name": "test-repo",
647-
"external_id": "123456",
648-
"base_commit_sha": "abc123",
649-
"organization_id": 789,
650-
# integration_id intentionally omitted
651-
},
652-
"pr_id": 123,
653-
"bug_prediction_specific_information": {
654-
"organization_id": 789,
655-
"organization_slug": "test-org",
656-
},
657-
"config": {
658-
"features": {"bug_prediction": True},
659-
"trigger": "on_new_commit",
660-
"trigger_at": "2024-01-15T10:30:00Z",
661-
"sentry_received_trigger_at": "2024-01-15T10:30:00Z",
662-
},
663-
},
664-
}
665-
666-
# Should raise validation error
667-
from pydantic import ValidationError
668-
669-
with pytest.raises(ValidationError) as exc_info:
670-
process_github_webhook_event._func(
671-
github_event=GithubWebhookType.PULL_REQUEST,
672-
event_payload=event_payload,
673-
enqueued_at_str=self.enqueued_at_str,
674-
)
675-
676-
# Verify the error is about integration_id
677-
errors = exc_info.value.errors()
678-
assert any("integration_id" in str(error) for error in errors)
679-
680584
@patch("sentry.seer.code_review.utils.make_signed_seer_api_request")
681585
def test_pr_closed_validation_passes_with_required_fields(
682586
self, mock_request: MagicMock

tests/sentry/seer/code_review/webhooks/test_pull_request.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,61 @@ def test_pull_request_closed_draft_still_sends_to_seer(self) -> None:
381381
call_kwargs = self.mock_seer.call_args[1]
382382
payload = call_kwargs["payload"]
383383
assert payload["request_type"] == SeerCodeReviewRequestType.PR_CLOSED.value
384+
385+
def test_validation_happens_before_task_scheduling_pr_closed(self) -> None:
386+
"""Test that invalid pr-closed payloads are caught before scheduling the Celery task."""
387+
with (
388+
self.code_review_setup(),
389+
self.tasks(),
390+
patch(
391+
"sentry.seer.code_review.webhooks.task.transform_webhook_to_codegen_request"
392+
) as mock_transform,
393+
):
394+
# Return an invalid payload missing required fields for pr-closed
395+
mock_transform.return_value = {
396+
"request_type": "pr-closed",
397+
"data": {
398+
# Missing required fields like repo, pr_id, etc.
399+
"invalid": "payload"
400+
},
401+
}
402+
403+
event = orjson.loads(PULL_REQUEST_OPENED_EVENT_EXAMPLE)
404+
event["action"] = "closed"
405+
406+
self._send_webhook_event(
407+
GithubWebhookType.PULL_REQUEST,
408+
orjson.dumps(event),
409+
)
410+
411+
# Task should NOT be scheduled due to validation failure
412+
self.mock_seer.assert_not_called()
413+
414+
def test_validation_happens_before_task_scheduling_pr_review(self) -> None:
415+
"""Test that invalid pr-review payloads are caught before scheduling the Celery task."""
416+
with (
417+
self.code_review_setup(),
418+
self.tasks(),
419+
patch(
420+
"sentry.seer.code_review.webhooks.task.transform_webhook_to_codegen_request"
421+
) as mock_transform,
422+
):
423+
# Return an invalid payload missing required fields for pr-review
424+
mock_transform.return_value = {
425+
"request_type": "pr-review",
426+
"data": {
427+
# Missing required fields
428+
"invalid": "payload"
429+
},
430+
}
431+
432+
event = orjson.loads(PULL_REQUEST_OPENED_EVENT_EXAMPLE)
433+
event["action"] = "opened"
434+
435+
self._send_webhook_event(
436+
GithubWebhookType.PULL_REQUEST,
437+
orjson.dumps(event),
438+
)
439+
440+
# Task should NOT be scheduled due to validation failure
441+
self.mock_seer.assert_not_called()

0 commit comments

Comments
 (0)