Skip to content

Commit 0661bce

Browse files
authored
fix(aws): Don't crash if event isn't a single dict (#915)
Per https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html, the `event` argument passed to the lambda function handler can be any JSON-serializable type - string, int, list, etc. - rather than just the dictionary we've previously assumed it to be. (This is particularly relevant for batch requests, which come in as a list of event dictionaries.) When faced with such an `event`, our current integration crashes, because it tries to run `.get()` on it. This fixes that by introducing the following behavior:
- If `event` is a list, tag the transaction as a batch request and record the batch size as a tag.
- If `event` is a list, take the first entry as representative for the purposes of grabbing request data.
- If `event` (or the representative) isn't a dictionary, handle it gracefully and move on without request data.
1 parent 7fe9e06 commit 0661bce

File tree

2 files changed

+208
-33
lines changed

2 files changed

+208
-33
lines changed

sentry_sdk/integrations/aws_lambda.py

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,12 @@ def sentry_init_error(*args, **kwargs):
5151

5252
exc_info = sys.exc_info()
5353
if exc_info and all(exc_info):
54-
event, hint = event_from_exception(
54+
sentry_event, hint = event_from_exception(
5555
exc_info,
5656
client_options=client.options,
5757
mechanism={"type": "aws_lambda", "handled": False},
5858
)
59-
hub.capture_event(event, hint=hint)
59+
hub.capture_event(sentry_event, hint=hint)
6060

6161
return init_error(*args, **kwargs)
6262

@@ -65,12 +65,36 @@ def sentry_init_error(*args, **kwargs):
6565

6666
def _wrap_handler(handler):
6767
# type: (F) -> F
68-
def sentry_handler(event, context, *args, **kwargs):
68+
def sentry_handler(aws_event, context, *args, **kwargs):
6969
# type: (Any, Any, *Any, **Any) -> Any
70+
71+
# Per https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html,
72+
# `event` here is *likely* a dictionary, but also might be a number of
73+
# other types (str, int, float, None).
74+
#
75+
# In some cases, it is a list (if the user is batch-invoking their
76+
# function, for example), in which case we'll use the first entry as a
77+
# representative from which to try pulling request data. (Presumably it
78+
# will be the same for all events in the list, since they're all hitting
79+
# the lambda in the same request.)
80+
81+
if isinstance(aws_event, list):
82+
request_data = aws_event[0]
83+
batch_size = len(aws_event)
84+
else:
85+
request_data = aws_event
86+
batch_size = 1
87+
88+
if not isinstance(request_data, dict):
89+
# If we're not dealing with a dictionary, we won't be able to get
90+
# headers, path, http method, etc in any case, so it's fine that
91+
# this is empty
92+
request_data = {}
93+
7094
hub = Hub.current
7195
integration = hub.get_integration(AwsLambdaIntegration)
7296
if integration is None:
73-
return handler(event, context, *args, **kwargs)
97+
return handler(aws_event, context, *args, **kwargs)
7498

7599
# If an integration is there, a client has to be there.
76100
client = hub.client # type: Any
@@ -80,9 +104,14 @@ def sentry_handler(event, context, *args, **kwargs):
80104
with capture_internal_exceptions():
81105
scope.clear_breadcrumbs()
82106
scope.add_event_processor(
83-
_make_request_event_processor(event, context, configured_time)
107+
_make_request_event_processor(
108+
request_data, context, configured_time
109+
)
84110
)
85111
scope.set_tag("aws_region", context.invoked_function_arn.split(":")[3])
112+
if batch_size > 1:
113+
scope.set_tag("batch_request", True)
114+
scope.set_tag("batch_size", batch_size)
86115

87116
timeout_thread = None
88117
# Starting the Timeout thread only if the configured time is greater than Timeout warning
@@ -103,21 +132,21 @@ def sentry_handler(event, context, *args, **kwargs):
103132
# Starting the thread to raise timeout warning exception
104133
timeout_thread.start()
105134

106-
headers = event.get("headers", {})
135+
headers = request_data.get("headers", {})
107136
transaction = Transaction.continue_from_headers(
108137
headers, op="serverless.function", name=context.function_name
109138
)
110139
with hub.start_transaction(transaction):
111140
try:
112-
return handler(event, context, *args, **kwargs)
141+
return handler(aws_event, context, *args, **kwargs)
113142
except Exception:
114143
exc_info = sys.exc_info()
115-
event, hint = event_from_exception(
144+
sentry_event, hint = event_from_exception(
116145
exc_info,
117146
client_options=client.options,
118147
mechanism={"type": "aws_lambda", "handled": False},
119148
)
120-
hub.capture_event(event, hint=hint)
149+
hub.capture_event(sentry_event, hint=hint)
121150
reraise(*exc_info)
122151
finally:
123152
if timeout_thread:
@@ -255,12 +284,12 @@ def _make_request_event_processor(aws_event, aws_context, configured_timeout):
255284
# type: (Any, Any, Any) -> EventProcessor
256285
start_time = datetime.utcnow()
257286

258-
def event_processor(event, hint, start_time=start_time):
287+
def event_processor(sentry_event, hint, start_time=start_time):
259288
# type: (Event, Hint, datetime) -> Optional[Event]
260289
remaining_time_in_milis = aws_context.get_remaining_time_in_millis()
261290
exec_duration = configured_timeout - remaining_time_in_milis
262291

263-
extra = event.setdefault("extra", {})
292+
extra = sentry_event.setdefault("extra", {})
264293
extra["lambda"] = {
265294
"function_name": aws_context.function_name,
266295
"function_version": aws_context.function_version,
@@ -276,7 +305,7 @@ def event_processor(event, hint, start_time=start_time):
276305
"log_stream": aws_context.log_stream_name,
277306
}
278307

279-
request = event.get("request", {})
308+
request = sentry_event.get("request", {})
280309

281310
if "httpMethod" in aws_event:
282311
request["method"] = aws_event["httpMethod"]
@@ -290,7 +319,7 @@ def event_processor(event, hint, start_time=start_time):
290319
request["headers"] = _filter_headers(aws_event["headers"])
291320

292321
if _should_send_default_pii():
293-
user_info = event.setdefault("user", {})
322+
user_info = sentry_event.setdefault("user", {})
294323

295324
id = aws_event.get("identity", {}).get("userArn")
296325
if id is not None:
@@ -308,31 +337,31 @@ def event_processor(event, hint, start_time=start_time):
308337
# event. Meaning every body is unstructured to us.
309338
request["data"] = AnnotatedValue("", {"rem": [["!raw", "x", 0, 0]]})
310339

311-
event["request"] = request
340+
sentry_event["request"] = request
312341

313-
return event
342+
return sentry_event
314343

315344
return event_processor
316345

317346

318-
def _get_url(aws_event, aws_context):
    # type: (Any, Any) -> str
    """
    Builds the request URL reported for a lambda invocation

    Arguments:
        aws_event {Any} -- dict-shaped event passed to the lambda handler
        aws_context {Any} -- context from lambda handler

    Returns:
        str -- "<proto>://<host><path>" when the event carries a path plus
        "Host" and "X-Forwarded-Proto" headers, otherwise
        "awslambda:///<function name>".
    """
    path = aws_event.get("path", None)

    # `.get("headers", {})` only covers a *missing* key. The key can also be
    # present with a null (or otherwise non-dict) value, in which case calling
    # `.get()` on it would raise, so treat anything that is not a dict as
    # "no headers".
    headers = aws_event.get("headers")
    if not isinstance(headers, dict):
        headers = {}

    host = headers.get("Host", None)
    proto = headers.get("X-Forwarded-Proto", None)
    if proto and host and path:
        return "{}://{}{}".format(proto, host, path)

    # Not enough information for a real URL; fall back to a pseudo-URL that
    # at least identifies the function.
    return "awslambda:///{}".format(aws_context.function_name)
327356

328357

329-
def _get_cloudwatch_logs_url(context, start_time):
358+
def _get_cloudwatch_logs_url(aws_context, start_time):
330359
# type: (Any, datetime) -> str
331360
"""
332361
Generates a CloudWatchLogs console URL based on the context object
333362
334363
Arguments:
335-
context {Any} -- context from lambda handler
364+
aws_context {Any} -- context from lambda handler
336365
337366
Returns:
338367
str -- AWS Console URL to logs.
@@ -345,8 +374,8 @@ def _get_cloudwatch_logs_url(context, start_time):
345374
";start={start_time};end={end_time}"
346375
).format(
347376
region=environ.get("AWS_REGION"),
348-
log_group=context.log_group_name,
349-
log_stream=context.log_stream_name,
377+
log_group=aws_context.log_group_name,
378+
log_stream=aws_context.log_stream_name,
350379
start_time=(start_time - timedelta(seconds=1)).strftime(formatstring),
351380
end_time=(datetime.utcnow() + timedelta(seconds=2)).strftime(formatstring),
352381
)

tests/integrations/aws_lambda/test_aws.py

Lines changed: 156 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,21 +35,37 @@
3535
from sentry_sdk.transport import HttpTransport
3636
3737
def event_processor(event):
    """
    Reduce an error event to the fields needed before it is logged.

    Returns a new dict holding only the trace context, exception, extra,
    level, request, tags and transaction of `event`; fields absent from
    `event` come back as None.
    """
    # AWS Lambda truncates the log output to 4kb, which is small enough to miss
    # parts of even a single error-event/transaction-envelope pair if considered
    # in full, so only grab the data we need.

    # `or {}` rather than a `.get()` default: the default only applies when
    # the key is missing, not when it is present with a None value.
    contexts = event.get("contexts") or {}

    return {
        "contexts": {"trace": contexts.get("trace")},
        "exception": event.get("exception"),
        "extra": event.get("extra"),
        "level": event.get("level"),
        "request": event.get("request"),
        "tags": event.get("tags"),
        "transaction": event.get("transaction"),
    }
4253
4354
def envelope_processor(envelope):
    """
    Reduce a single-item envelope to the fields needed before it is logged.

    The envelope must hold exactly one item. That item's JSON payload is
    parsed and only its type, transaction, trace context, request and tags
    are kept; a payload missing any of these raises KeyError.
    """
    # AWS Lambda truncates the log output to 4kb, which is small enough to miss
    # parts of even a single error-event/transaction-envelope pair if considered
    # in full, so only grab the data we need.

    # Unpacking into a 1-tuple raises ValueError unless there is exactly one
    # item in the envelope.
    (sole_item,) = envelope.items
    payload = json.loads(sole_item.get_bytes())

    payload_type = payload["type"]
    transaction_name = payload["transaction"]
    trace_context = payload["contexts"]["trace"]
    request_info = payload["request"]
    tags = payload["tags"]

    return {
        "contexts": {"trace": trace_context},
        "type": payload_type,
        "transaction": transaction_name,
        "request": request_info,
        "tags": tags,
    }
5571
@@ -107,10 +123,15 @@ def inner(code, payload, timeout=30, syntax_check=True):
107123
syntax_check=syntax_check,
108124
)
109125

126+
# for better debugging
127+
response["LogResult"] = base64.b64decode(response["LogResult"]).splitlines()
128+
response["Payload"] = response["Payload"].read()
129+
del response["ResponseMetadata"]
130+
110131
events = []
111132
envelopes = []
112133

113-
for line in base64.b64decode(response["LogResult"]).splitlines():
134+
for line in response["LogResult"]:
114135
print("AWS:", line)
115136
if line.startswith(b"EVENT: "):
116137
line = line[len(b"EVENT: ") :]
@@ -362,3 +383,128 @@ def test_handler(event, context):
362383
assert envelope["contexts"]["trace"]["op"] == "serverless.function"
363384
assert envelope["transaction"].startswith("test_function_")
364385
assert envelope["transaction"] in envelope["request"]["url"]
386+
387+
388+
@pytest.mark.parametrize(
389+
"aws_event, has_request_data, batch_size",
390+
[
391+
(b"1231", False, 1),
392+
(b"11.21", False, 1),
393+
(b'"Good dog!"', False, 1),
394+
(b"true", False, 1),
395+
(
396+
b"""
397+
[
398+
{"good dog": "Maisey"},
399+
{"good dog": "Charlie"},
400+
{"good dog": "Cory"},
401+
{"good dog": "Bodhi"}
402+
]
403+
""",
404+
False,
405+
4,
406+
),
407+
(
408+
b"""
409+
[
410+
{
411+
"headers": {
412+
"Host": "dogs.are.great",
413+
"X-Forwarded-Proto": "http"
414+
},
415+
"httpMethod": "GET",
416+
"path": "/tricks/kangaroo",
417+
"queryStringParameters": {
418+
"completed_successfully": "true",
419+
"treat_provided": "true",
420+
"treat_type": "cheese"
421+
},
422+
"dog": "Maisey"
423+
},
424+
{
425+
"headers": {
426+
"Host": "dogs.are.great",
427+
"X-Forwarded-Proto": "http"
428+
},
429+
"httpMethod": "GET",
430+
"path": "/tricks/kangaroo",
431+
"queryStringParameters": {
432+
"completed_successfully": "true",
433+
"treat_provided": "true",
434+
"treat_type": "cheese"
435+
},
436+
"dog": "Charlie"
437+
}
438+
]
439+
""",
440+
True,
441+
2,
442+
),
443+
],
444+
)
445+
def test_non_dict_event(
446+
run_lambda_function,
447+
aws_event,
448+
has_request_data,
449+
batch_size,
450+
DictionaryContaining, # noqa:N803
451+
):
452+
envelopes, events, response = run_lambda_function(
453+
LAMBDA_PRELUDE
454+
+ dedent(
455+
"""
456+
init_sdk(traces_sample_rate=1.0)
457+
458+
def test_handler(event, context):
459+
raise Exception("More treats, please!")
460+
"""
461+
),
462+
aws_event,
463+
)
464+
465+
assert response["FunctionError"] == "Unhandled"
466+
467+
error_event = events[0]
468+
assert error_event["level"] == "error"
469+
assert error_event["contexts"]["trace"]["op"] == "serverless.function"
470+
471+
function_name = error_event["extra"]["lambda"]["function_name"]
472+
assert function_name.startswith("test_function_")
473+
assert error_event["transaction"] == function_name
474+
475+
exception = error_event["exception"]["values"][0]
476+
assert exception["type"] == "Exception"
477+
assert exception["value"] == "More treats, please!"
478+
assert exception["mechanism"]["type"] == "aws_lambda"
479+
480+
envelope = envelopes[0]
481+
assert envelope["type"] == "transaction"
482+
assert envelope["contexts"]["trace"] == DictionaryContaining(
483+
error_event["contexts"]["trace"]
484+
)
485+
assert envelope["contexts"]["trace"]["status"] == "internal_error"
486+
assert envelope["transaction"] == error_event["transaction"]
487+
assert envelope["request"]["url"] == error_event["request"]["url"]
488+
489+
if has_request_data:
490+
request_data = {
491+
"headers": {"Host": "dogs.are.great", "X-Forwarded-Proto": "http"},
492+
"method": "GET",
493+
"url": "http://dogs.are.great/tricks/kangaroo",
494+
"query_string": {
495+
"completed_successfully": "true",
496+
"treat_provided": "true",
497+
"treat_type": "cheese",
498+
},
499+
}
500+
else:
501+
request_data = {"url": "awslambda:///{}".format(function_name)}
502+
503+
assert error_event["request"] == request_data
504+
assert envelope["request"] == request_data
505+
506+
if batch_size > 1:
507+
assert error_event["tags"]["batch_size"] == batch_size
508+
assert error_event["tags"]["batch_request"] is True
509+
assert envelope["tags"]["batch_size"] == batch_size
510+
assert envelope["tags"]["batch_request"] is True

0 commit comments

Comments
 (0)