|
4 | 4 |
|
5 | 5 | import sentry_sdk |
6 | 6 | from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE |
| 7 | +from sentry_sdk.data_collection import ( |
| 8 | + BODY_TYPE_INCOMING_REQUEST, |
| 9 | + COLLECTION_OFF, |
| 10 | + apply_key_value_collection, |
| 11 | + filter_request_headers, |
| 12 | + scrub_query_string, |
| 13 | + should_collect_body_type, |
| 14 | +) |
7 | 15 | from sentry_sdk.scope import should_send_default_pii |
8 | 16 | from sentry_sdk.utils import AnnotatedValue, logger |
9 | 17 |
|
@@ -90,15 +98,34 @@ def extract_into_event(self, event: "Event") -> None: |
90 | 98 | if not client.is_active(): |
91 | 99 | return |
92 | 100 |
|
| 101 | + dc = client.data_collection |
| 102 | + |
93 | 103 | data: "Optional[Union[AnnotatedValue, Dict[str, Any]]]" = None |
94 | 104 |
|
95 | 105 | content_length = self.content_length() |
96 | 106 | request_info = event.get("request", {}) |
97 | 107 |
|
98 | | - if should_send_default_pii(): |
| 108 | + # Cookies. When data_collection is set explicitly, collect according to |
| 109 | + # the cookies behavior (default denyList scrubs sensitive cookie values); |
| 110 | + # otherwise fall back to the legacy send_default_pii gate. |
| 111 | + if dc.explicit: |
| 112 | + if dc.cookies.mode != COLLECTION_OFF: |
| 113 | + request_info["cookies"] = apply_key_value_collection( |
| 114 | + dict(self.cookies()), dc.cookies |
| 115 | + ) |
| 116 | + elif should_send_default_pii(): |
99 | 117 | request_info["cookies"] = dict(self.cookies()) |
100 | 118 |
|
101 | | - if not request_body_within_bounds(client, content_length): |
| 119 | + # Request body. When data_collection is set explicitly, only collect the |
| 120 | + # incoming request body if that body type is enabled; size is still |
| 121 | + # bounded by max_request_body_size. |
| 122 | + collect_body = True |
| 123 | + if dc.explicit: |
| 124 | + collect_body = should_collect_body_type(dc, BODY_TYPE_INCOMING_REQUEST) |
| 125 | + |
| 126 | + if not collect_body: |
| 127 | + data = None |
| 128 | + elif not request_body_within_bounds(client, content_length): |
102 | 129 | data = AnnotatedValue.removed_because_over_size_limit() |
103 | 130 | else: |
104 | 131 | # First read the raw body data |
@@ -213,21 +240,68 @@ def _filter_headers( |
213 | 240 | headers: "Mapping[str, str]", |
214 | 241 | use_annotated_value: bool = True, |
215 | 242 | ) -> "Mapping[str, Union[AnnotatedValue, str]]": |
216 | | - if should_send_default_pii(): |
217 | | - return headers |
218 | | - |
219 | 243 | substitute: "Union[AnnotatedValue, str]" = ( |
220 | 244 | SENSITIVE_DATA_SUBSTITUTE |
221 | 245 | if not use_annotated_value |
222 | 246 | else AnnotatedValue.removed_because_over_size_limit() |
223 | 247 | ) |
224 | 248 |
|
| 249 | + dc = sentry_sdk.get_client().data_collection |
| 250 | + if dc.explicit: |
| 251 | + # Apply the configured request-header collection behavior (default |
| 252 | + # denyList scrubs sensitive header values; the raw Cookie/Set-Cookie |
| 253 | + # header is always filtered). |
| 254 | + return filter_request_headers( |
| 255 | + headers, dc.http_headers.request, substitute=substitute |
| 256 | + ) |
| 257 | + |
| 258 | + # Legacy behavior (data_collection not set explicitly). |
| 259 | + if should_send_default_pii(): |
| 260 | + return headers |
| 261 | + |
225 | 262 | return { |
226 | 263 | k: (v if k.upper().replace("-", "_") not in SENSITIVE_HEADERS else substitute) |
227 | 264 | for k, v in headers.items() |
228 | 265 | } |
229 | 266 |
|
230 | 267 |
|
def collect_query_string(
    raw_query_string: "Optional[str]",
) -> "Optional[str]":
    """
    Decide which form of a raw query string, if any, may be recorded.

    The result is intended for the query-carrying span attributes
    (``http.query`` / ``url.query`` / the query portion of ``url.full``).

    * An empty or missing query string always yields ``None``.
    * If the client's ``data_collection`` is explicit, the value returned by
      ``scrub_query_string`` for the configured ``query_params`` behavior is
      passed through unchanged.
    * Otherwise the legacy gate is used: the raw string is returned when
      ``send_default_pii`` is enabled, and ``None`` when it is not.
    """
    if raw_query_string:
        settings = sentry_sdk.get_client().data_collection

        # Explicit data_collection takes precedence over send_default_pii.
        if settings.explicit:
            return scrub_query_string(raw_query_string, settings.query_params)

        # Legacy behavior (data_collection not set explicitly).
        if should_send_default_pii():
            return raw_query_string

    return None
| 290 | + |
| 291 | + |
def should_collect_url() -> bool:
    """
    Tell whether the non-query URL attributes (``url.full`` base and
    ``url.path``) should be collected.

    These attributes carry no query string and are treated as technical
    context: they are always collected once ``data_collection`` is set
    explicitly. Without an explicit ``data_collection`` the decision is
    deferred to the legacy ``send_default_pii`` gate.
    """
    settings = sentry_sdk.get_client().data_collection
    # The legacy gate is only consulted when data_collection is not explicit.
    return True if settings.explicit else should_send_default_pii()
| 303 | + |
| 304 | + |
231 | 305 | def _in_http_status_code_range( |
232 | 306 | code: object, code_ranges: "list[HttpStatusCodeRange]" |
233 | 307 | ) -> bool: |
|
0 commit comments