Skip to content

Commit 212ab54

Browse files
authored
fix(access-logs): filter out 'None' values from access logs (#97649)
We're currently logging a lot of 'None's in our access logs, which add to storage costs, so we should filter them out. `api.access` cost can be seen [here](https://console.cloud.google.com/monitoring/dashboards/builder/2fac5642-c4b9-4359-8f84-fb9065a40cbe;duration=P1D?invt=Ab5OsA&project=internal-sentry&pageState=(%22eventTypes%22:(%22selected%22:%5B%22CLOUD_ALERTING_ALERT%22,%22SERVICE_HEALTH_INCIDENT%22%5D))&rapt=AEjHL4NL5TS0_HZRIGGC_2egzRlxKZeImiELNSwRDfxXfNQQK4cOqTRpTFVtft8OU7vUwd3dNvnbTsF2fvAcTLuuObMcjhsxVKQ9VKqesD5qpM3kuqYnWG0&inv=1) — some more notes on cost in [my ticket comment here](https://linear.app/getsentry/issue/ID-819/remove-none-values-from-access-logs#comment-9b40f675) [Query showing that every log has the required access log fields](https://console.cloud.google.com/logs/query;query=resource.type%3D%22k8s_container%22%0Alabels.name%3D%22sentry.access.api%22%0AjsonPayload.method%3D%22None%22%20OR%20jsonPayload.view%3D%22None%22%20OR%20jsonPayload.response%20%3D%22None%22%20OR%20jsonPayload.path%3D%22None%22%20OR%20jsonPayload.rate_limit_type%3D%22None%22%20OR%20jsonPayload.rate_limited%3D%22None%22%20OR%20jsonPayload.caller_ip%3D%22None%22%20OR%20jsonPayload.request_duration_seconds%3D%22None%22%20%0A--%20jsonPayload.organization_id%3D%22None%22%0A--%20jsonPayload.response%3D%22429%22;summaryFields=jsonPayload%252Fsnuba_policy:true:32:beginning;cursorTimestamp=2025-08-11T23:13:43.985230382Z;duration=P1D?project=internal-sentry&inv=1&invt=Ab5O6A&rapt=AEjHL4O_3xg8i0ALGUrkxIfT6o5Gkmflt3EqYTMyq8xfsGOFMgTSfVgDVyRvp-Tx-8R6m26gf2gLcvF1Hwars-j7jZMXtWqcY_CWrv2Wi1QqK7QbyGDVxAs&pli=1) Closes https://linear.app/getsentry/issue/ID-819/remove-none-values-from-access-logs
1 parent b8c828c commit 212ab54

File tree

2 files changed

+45
-36
lines changed

2 files changed

+45
-36
lines changed

src/sentry/middleware/access_log.py

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def _get_token_name(auth: AuthenticatedToken | None) -> str | None:
4747
raise AssertionError(f"unreachable: {auth}")
4848

4949

50-
def _get_rate_limit_stats_dict(request: Request) -> dict[str, str]:
50+
def _get_rate_limit_stats_dict(request: Request) -> dict[str, str | int | None]:
5151

5252
rate_limit_metadata: RateLimitMeta | None = getattr(request, "rate_limit_metadata", None)
5353
snuba_rate_limit_metadata: SnubaRateLimitMeta | None = getattr(
@@ -62,20 +62,20 @@ def _get_rate_limit_stats_dict(request: Request) -> dict[str, str]:
6262

6363
rate_limit_stats = {
6464
"rate_limit_type": rate_limit_type,
65-
"concurrent_limit": str(getattr(rate_limit_metadata, "concurrent_limit", None)),
66-
"concurrent_requests": str(getattr(rate_limit_metadata, "concurrent_requests", None)),
67-
"reset_time": str(getattr(rate_limit_metadata, "reset_time", None)),
68-
"group": str(getattr(rate_limit_metadata, "group", None)),
69-
"limit": str(getattr(rate_limit_metadata, "limit", None)),
70-
"remaining": str(getattr(rate_limit_metadata, "remaining", None)),
65+
"concurrent_limit": getattr(rate_limit_metadata, "concurrent_limit", None),
66+
"concurrent_requests": getattr(rate_limit_metadata, "concurrent_requests", None),
67+
"reset_time": getattr(rate_limit_metadata, "reset_time", None),
68+
"group": getattr(rate_limit_metadata, "group", None),
69+
"limit": getattr(rate_limit_metadata, "limit", None),
70+
"remaining": getattr(rate_limit_metadata, "remaining", None),
7171
# We prefix the snuba fields with snuba_ to avoid confusion with the standard rate limit metadata
72-
"snuba_policy": str(getattr(snuba_rate_limit_metadata, "policy", None)),
73-
"snuba_quota_unit": str(getattr(snuba_rate_limit_metadata, "quota_unit", None)),
74-
"snuba_quota_used": str(getattr(snuba_rate_limit_metadata, "quota_used", None)),
75-
"snuba_rejection_threshold": str(
76-
getattr(snuba_rate_limit_metadata, "rejection_threshold", None)
72+
"snuba_policy": getattr(snuba_rate_limit_metadata, "policy", None),
73+
"snuba_quota_unit": getattr(snuba_rate_limit_metadata, "quota_unit", None),
74+
"snuba_quota_used": getattr(snuba_rate_limit_metadata, "quota_used", None),
75+
"snuba_rejection_threshold": getattr(
76+
snuba_rate_limit_metadata, "rejection_threshold", None
7777
),
78-
"snuba_storage_key": str(getattr(snuba_rate_limit_metadata, "storage_key", None)),
78+
"snuba_storage_key": getattr(snuba_rate_limit_metadata, "storage_key", None),
7979
}
8080

8181
return rate_limit_stats
@@ -94,7 +94,7 @@ def _create_api_access_log(
9494
view = request.resolver_match._func_path
9595

9696
request_auth = _get_request_auth(request)
97-
token_type = str(_get_token_name(request_auth))
97+
token_type = _get_token_name(request_auth)
9898
if token_type == "system":
9999
# if its an internal request, no need to log
100100
return
@@ -108,26 +108,30 @@ def _create_api_access_log(
108108
entity_id = getattr(request_auth, "entity_id", None)
109109
status_code = getattr(response, "status_code", 500)
110110
log_metrics = dict(
111-
method=str(request.method),
111+
method=request.method,
112112
view=view,
113113
response=status_code,
114-
user_id=str(user_id),
115-
is_app=str(is_app),
114+
user_id=user_id,
115+
is_app=is_app,
116116
token_type=token_type,
117-
is_frontend_request=str(is_frontend_request(request)),
118-
organization_id=str(org_id),
119-
entity_id=str(entity_id),
120-
path=str(request.path),
121-
caller_ip=str(request.META.get("REMOTE_ADDR")),
122-
user_agent=str(request.META.get("HTTP_USER_AGENT")),
123-
rate_limited=str(getattr(request, "will_be_rate_limited", False)),
124-
rate_limit_category=str(getattr(request, "rate_limit_category", None)),
117+
is_frontend_request=is_frontend_request(request),
118+
organization_id=org_id,
119+
entity_id=entity_id,
120+
path=request.path,
121+
caller_ip=request.META.get("REMOTE_ADDR"),
122+
user_agent=request.META.get("HTTP_USER_AGENT"),
123+
rate_limited=getattr(request, "will_be_rate_limited", False),
124+
rate_limit_category=getattr(request, "rate_limit_category", None),
125125
request_duration_seconds=access_log_metadata.get_request_duration(),
126126
**_get_rate_limit_stats_dict(request),
127127
)
128128
auth = get_authorization_header(request).split()
129129
if len(auth) == 2:
130130
log_metrics["token_last_characters"] = force_str(auth[1])[-4:]
131+
132+
# Filter out None values and convert remaining values to string
133+
log_metrics = {k: str(v) for k, v in log_metrics.items() if v is not None}
134+
131135
api_access_logger.info("api.access", extra=log_metrics)
132136
metrics.incr("middleware.access_log.created")
133137

tests/sentry/middleware/test_access_log_middleware.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -141,23 +141,27 @@ def get(self, request, organization_context, organization):
141141
),
142142
]
143143

144-
access_log_fields = (
144+
required_access_log_fields = (
145145
"method",
146146
"view",
147147
"response",
148-
"user_id",
148+
"path",
149+
"rate_limit_type",
150+
"rate_limited",
151+
"caller_ip",
152+
"request_duration_seconds",
153+
)
154+
155+
# All of these fields may be None, and thus may not appear in every access log
156+
optional_access_log_fields = (
157+
"organization_id",
149158
"is_app",
159+
"user_id",
150160
"token_type",
151-
"organization_id",
152161
"entity_id",
153-
"path",
154-
"caller_ip",
155162
"user_agent",
156-
"rate_limited",
157163
"rate_limit_category",
158-
"request_duration_seconds",
159164
"group",
160-
"rate_limit_type",
161165
"concurrent_limit",
162166
"concurrent_requests",
163167
"reset_time",
@@ -168,6 +172,7 @@ def get(self, request, organization_context, organization):
168172
"snuba_storage_key",
169173
"snuba_quota_used",
170174
"snuba_rejection_threshold",
175+
"token_last_characters",
171176
)
172177

173178

@@ -181,7 +186,7 @@ def inject_fixtures(self, caplog):
181186
def assert_access_log_recorded(self):
182187
sentinel = object()
183188
for record in self.captured_logs:
184-
for field in access_log_fields:
189+
for field in required_access_log_fields:
185190
assert getattr(record, field, sentinel) != sentinel, field
186191

187192
@property
@@ -231,7 +236,7 @@ def test_access_log_rate_limited(self) -> None:
231236
self.get_error_response(status_code=429)
232237
self.assert_access_log_recorded()
233238
# no token because the endpoint was not hit
234-
assert self.captured_logs[0].token_type == "None"
239+
assert not hasattr(self.captured_logs[0], "token_type")
235240
assert self.captured_logs[0].limit == "0"
236241
assert self.captured_logs[0].remaining == "0"
237242
assert self.captured_logs[0].group == RateLimitedEndpoint.rate_limits.group
@@ -250,7 +255,7 @@ def test_concurrent_request_finishes(self) -> None:
250255
# rate limiting
251256
self.assert_access_log_recorded()
252257
for i in range(10):
253-
assert self.captured_logs[i].token_type == "None"
258+
assert not hasattr(self.captured_logs[i], "token_type")
254259
assert self.captured_logs[0].group == RateLimitedEndpoint.rate_limits.group
255260
assert self.captured_logs[i].concurrent_requests == "1"
256261
assert self.captured_logs[i].concurrent_limit == "1"

0 commit comments

Comments
 (0)