Skip to content

Commit 88ccbac

Browse files
authored
OTel log scrubbing (#903)
1 parent 99a2242 commit 88ccbac

File tree

7 files changed, with 184 additions and 18 deletions.

logfire/_internal/config.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@
8383
ShowParentsConsoleSpanExporter,
8484
SimpleConsoleSpanExporter,
8585
)
86-
from .exporters.logs import CheckSuppressInstrumentationLogProcessorWrapper
86+
from .exporters.logs import CheckSuppressInstrumentationLogProcessorWrapper, MainLogProcessorWrapper
8787
from .exporters.otlp import OTLPExporterHttpSession, QuietLogExporter, QuietSpanExporter, RetryFewerSpansSpanExporter
8888
from .exporters.processor_wrapper import CheckSuppressInstrumentationProcessorWrapper, MainSpanProcessorWrapper
8989
from .exporters.quiet_metrics import QuietMetricExporter
@@ -1015,7 +1015,9 @@ def fix_pid(): # pragma: no cover
10151015
multi_log_processor = SynchronousMultiLogRecordProcessor()
10161016
for processor in log_record_processors:
10171017
multi_log_processor.add_log_record_processor(processor)
1018-
root_log_processor = CheckSuppressInstrumentationLogProcessorWrapper(multi_log_processor)
1018+
root_log_processor = CheckSuppressInstrumentationLogProcessorWrapper(
1019+
MainLogProcessorWrapper(multi_log_processor, self.scrubber)
1020+
)
10191021
logger_provider = SDKLoggerProvider(resource)
10201022
logger_provider.add_log_record_processor(root_log_processor)
10211023

logfire/_internal/exporters/logs.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
1+
from dataclasses import dataclass
2+
13
from opentelemetry.sdk._logs import LogData
24

35
import logfire
46
from logfire._internal.exporters.wrapper import WrapperLogProcessor
7+
from logfire._internal.scrubbing import BaseScrubber
58
from logfire._internal.utils import is_instrumentation_suppressed
69

710

@@ -16,3 +19,12 @@ def emit(self, log_data: LogData):
1619
return
1720
with logfire.suppress_instrumentation():
1821
return super().emit(log_data)
22+
23+
24+
@dataclass
25+
class MainLogProcessorWrapper(WrapperLogProcessor):
26+
scrubber: BaseScrubber
27+
28+
def emit(self, log_data: LogData):
29+
log_data.log_record = self.scrubber.scrub_log(log_data.log_record)
30+
return super().emit(log_data)

logfire/_internal/exporters/test.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,9 @@ def build_attributes(
171171
class TestLogExporter(InMemoryLogExporter):
172172
"""A LogExporter that stores exported logs in a list for asserting in tests."""
173173

174+
# NOTE: Avoid test discovery by pytest.
175+
__test__ = False
176+
174177
def __init__(self, ns_timestamp_generator: typing.Callable[[], int]) -> None:
175178
super().__init__()
176179
self.ns_timestamp_generator = ns_timestamp_generator
@@ -222,3 +225,6 @@ def build_log(log_data: LogData) -> dict[str, Any]:
222225
return res
223226

224227
return [build_log(log) for log in self.get_finished_logs()]
228+
229+
def shutdown(self) -> None:
230+
self.clear()

logfire/_internal/scrubbing.py

Lines changed: 37 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import copy
34
import json
45
import re
56
from abc import ABC, abstractmethod
@@ -8,6 +9,7 @@
89

910
import typing_extensions
1011
from opentelemetry.attributes import BoundedAttributes
12+
from opentelemetry.sdk._logs import LogRecord
1113
from opentelemetry.sdk.trace import Event
1214
from opentelemetry.semconv.trace import SpanAttributes
1315
from opentelemetry.trace import Link
@@ -129,11 +131,15 @@ class BaseScrubber(ABC):
129131
SpanAttributes.URL_FULL,
130132
SpanAttributes.URL_PATH,
131133
SpanAttributes.URL_QUERY,
134+
'event.name',
132135
}
133136

134137
@abstractmethod
135138
def scrub_span(self, span: ReadableSpanDict): ... # pragma: no cover
136139

140+
@abstractmethod
141+
def scrub_log(self, log: LogRecord) -> LogRecord: ... # pragma: no cover
142+
137143
@abstractmethod
138144
def scrub_value(self, path: JsonPath, value: Any) -> tuple[Any, list[ScrubbedNote]]: ... # pragma: no cover
139145

@@ -142,6 +148,9 @@ class NoopScrubber(BaseScrubber):
142148
def scrub_span(self, span: ReadableSpanDict):
143149
pass
144150

151+
def scrub_log(self, log: LogRecord) -> LogRecord:
152+
return log
153+
145154
def scrub_value(self, path: JsonPath, value: Any) -> tuple[Any, list[ScrubbedNote]]: # pragma: no cover
146155
return value, []
147156

@@ -158,6 +167,10 @@ def __init__(self, patterns: Sequence[str] | None, callback: ScrubCallback | Non
158167
self._pattern = re.compile('|'.join(patterns), re.IGNORECASE | re.DOTALL)
159168
self._callback = callback
160169

170+
def scrub_log(self, log: LogRecord) -> LogRecord:
171+
span_scrubber = SpanScrubber(self)
172+
return span_scrubber.scrub_log(log)
173+
161174
def scrub_span(self, span: ReadableSpanDict):
162175
scope = span['instrumentation_scope']
163176
if scope and scope.name in ['logfire.openai', 'logfire.anthropic']:
@@ -194,15 +207,19 @@ def __init__(self, parent: Scrubber):
194207
self._pattern = parent._pattern # type: ignore
195208
self._callback = parent._callback # type: ignore
196209
self.scrubbed: list[ScrubbedNote] = []
210+
self.did_scrub = False
197211

198212
def scrub_span(self, span: ReadableSpanDict):
199213
# We need to use BoundedAttributes because:
200214
# 1. For events and links, we get an error otherwise:
201215
# https://github.com/open-telemetry/opentelemetry-python/issues/3761
202216
# 2. The callback might return a value that isn't of the type required by OTEL,
203217
# in which case BoundAttributes will discard it to prevent an error.
204-
# TODO silently throwing away the result is bad, and BoundedAttributes might be bad for performance.
205-
span['attributes'] = BoundedAttributes(attributes=self.scrub(('attributes',), span['attributes']))
218+
# TODO silently throwing away the result is bad, and BoundedAttributes is bad for performance.
219+
new_attributes = self.scrub(('attributes',), span['attributes'])
220+
if self.did_scrub:
221+
span['attributes'] = BoundedAttributes(attributes=new_attributes)
222+
206223
span['events'] = [
207224
Event(
208225
# We don't scrub the event name because in theory it should be a low-cardinality general description,
@@ -221,6 +238,22 @@ def scrub_span(self, span: ReadableSpanDict):
221238
for i, link in enumerate(span['links'])
222239
]
223240

241+
def scrub_log(self, log: LogRecord) -> LogRecord:
242+
new_attributes: dict[str, Any] | None = self.scrub(('attributes',), log.attributes)
243+
new_body = self.scrub(('log_body',), log.body)
244+
245+
if not self.did_scrub:
246+
return log
247+
248+
if self.scrubbed:
249+
new_attributes = new_attributes or {}
250+
new_attributes[ATTRIBUTES_SCRUBBED_KEY] = json.dumps(self.scrubbed)
251+
252+
result = copy.copy(log)
253+
result.attributes = BoundedAttributes(attributes=new_attributes)
254+
result.body = new_body
255+
return result
256+
224257
def scrub_event_attributes(self, event: Event, index: int):
225258
attributes = event.attributes or {}
226259
path = ('otel_events', index, 'attributes')
@@ -265,7 +298,9 @@ def scrub(self, path: JsonPath, value: Any) -> Any:
265298

266299
def _redact(self, match: ScrubMatch) -> Any:
267300
if self._callback and (result := self._callback(match)) is not None:
301+
self.did_scrub = self.did_scrub or result is not match.value
268302
return result
303+
self.did_scrub = True
269304
matched_substring = match.pattern_match.group(0)
270305
self.scrubbed.append(ScrubbedNote(path=match.path, matched_substring=matched_substring))
271306
return f'[Scrubbed due to {matched_substring!r}]'

tests/test_configure.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@
5353
sanitize_project_name,
5454
)
5555
from logfire._internal.exporters.console import ConsoleLogExporter, ShowParentsConsoleSpanExporter
56-
from logfire._internal.exporters.logs import CheckSuppressInstrumentationLogProcessorWrapper
56+
from logfire._internal.exporters.logs import CheckSuppressInstrumentationLogProcessorWrapper, MainLogProcessorWrapper
5757
from logfire._internal.exporters.otlp import QuietLogExporter, QuietSpanExporter
5858
from logfire._internal.exporters.processor_wrapper import (
5959
CheckSuppressInstrumentationProcessorWrapper,
@@ -1630,11 +1630,14 @@ def get_metric_readers() -> Iterable[SpanProcessor]:
16301630

16311631

16321632
def get_log_record_processors() -> Iterable[LogRecordProcessor]:
1633-
[root] = get_logger_provider().provider._multi_log_record_processor._log_record_processors # type: ignore
1634-
assert isinstance(root, CheckSuppressInstrumentationLogProcessorWrapper)
1635-
assert isinstance(root.processor, SynchronousMultiLogRecordProcessor)
1636-
1637-
return root.processor._log_record_processors # type: ignore
1633+
[processor] = get_logger_provider().provider._multi_log_record_processor._log_record_processors # type: ignore
1634+
assert isinstance(processor, CheckSuppressInstrumentationLogProcessorWrapper)
1635+
processor = processor.processor
1636+
assert isinstance(processor, MainLogProcessorWrapper)
1637+
processor = processor.processor
1638+
assert isinstance(processor, SynchronousMultiLogRecordProcessor)
1639+
1640+
return processor._log_record_processors # type: ignore
16381641

16391642

16401643
def test_dynamic_module_ignored_in_ensure_flush_after_aws_lambda(

tests/test_otel_logs.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING, Any, Sequence
3+
from typing import Any, Sequence
44
from unittest import mock
55

66
import pytest
@@ -16,9 +16,7 @@
1616
import logfire
1717
from logfire import suppress_instrumentation
1818
from logfire._internal.exporters.otlp import QuietLogExporter
19-
20-
if TYPE_CHECKING:
21-
from logfire._internal.exporters.test import TestLogExporter
19+
from logfire.testing import TestLogExporter
2220

2321

2422
def test_otel_logs_supress_scopes(logs_exporter: InMemoryLogExporter, config_kwargs: dict[str, Any]) -> None:

0 commit comments

Comments
 (0)