Skip to content

Commit 0b9c5bb

Browse files
committed
Use an allowlist for dynamic keys per PR feedback.
1 parent cb4ca3b commit 0b9c5bb

File tree

7 files changed

+253
-39
lines changed

7 files changed

+253
-39
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright The OpenTelemetry Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
from typing import Optional, Set, Callable, List, Union
17+
18+
ALLOWED = True
19+
DENIED = False
20+
21+
def _parse_env_list(s: str) -> Set[str]:
22+
result = set()
23+
for entry in s.split(','):
24+
stripped_entry = entry.strip()
25+
if not stripped_entry:
26+
continue
27+
result.add(stripped_entry)
28+
return result
29+
30+
31+
class AllowList:
    """Decides whether a string key is permitted by include/exclude sets.

    Keys are compared by exact set membership. The literal entry ``'*'``
    acts as a wildcard:

    * ``'*'`` in ``includes``: every key is allowed unless it is listed
      in ``excludes``.
    * ``'*'`` in ``excludes``: every key is denied unless it is listed
      in ``includes``.

    Without a wildcard, a key is allowed only when it is in ``includes``
    and not in ``excludes``. An ``AllowList`` built with no arguments
    therefore allows nothing.
    """

    def __init__(
        self,
        includes: Optional[Union[Set[str], List[str]]] = None,
        excludes: Optional[Union[Set[str], List[str]]] = None,
        if_none_match: Optional[Callable[[str], bool]] = None,
    ):
        """Build the allow list from explicit include/exclude collections.

        Args:
            includes: Keys to allow; may contain the wildcard ``'*'``.
                ``None`` is treated as empty.
            excludes: Keys to deny; may contain the wildcard ``'*'``.
                ``None`` is treated as empty.
            if_none_match: Accepted for interface compatibility only; it
                is not stored and is not consulted by ``allowed``.

        Raises:
            AssertionError: If ``'*'`` appears in both ``includes`` and
                ``excludes``.
        """
        self._includes = set(includes or [])
        self._excludes = set(excludes or [])
        self._include_all = '*' in self._includes
        self._exclude_all = '*' in self._excludes
        if self._include_all and self._exclude_all:
            # Raised explicitly rather than via ``assert`` so that the
            # check still runs under ``python -O``; the exception type and
            # message are unchanged.
            raise AssertionError(
                "Can't have '*' in both includes and excludes.")

    def allowed(self, x: str) -> bool:
        """Return whether the key ``x`` passes this allow list."""
        if self._exclude_all:
            # Everything is denied except keys that were explicitly included.
            return x in self._includes
        if self._include_all:
            # Everything is allowed except keys that were explicitly excluded.
            return x not in self._excludes
        return (x in self._includes) and (x not in self._excludes)

    @staticmethod
    def from_env(
        includes_env_var: str,
        excludes_env_var: Optional[str] = None,
    ) -> "AllowList":
        """Build an ``AllowList`` from comma-separated environment variables.

        Each variable's value is parsed with ``_parse_env_list``. A variable
        that is unset or empty contributes no entries.

        Args:
            includes_env_var: Name of the variable listing included keys.
            excludes_env_var: Optional name of the variable listing
                excluded keys; when omitted, nothing is excluded.

        Returns:
            An ``AllowList`` configured from the current environment.
        """
        includes = _parse_env_list(os.getenv(includes_env_var) or '')
        excludes = set()
        if excludes_env_var:
            excludes = _parse_env_list(os.getenv(excludes_env_var) or '')
        return AllowList(
            includes=includes,
            excludes=excludes)

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
)
4040
from opentelemetry.semconv.attributes import error_attributes
4141

42+
from .allowlist_util import AllowList
4243
from .custom_semconv import CUSTOM_LLM_REQUEST_PREFIX
4344
from .dict_util import flatten_dict
4445
from .flags import is_content_recording_enabled
@@ -140,7 +141,7 @@ def _to_dict(value: object):
140141

141142

142143
def _add_request_options_to_span(
143-
span, config: Optional[GenerateContentConfigOrDict]
144+
span, config: Optional[GenerateContentConfigOrDict], allow_list: AllowList
144145
):
145146
if config is None:
146147
return
@@ -152,7 +153,9 @@ def _add_request_options_to_span(
152153
# Automatically derive attributes from the contents of the
153154
# config object. This ensures that all relevant parameters
154155
# are captured in the telemetry data (except for those
155-
# that are excluded via "exclude_keys").
156+
# that are excluded via "exclude_keys"). Dynamic attributes (those
157+
# starting with "gcp.gen_ai." instead of simply "gen_ai.request.")
158+
# are filtered with the "allow_list" before inclusion in the span.
156159
attributes = flatten_dict(
157160
_to_dict(config),
158161
# A custom prefix is used, because the names/structure of the
@@ -162,9 +165,6 @@ def _add_request_options_to_span(
162165
# System instruction can be overly long for a span attribute.
163166
# Additionally, it is recorded as an event (log), instead.
164167
"gcp.gen_ai.request.system_instruction",
165-
# Headers could include sensitive information, therefore it is
166-
# best that we not record these options.
167-
"gcp.gen_ai.request.http_options.headers",
168168
],
169169
# Although a custom prefix is used by default, some of the attributes
170170
# are captured in common, standard, Semantic Conventions. For the
@@ -184,7 +184,11 @@ def _add_request_options_to_span(
184184
"gcp.gen_ai.request.seed": gen_ai_attributes.GEN_AI_REQUEST_SEED,
185185
},
186186
)
187-
span.set_attributes(attributes)
187+
for key, value in attributes.items():
188+
if key.startswith(CUSTOM_LLM_REQUEST_PREFIX) and not allow_list.allowed(key):
189+
# The allowlist is used to control inclusion of the dynamic keys.
190+
continue
191+
span.set_attribute(key, value)
188192

189193

190194
def _get_response_property(response: GenerateContentResponse, path: str):
@@ -206,6 +210,7 @@ def __init__(
206210
models_object: Union[Models, AsyncModels],
207211
otel_wrapper: OTelWrapper,
208212
model: str,
213+
generate_content_config_key_allowlist: Optional[AllowList] = None,
209214
):
210215
self._start_time = time.time_ns()
211216
self._otel_wrapper = otel_wrapper
@@ -218,6 +223,7 @@ def __init__(
218223
self._content_recording_enabled = is_content_recording_enabled()
219224
self._response_index = 0
220225
self._candidate_index = 0
226+
self._generate_content_config_key_allowlist = generate_content_config_key_allowlist or AllowList()
221227

222228
def start_span_as_current_span(
223229
self, model_name, function_name, end_on_exit=True
@@ -240,7 +246,7 @@ def process_request(
240246
config: Optional[GenerateContentConfigOrDict],
241247
):
242248
span = trace.get_current_span()
243-
_add_request_options_to_span(span, config)
249+
_add_request_options_to_span(span, config, self._generate_content_config_key_allowlist)
244250
self._maybe_log_system_instruction(config=config)
245251
self._maybe_log_user_prompt(contents)
246252

@@ -501,7 +507,7 @@ def _record_duration_metric(self):
501507

502508

503509
def _create_instrumented_generate_content(
504-
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper
510+
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper, generate_content_config_key_allowlist: Optional[AllowList] = None
505511
):
506512
wrapped_func = snapshot.generate_content
507513

@@ -515,7 +521,7 @@ def instrumented_generate_content(
515521
**kwargs: Any,
516522
) -> GenerateContentResponse:
517523
helper = _GenerateContentInstrumentationHelper(
518-
self, otel_wrapper, model
524+
self, otel_wrapper, model, generate_content_config_key_allowlist=generate_content_config_key_allowlist,
519525
)
520526
with helper.start_span_as_current_span(
521527
model, "google.genai.Models.generate_content"
@@ -541,7 +547,7 @@ def instrumented_generate_content(
541547

542548

543549
def _create_instrumented_generate_content_stream(
544-
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper
550+
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper, generate_content_config_key_allowlist: Optional[AllowList] = None
545551
):
546552
wrapped_func = snapshot.generate_content_stream
547553

@@ -555,7 +561,7 @@ def instrumented_generate_content_stream(
555561
**kwargs: Any,
556562
) -> Iterator[GenerateContentResponse]:
557563
helper = _GenerateContentInstrumentationHelper(
558-
self, otel_wrapper, model
564+
self, otel_wrapper, model, generate_content_config_key_allowlist=generate_content_config_key_allowlist
559565
)
560566
with helper.start_span_as_current_span(
561567
model, "google.genai.Models.generate_content_stream"
@@ -581,7 +587,7 @@ def instrumented_generate_content_stream(
581587

582588

583589
def _create_instrumented_async_generate_content(
584-
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper
590+
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper, generate_content_config_key_allowlist: Optional[AllowList] = None
585591
):
586592
wrapped_func = snapshot.async_generate_content
587593

@@ -595,7 +601,7 @@ async def instrumented_generate_content(
595601
**kwargs: Any,
596602
) -> GenerateContentResponse:
597603
helper = _GenerateContentInstrumentationHelper(
598-
self, otel_wrapper, model
604+
self, otel_wrapper, model, generate_content_config_key_allowlist=generate_content_config_key_allowlist,
599605
)
600606
with helper.start_span_as_current_span(
601607
model, "google.genai.AsyncModels.generate_content"
@@ -622,7 +628,7 @@ async def instrumented_generate_content(
622628

623629
# Disabling type checking because this is not yet implemented and tested fully.
624630
def _create_instrumented_async_generate_content_stream( # type: ignore
625-
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper
631+
snapshot: _MethodsSnapshot, otel_wrapper: OTelWrapper, generate_content_config_key_allowlist: Optional[AllowList] = None
626632
):
627633
wrapped_func = snapshot.async_generate_content_stream
628634

@@ -636,7 +642,7 @@ async def instrumented_generate_content_stream(
636642
**kwargs: Any,
637643
) -> Awaitable[AsyncIterator[GenerateContentResponse]]: # type: ignore
638644
helper = _GenerateContentInstrumentationHelper(
639-
self, otel_wrapper, model
645+
self, otel_wrapper, model, generate_content_config_key_allowlist=generate_content_config_key_allowlist
640646
)
641647
with helper.start_span_as_current_span(
642648
model,
@@ -680,20 +686,22 @@ def uninstrument_generate_content(snapshot: object):
680686
snapshot.restore()
681687

682688

683-
def instrument_generate_content(otel_wrapper: OTelWrapper) -> object:
689+
def instrument_generate_content(
690+
otel_wrapper: OTelWrapper,
691+
generate_content_config_key_allowlist: Optional[AllowList]=None) -> object:
684692
snapshot = _MethodsSnapshot()
685693
Models.generate_content = _create_instrumented_generate_content(
686-
snapshot, otel_wrapper
694+
snapshot, otel_wrapper, generate_content_config_key_allowlist=generate_content_config_key_allowlist,
687695
)
688696
Models.generate_content_stream = (
689-
_create_instrumented_generate_content_stream(snapshot, otel_wrapper)
697+
_create_instrumented_generate_content_stream(snapshot, otel_wrapper, generate_content_config_key_allowlist=generate_content_config_key_allowlist)
690698
)
691699
AsyncModels.generate_content = _create_instrumented_async_generate_content(
692-
snapshot, otel_wrapper
700+
snapshot, otel_wrapper, generate_content_config_key_allowlist=generate_content_config_key_allowlist
693701
)
694702
AsyncModels.generate_content_stream = (
695703
_create_instrumented_async_generate_content_stream(
696-
snapshot, otel_wrapper
704+
snapshot, otel_wrapper, generate_content_config_key_allowlist=generate_content_config_key_allowlist,
697705
)
698706
)
699707
return snapshot

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/instrumentor.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import Any, Collection
15+
from typing import Any, Collection, Optional
1616

1717
from opentelemetry._events import get_event_logger_provider
1818
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
1919
from opentelemetry.metrics import get_meter_provider
2020
from opentelemetry.trace import get_tracer_provider
2121

22+
from .allowlist_util import AllowList
2223
from .generate_content import (
2324
instrument_generate_content,
2425
uninstrument_generate_content,
@@ -27,8 +28,11 @@
2728

2829

2930
class GoogleGenAiSdkInstrumentor(BaseInstrumentor):
30-
def __init__(self):
31+
def __init__(self, generate_content_config_key_allowlist: Optional[AllowList] = None):
3132
self._generate_content_snapshot = None
33+
self._generate_content_config_key_allowlist = (generate_content_config_key_allowlist or AllowList.from_env(
34+
'OTEL_GOOGLE_GENAI_GENERATE_CONTENT_CONFIG_INCLUDES',
35+
excludes_env_var='OTEL_GOOGLE_GENAI_GENERATE_CONTENT_CONFIG_EXCLUDES'))
3236

3337
# Inherited, abstract function from 'BaseInstrumentor'. Even though 'self' is
3438
# not used in the definition, a method is required per the API contract.
@@ -49,8 +53,8 @@ def _instrument(self, **kwargs: Any):
4953
meter_provider=meter_provider,
5054
)
5155
self._generate_content_snapshot = instrument_generate_content(
52-
otel_wrapper
53-
)
56+
otel_wrapper,
57+
generate_content_config_key_allowlist=self._generate_content_config_key_allowlist)
5458

5559
def _uninstrument(self, **kwargs: Any):
5660
uninstrument_generate_content(self._generate_content_snapshot)

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/common/base.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,15 @@ def setUp(self):
3333
self._client = None
3434
self._uses_vertex = False
3535
self._credentials = FakeCredentials()
36+
self._instrumentor_args = {}
3637

3738
def _lazy_init(self):
38-
self._instrumentation_context = InstrumentationContext()
39+
self._instrumentation_context = InstrumentationContext(**self._instrumentor_args)
3940
self._instrumentation_context.install()
4041

42+
def set_instrumentor_constructor_kwarg(self, key, value):
43+
self._instrumentor_args[key] = value
44+
4145
@property
4246
def client(self):
4347
if self._client is None:

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/common/instrumentation_context.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818

1919

2020
class InstrumentationContext:
21-
def __init__(self):
22-
self._instrumentor = GoogleGenAiSdkInstrumentor()
21+
def __init__(self, **kwargs):
22+
self._instrumentor = GoogleGenAiSdkInstrumentor(**kwargs)
2323

2424
def install(self):
2525
self._instrumentor.instrument()

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/test_config_span_attributes.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from google.genai.types import GenerateContentConfig
15+
import os
16+
from unittest import mock
1617

18+
from opentelemetry.instrumentation.google_genai.allowlist_util import AllowList
19+
from google.genai.types import GenerateContentConfig
1720
from .base import TestCase
1821

1922

@@ -93,6 +96,7 @@ def test_option_reflected_to_span_attribute_top_p(self):
9396
span = self.generate_and_get_span(config={"top_p": 10})
9497
self.assertEqual(span.attributes["gen_ai.request.top_p"], 10)
9598

99+
@mock.patch.dict(os.environ, {"OTEL_GOOGLE_GENAI_GENERATE_CONTENT_CONFIG_INCLUDES": "*"})
96100
def test_option_not_reflected_to_span_attribute_system_instruction(self):
97101
span = self.generate_and_get_span(
98102
config={"system_instruction": "Yadda yadda yadda"}
@@ -106,27 +110,38 @@ def test_option_not_reflected_to_span_attribute_system_instruction(self):
106110
if isinstance(value, str):
107111
self.assertNotIn("Yadda yadda yadda", value)
108112

109-
def test_option_not_reflected_to_span_attribute_http_headers(self):
113+
@mock.patch.dict(os.environ, {"OTEL_GOOGLE_GENAI_GENERATE_CONTENT_CONFIG_INCLUDES": "*"})
114+
def test_option_reflected_to_span_attribute_automatic_func_calling(self):
110115
span = self.generate_and_get_span(
111116
config={
112-
"http_options": {
113-
"base_url": "my.backend.override",
114-
"headers": {
115-
"sensitive": 12345,
116-
},
117+
"automatic_function_calling": {
118+
"ignore_call_history": True,
117119
}
118120
}
119121
)
120-
self.assertEqual(
121-
span.attributes["gcp.gen_ai.request.http_options.base_url"],
122-
"my.backend.override",
122+
self.assertTrue(
123+
span.attributes[
124+
"gcp.gen_ai.request.automatic_function_calling.ignore_call_history"
125+
]
126+
)
127+
128+
def test_dynamic_config_options_not_included_without_allow_list(self):
129+
span = self.generate_and_get_span(
130+
config={
131+
"automatic_function_calling": {
132+
"ignore_call_history": True,
133+
}
134+
}
123135
)
124136
self.assertNotIn(
125-
"gcp.gen_ai.request.http_options.headers.sensitive",
137+
"gcp.gen_ai.request.automatic_function_calling.ignore_call_history",
126138
span.attributes,
127139
)
128140

129-
def test_option_reflected_to_span_attribute_automatic_func_calling(self):
141+
def test_can_supply_allow_list_via_instrumentor_constructor(self):
142+
self.set_instrumentor_constructor_kwarg(
143+
"generate_content_config_key_allowlist",
144+
AllowList(includes=["*"]))
130145
span = self.generate_and_get_span(
131146
config={
132147
"automatic_function_calling": {

0 commit comments

Comments
 (0)