Skip to content

Commit cba5505

Browse files
fix(openai): ensure embeddings input gets prompt-completion sampled correctly [backport 1.15] (#6073)
Backport 93d700e from #6062 to 1.15. Fixes #5963. The traced embeddings endpoint always tags the `openai.request.input` argument regardless of the configured prompt-completion sample rate. This PR ensures that the embeddings input is tagged only when the span is prompt-completion sampled, so the configured sample rate is honored. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. Co-authored-by: Yun Kim <[email protected]>
1 parent 32f38f2 commit cba5505

File tree

3 files changed

+34
-10
lines changed

3 files changed

+34
-10
lines changed

ddtrace/contrib/openai/_endpoint_hooks.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -241,19 +241,17 @@ def _post_response(self, pin, integration, span, args, kwargs, resp, error):
241241

242242

243243
class _EmbeddingHook(_EndpointHook):
244+
_request_tag_attrs = ["model", "user"]
244245
_default_name = "embeddings"
245246

246247
def _pre_response(self, pin, integration, span, args, kwargs):
247-
for kw_attr in ["model", "input", "user"]:
248-
if kw_attr in kwargs:
249-
if kw_attr == "input" and integration.is_pc_sampled_span(span):
250-
if isinstance(kwargs["input"], list):
251-
for idx, inp in enumerate(kwargs["input"]):
252-
span.set_tag_str("openai.request.input.%d" % idx, integration.trunc(str(inp)))
253-
else:
254-
span.set_tag("openai.request.%s" % kw_attr, kwargs[kw_attr])
255-
else:
256-
span.set_tag("openai.request.%s" % kw_attr, kwargs[kw_attr])
248+
embedding_input = kwargs.get("input", "")
249+
if integration.is_pc_sampled_span(span):
250+
if isinstance(embedding_input, list):
251+
for idx, inp in enumerate(embedding_input):
252+
span.set_tag_str("openai.request.input.%d" % idx, integration.trunc(str(inp)))
253+
else:
254+
span.set_tag("openai.request.input", embedding_input)
257255
return
258256

259257
def _post_response(self, pin, integration, span, args, kwargs, resp, error):
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
openai: This fix resolves an issue where embeddings inputs were always tagged regardless of the
5+
configured prompt-completion sample rate.

tests/contrib/openai/test_openai.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,27 @@ def test_completion_truncation(openai, openai_vcr, mock_tracer):
951951
assert len(completion.replace("...", "")) == limit
952952

953953

954+
@pytest.mark.parametrize(
955+
"ddtrace_config_openai",
956+
[
957+
dict(
958+
_api_key="<not-real-but-it's-something>",
959+
span_prompt_completion_sample_rate=0,
960+
)
961+
],
962+
)
963+
def test_embedding_unsampled_prompt_completion(openai, openai_vcr, ddtrace_config_openai, mock_logs, mock_tracer):
964+
if not hasattr(openai, "Embedding"):
965+
pytest.skip("embedding not supported for this version of openai")
966+
with openai_vcr.use_cassette("embedding.yaml"):
967+
openai.Embedding.create(input="hello world", model="text-embedding-ada-002")
968+
logs = mock_logs.enqueue.call_count
969+
traces = mock_tracer.pop_traces()
970+
assert len(traces) == 1
971+
assert traces[0][0].get_tag("openai.request.input") is None
972+
assert logs == 0
973+
974+
954975
@pytest.mark.parametrize(
955976
"ddtrace_config_openai",
956977
[

0 commit comments

Comments
 (0)