Skip to content

Commit 86507f4

Browse files
maxzhangdd and Yun-Kim authored
chore(llmobs): refactor google utility functions (#14025)
[MLOB-2935] This PR extracts google utils from burgeoning utils.py file and combines relevant methods from gemini, vertex, and genai integrations into a single google_utils file. In addition, it aims to clean or dedupe similar methods. Note: get_generation_config_google was deleted as it is not used anywhere in the codebase. testing: existing tests properly capture this ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) [MLOB-2935]: https://datadoghq.atlassian.net/browse/MLOB-2935?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ --------- Co-authored-by: Yun Kim <[email protected]>
1 parent 40bf75c commit 86507f4

File tree

10 files changed

+156
-152
lines changed

10 files changed

+156
-152
lines changed

ddtrace/contrib/internal/google_genai/_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import wrapt
88

9-
from ddtrace.llmobs._integrations.google_genai_utils import DEFAULT_MODEL_ROLE
9+
from ddtrace.llmobs._integrations.google_utils import GOOGLE_GENAI_DEFAULT_MODEL_ROLE
1010
from ddtrace.llmobs._utils import _get_attr
1111

1212

@@ -32,7 +32,7 @@ def _join_chunks(chunks: List[Any]) -> Optional[Dict[str, Any]]:
3232
continue
3333

3434
if role is None:
35-
role = _get_attr(content, "role", DEFAULT_MODEL_ROLE)
35+
role = _get_attr(content, "role", GOOGLE_GENAI_DEFAULT_MODEL_ROLE)
3636

3737
parts = _get_attr(content, "parts", [])
3838
for part in parts:

ddtrace/contrib/internal/google_genai/patch.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from ddtrace.contrib.internal.trace_utils import with_traced_module
1010
from ddtrace.contrib.internal.trace_utils import wrap
1111
from ddtrace.llmobs._integrations import GoogleGenAIIntegration
12-
from ddtrace.llmobs._integrations.google_genai_utils import extract_provider_and_model_name
12+
from ddtrace.llmobs._integrations.google_utils import extract_provider_and_model_name
1313
from ddtrace.trace import Pin
1414

1515

@@ -27,7 +27,7 @@ def get_version() -> str:
2727
@with_traced_module
2828
def traced_generate(genai, pin, func, instance, args, kwargs):
2929
integration = genai._datadog_integration
30-
provider_name, model_name = extract_provider_and_model_name(kwargs)
30+
provider_name, model_name = extract_provider_and_model_name(kwargs=kwargs)
3131
with integration.trace(
3232
pin,
3333
"%s.%s" % (instance.__class__.__name__, func.__name__),
@@ -46,7 +46,7 @@ def traced_generate(genai, pin, func, instance, args, kwargs):
4646
@with_traced_module
4747
async def traced_async_generate(genai, pin, func, instance, args, kwargs):
4848
integration = genai._datadog_integration
49-
provider_name, model_name = extract_provider_and_model_name(kwargs)
49+
provider_name, model_name = extract_provider_and_model_name(kwargs=kwargs)
5050
with integration.trace(
5151
pin,
5252
"%s.%s" % (instance.__class__.__name__, func.__name__),
@@ -65,7 +65,7 @@ async def traced_async_generate(genai, pin, func, instance, args, kwargs):
6565
@with_traced_module
6666
def traced_generate_stream(genai, pin, func, instance, args, kwargs):
6767
integration = genai._datadog_integration
68-
provider_name, model_name = extract_provider_and_model_name(kwargs)
68+
provider_name, model_name = extract_provider_and_model_name(kwargs=kwargs)
6969
span = integration.trace(
7070
pin,
7171
"%s.%s" % (instance.__class__.__name__, func.__name__),
@@ -86,7 +86,7 @@ def traced_generate_stream(genai, pin, func, instance, args, kwargs):
8686
@with_traced_module
8787
async def traced_async_generate_stream(genai, pin, func, instance, args, kwargs):
8888
integration = genai._datadog_integration
89-
provider_name, model_name = extract_provider_and_model_name(kwargs)
89+
provider_name, model_name = extract_provider_and_model_name(kwargs=kwargs)
9090
span = integration.trace(
9191
pin,
9292
"%s.%s" % (instance.__class__.__name__, func.__name__),
@@ -107,7 +107,7 @@ async def traced_async_generate_stream(genai, pin, func, instance, args, kwargs)
107107
@with_traced_module
108108
def traced_embed_content(genai, pin, func, instance, args, kwargs):
109109
integration = genai._datadog_integration
110-
provider_name, model_name = extract_provider_and_model_name(kwargs)
110+
provider_name, model_name = extract_provider_and_model_name(kwargs=kwargs)
111111
with integration.trace(
112112
pin,
113113
"%s.%s" % (instance.__class__.__name__, func.__name__),
@@ -126,7 +126,7 @@ def traced_embed_content(genai, pin, func, instance, args, kwargs):
126126
@with_traced_module
127127
async def traced_async_embed_content(genai, pin, func, instance, args, kwargs):
128128
integration = genai._datadog_integration
129-
provider_name, model_name = extract_provider_and_model_name(kwargs)
129+
provider_name, model_name = extract_provider_and_model_name(kwargs=kwargs)
130130
with integration.trace(
131131
pin,
132132
"%s.%s" % (instance.__class__.__name__, func.__name__),

ddtrace/contrib/internal/google_generativeai/patch.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from ddtrace.contrib.internal.trace_utils import with_traced_module
1212
from ddtrace.contrib.internal.trace_utils import wrap
1313
from ddtrace.llmobs._integrations import GeminiIntegration
14-
from ddtrace.llmobs._integrations.utils import extract_model_name_google
14+
from ddtrace.llmobs._integrations.google_utils import extract_provider_and_model_name
1515
from ddtrace.trace import Pin
1616

1717

@@ -40,11 +40,12 @@ def traced_generate(genai, pin, func, instance, args, kwargs):
4040
integration = genai._datadog_integration
4141
stream = kwargs.get("stream", False)
4242
generations = None
43+
provider_name, model_name = extract_provider_and_model_name(instance=instance, model_name_attr="model_name")
4344
span = integration.trace(
4445
pin,
4546
"%s.%s" % (instance.__class__.__name__, func.__name__),
46-
provider="google",
47-
model=extract_model_name_google(instance, "model_name"),
47+
provider=provider_name,
48+
model=model_name,
4849
submit_to_llmobs=True,
4950
)
5051
try:
@@ -68,11 +69,12 @@ async def traced_agenerate(genai, pin, func, instance, args, kwargs):
6869
integration = genai._datadog_integration
6970
stream = kwargs.get("stream", False)
7071
generations = None
72+
provider_name, model_name = extract_provider_and_model_name(instance=instance, model_name_attr="model_name")
7173
span = integration.trace(
7274
pin,
7375
"%s.%s" % (instance.__class__.__name__, func.__name__),
74-
provider="google",
75-
model=extract_model_name_google(instance, "model_name"),
76+
provider=provider_name,
77+
model=model_name,
7678
submit_to_llmobs=True,
7779
)
7880
try:

ddtrace/contrib/internal/vertexai/patch.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from ddtrace.contrib.internal.vertexai._utils import TracedAsyncVertexAIStreamResponse
1515
from ddtrace.contrib.internal.vertexai._utils import TracedVertexAIStreamResponse
1616
from ddtrace.llmobs._integrations import VertexAIIntegration
17-
from ddtrace.llmobs._integrations.utils import extract_model_name_google
17+
from ddtrace.llmobs._integrations.google_utils import extract_provider_and_model_name
1818
from ddtrace.trace import Pin
1919

2020

@@ -60,11 +60,12 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
6060
integration = vertexai._datadog_integration
6161
stream = kwargs.get("stream", False)
6262
generations = None
63+
provider_name, model_name = extract_provider_and_model_name(instance=model_instance, model_name_attr="_model_name")
6364
span = integration.trace(
6465
pin,
6566
"%s.%s" % (instance.__class__.__name__, func.__name__),
66-
provider="google",
67-
model=extract_model_name_google(model_instance, "_model_name"),
67+
provider=provider_name,
68+
model=model_name,
6869
submit_to_llmobs=True,
6970
)
7071
# history must be copied since it is modified during the LLM interaction
@@ -92,11 +93,12 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
9293
integration = vertexai._datadog_integration
9394
stream = kwargs.get("stream", False)
9495
generations = None
96+
provider_name, model_name = extract_provider_and_model_name(instance=model_instance, model_name_attr="_model_name")
9597
span = integration.trace(
9698
pin,
9799
"%s.%s" % (instance.__class__.__name__, func.__name__),
98-
provider="google",
99-
model=extract_model_name_google(model_instance, "_model_name"),
100+
provider=provider_name,
101+
model=model_name,
100102
submit_to_llmobs=True,
101103
)
102104
# history must be copied since it is modified during the LLM interaction

ddtrace/llmobs/_integrations/gemini.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
from ddtrace.llmobs._constants import SPAN_KIND
1717
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
1818
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
19-
from ddtrace.llmobs._integrations.utils import extract_message_from_part_google
20-
from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
21-
from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google
19+
from ddtrace.llmobs._integrations.google_utils import extract_message_from_part_gemini_vertexai
20+
from ddtrace.llmobs._integrations.google_utils import get_system_instructions_gemini_vertexai
21+
from ddtrace.llmobs._integrations.google_utils import llmobs_get_metadata_gemini_vertexai
2222
from ddtrace.llmobs._utils import _get_attr
2323
from ddtrace.trace import Span
2424

@@ -43,9 +43,9 @@ def _llmobs_set_tags(
4343
operation: str = "",
4444
) -> None:
4545
instance = kwargs.get("instance", None)
46-
metadata = llmobs_get_metadata_google(kwargs, instance)
46+
metadata = llmobs_get_metadata_gemini_vertexai(kwargs, instance)
4747

48-
system_instruction = get_system_instructions_from_google_model(instance)
48+
system_instruction = get_system_instructions_gemini_vertexai(instance)
4949
input_contents = get_argument_value(args, kwargs, 0, "contents")
5050
input_messages = self._extract_input_message(input_contents, system_instruction)
5151

@@ -95,7 +95,7 @@ def _extract_input_message(self, contents, system_instruction=None):
9595
messages.append(message)
9696
continue
9797
for part in parts:
98-
message = extract_message_from_part_google(part, role)
98+
message = extract_message_from_part_gemini_vertexai(part, role)
9999
messages.append(message)
100100
return messages
101101

@@ -107,7 +107,7 @@ def _extract_output_message(self, generations):
107107
role = content.get("role", "model")
108108
parts = content.get("parts", [])
109109
for part in parts:
110-
message = extract_message_from_part_google(part, role)
110+
message = extract_message_from_part_gemini_vertexai(part, role)
111111
output_messages.append(message)
112112
return output_messages
113113

ddtrace/llmobs/_integrations/google_genai.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
from ddtrace.llmobs._constants import OUTPUT_VALUE
1515
from ddtrace.llmobs._constants import SPAN_KIND
1616
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
17-
from ddtrace.llmobs._integrations.google_genai_utils import DEFAULT_MODEL_ROLE
18-
from ddtrace.llmobs._integrations.google_genai_utils import extract_embedding_metrics_google_genai
19-
from ddtrace.llmobs._integrations.google_genai_utils import extract_generation_metrics_google_genai
20-
from ddtrace.llmobs._integrations.google_genai_utils import extract_message_from_part_google_genai
21-
from ddtrace.llmobs._integrations.google_genai_utils import extract_provider_and_model_name
22-
from ddtrace.llmobs._integrations.google_genai_utils import normalize_contents
17+
from ddtrace.llmobs._integrations.google_utils import GOOGLE_GENAI_DEFAULT_MODEL_ROLE
18+
from ddtrace.llmobs._integrations.google_utils import extract_embedding_metrics_google_genai
19+
from ddtrace.llmobs._integrations.google_utils import extract_generation_metrics_google_genai
20+
from ddtrace.llmobs._integrations.google_utils import extract_message_from_part_google_genai
21+
from ddtrace.llmobs._integrations.google_utils import extract_provider_and_model_name
22+
from ddtrace.llmobs._integrations.google_utils import normalize_contents_google_genai
2323
from ddtrace.llmobs._utils import _get_attr
2424
from ddtrace.llmobs.utils import Document
2525

@@ -71,7 +71,7 @@ def _llmobs_set_tags(
7171
response: Optional[Any] = None,
7272
operation: str = "",
7373
) -> None:
74-
provider_name, model_name = extract_provider_and_model_name(kwargs)
74+
provider_name, model_name = extract_provider_and_model_name(kwargs=kwargs)
7575
span._set_ctx_items(
7676
{
7777
SPAN_KIND: operation,
@@ -120,23 +120,23 @@ def _extract_input_messages(self, args: List[Any], kwargs: Dict[str, Any], confi
120120

121121
def _extract_messages_from_contents(self, contents, default_role: str) -> List[Dict[str, Any]]:
122122
messages = []
123-
for content in normalize_contents(contents):
123+
for content in normalize_contents_google_genai(contents):
124124
role = content.get("role") or default_role
125125
for part in content.get("parts", []):
126126
messages.append(extract_message_from_part_google_genai(part, role))
127127
return messages
128128

129129
def _extract_output_messages(self, response) -> List[Dict[str, Any]]:
130130
if not response:
131-
return [{"content": "", "role": DEFAULT_MODEL_ROLE}]
131+
return [{"content": "", "role": GOOGLE_GENAI_DEFAULT_MODEL_ROLE}]
132132
messages = []
133133
candidates = _get_attr(response, "candidates", [])
134134
for candidate in candidates:
135135
content = _get_attr(candidate, "content", None)
136136
if not content:
137137
continue
138138
parts = _get_attr(content, "parts", [])
139-
role = _get_attr(content, "role", DEFAULT_MODEL_ROLE)
139+
role = _get_attr(content, "role", GOOGLE_GENAI_DEFAULT_MODEL_ROLE)
140140
for part in parts:
141141
message = extract_message_from_part_google_genai(part, role)
142142
messages.append(message)

0 commit comments

Comments (0)