Skip to content

Commit 116d86b

Browse files
authored
fix(mcp+anthropic): vanilla mcp crashed due to argument manipulation (#2881)
1 parent 7a1b8bb commit 116d86b

File tree

7 files changed

+418
-411
lines changed

7 files changed

+418
-411
lines changed

packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
build_from_streaming_response,
1616
)
1717
from opentelemetry.instrumentation.anthropic.utils import (
18+
JSONEncoder,
1819
acount_prompt_tokens_from_request,
1920
dont_throw,
2021
error_metrics_attributes,
@@ -28,7 +29,9 @@
2829
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
2930
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
3031
from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
31-
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
32+
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
33+
GEN_AI_RESPONSE_ID,
34+
)
3235
from opentelemetry.semconv_ai import (
3336
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
3437
LLMRequestTypeValues,
@@ -145,7 +148,7 @@ async def _dump_content(message_index, content, span):
145148
for j, item in enumerate(content)
146149
]
147150

148-
return json.dumps(content)
151+
return json.dumps(content, cls=JSONEncoder)
149152

150153

151154
@dont_throw
@@ -207,10 +210,14 @@ async def _aset_input_attributes(span, kwargs):
207210
for i, tool in enumerate(kwargs.get("tools")):
208211
prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}"
209212
set_span_attribute(span, f"{prefix}.name", tool.get("name"))
210-
set_span_attribute(span, f"{prefix}.description", tool.get("description"))
213+
set_span_attribute(
214+
span, f"{prefix}.description", tool.get("description")
215+
)
211216
input_schema = tool.get("input_schema")
212217
if input_schema is not None:
213-
set_span_attribute(span, f"{prefix}.input_schema", json.dumps(input_schema))
218+
set_span_attribute(
219+
span, f"{prefix}.parameters", json.dumps(input_schema)
220+
)
214221

215222

216223
def _set_span_completions(span, response):
@@ -315,7 +322,9 @@ async def _aset_token_usage(
315322
completion_tokens = 0
316323
if hasattr(anthropic, "count_tokens"):
317324
if response.get("completion"):
318-
completion_tokens = await anthropic.count_tokens(response.get("completion"))
325+
completion_tokens = await anthropic.count_tokens(
326+
response.get("completion")
327+
)
319328
elif response.get("content"):
320329
completion_tokens = await anthropic.count_tokens(
321330
response.get("content")[0].text
@@ -357,7 +366,9 @@ async def _aset_token_usage(
357366
span, SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens
358367
)
359368
set_span_attribute(
360-
span, SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS, cache_creation_tokens
369+
span,
370+
SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS,
371+
cache_creation_tokens,
361372
)
362373

363374

@@ -402,7 +413,9 @@ def _set_token_usage(
402413
if response.get("completion"):
403414
completion_tokens = anthropic.count_tokens(response.get("completion"))
404415
elif response.get("content"):
405-
completion_tokens = anthropic.count_tokens(response.get("content")[0].text)
416+
completion_tokens = anthropic.count_tokens(
417+
response.get("content")[0].text
418+
)
406419

407420
if token_histogram and type(completion_tokens) is int and completion_tokens >= 0:
408421
token_histogram.record(
@@ -440,7 +453,9 @@ def _set_token_usage(
440453
span, SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_tokens
441454
)
442455
set_span_attribute(
443-
span, SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS, cache_creation_tokens
456+
span,
457+
SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS,
458+
cache_creation_tokens,
444459
)
445460

446461

packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import json
23
import os
34
import logging
45
import threading
@@ -133,3 +134,19 @@ def run_async(method):
133134
thread.join()
134135
else:
135136
asyncio.run(method)
137+
138+
139+
class JSONEncoder(json.JSONEncoder):
    """Best-effort JSON encoder for objects the stock encoder rejects.

    ``default`` is only consulted by ``json`` for values it cannot encode
    natively.  For those values this encoder tries, in order:

    1. ``o.to_json()``
    2. ``o.model_dump_json()`` (presumably Pydantic-style models -- confirm
       against callers)
    3. ``str(o)``

    Whatever a hook returns is handed back to ``json`` unchanged, so a hook
    that returns a string ends up embedded as a JSON string value.
    """

    def default(self, o):
        """Return a JSON-encodable stand-in for ``o``.

        Args:
            o: The object ``json`` could not encode on its own.

        Returns:
            The result of the first serialization hook that succeeds,
            otherwise ``str(o)``, otherwise an empty string when even
            ``str(o)`` raises.
        """
        logger = logging.getLogger(__name__)

        for hook_name in ("to_json", "model_dump_json"):
            try:
                hook = getattr(o, hook_name, None)
                if callable(hook):
                    return hook()
            except Exception:
                # A broken or unbound hook (e.g. ``o`` is a class rather than
                # an instance) must not abort the whole dump; fall through to
                # the next strategy instead.
                logger.debug(
                    "%s() failed for object of type: %s",
                    hook_name,
                    type(o).__name__,
                )

        try:
            return str(o)
        except Exception:
            logger.debug("Failed to serialize object of type: %s", type(o).__name__)
            return ""

packages/opentelemetry-instrumentation-anthropic/tests/test_messages.py

Lines changed: 86 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@ def test_anthropic_message_create(exporter, reader):
5353
+ anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
5454
== anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
5555
)
56-
assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01TPXhkPo8jy6yQMrMhjpiAE"
56+
assert (
57+
anthropic_span.attributes.get("gen_ai.response.id")
58+
== "msg_01TPXhkPo8jy6yQMrMhjpiAE"
59+
)
5760

5861
metrics_data = reader.get_metrics_data()
5962
resource_metrics = metrics_data.resource_metrics
@@ -120,7 +123,10 @@ def test_anthropic_multi_modal(exporter):
120123
+ anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
121124
== anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
122125
)
123-
assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01B37ySLPzYj8KY6uZmiPoxd"
126+
assert (
127+
anthropic_span.attributes.get("gen_ai.response.id")
128+
== "msg_01B37ySLPzYj8KY6uZmiPoxd"
129+
)
124130

125131

126132
@pytest.mark.vcr
@@ -184,7 +190,10 @@ async def test_anthropic_async_multi_modal(exporter):
184190
+ anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
185191
== anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
186192
)
187-
assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01DWnmUo9hWk4Fk7V7Ddfa2w"
193+
assert (
194+
anthropic_span.attributes.get("gen_ai.response.id")
195+
== "msg_01DWnmUo9hWk4Fk7V7Ddfa2w"
196+
)
188197

189198

190199
@pytest.mark.vcr
@@ -237,7 +246,10 @@ def test_anthropic_message_streaming(exporter, reader):
237246
+ anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
238247
== anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
239248
)
240-
assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01MXWxhWoPSgrYhjTuMDM6F1"
249+
assert (
250+
anthropic_span.attributes.get("gen_ai.response.id")
251+
== "msg_01MXWxhWoPSgrYhjTuMDM6F1"
252+
)
241253

242254
metrics_data = reader.get_metrics_data()
243255
resource_metrics = metrics_data.resource_metrics
@@ -290,7 +302,10 @@ async def test_async_anthropic_message_create(exporter, reader):
290302
+ anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
291303
== anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
292304
)
293-
assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01UFDDjsFn5BPQnfNwmsMnAY"
305+
assert (
306+
anthropic_span.attributes.get("gen_ai.response.id")
307+
== "msg_01UFDDjsFn5BPQnfNwmsMnAY"
308+
)
294309

295310
metrics_data = reader.get_metrics_data()
296311
resource_metrics = metrics_data.resource_metrics
@@ -349,7 +364,10 @@ async def test_async_anthropic_message_streaming(exporter, reader):
349364
+ anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
350365
== anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
351366
)
352-
assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_016o6A7zDmgjucf5mWv1rrPD"
367+
assert (
368+
anthropic_span.attributes.get("gen_ai.response.id")
369+
== "msg_016o6A7zDmgjucf5mWv1rrPD"
370+
)
353371

354372
metrics_data = reader.get_metrics_data()
355373
resource_metrics = metrics_data.resource_metrics
@@ -371,16 +389,16 @@ def test_anthropic_tools(exporter, reader):
371389
"properties": {
372390
"location": {
373391
"type": "string",
374-
"description": "The city and state, e.g. San Francisco, CA"
392+
"description": "The city and state, e.g. San Francisco, CA",
375393
},
376394
"unit": {
377395
"type": "string",
378396
"enum": ["celsius", "fahrenheit"],
379-
"description": "The unit of temperature, either 'celsius' or 'fahrenheit'"
380-
}
397+
"description": "The unit of temperature, either 'celsius' or 'fahrenheit'",
398+
},
381399
},
382-
"required": ["location"]
383-
}
400+
"required": ["location"],
401+
},
384402
},
385403
{
386404
"name": "get_time",
@@ -390,19 +408,19 @@ def test_anthropic_tools(exporter, reader):
390408
"properties": {
391409
"timezone": {
392410
"type": "string",
393-
"description": "The IANA time zone name, e.g. America/Los_Angeles"
411+
"description": "The IANA time zone name, e.g. America/Los_Angeles",
394412
}
395413
},
396-
"required": ["timezone"]
397-
}
398-
}
414+
"required": ["timezone"],
415+
},
416+
},
399417
],
400418
messages=[
401419
{
402420
"role": "user",
403-
"content": "What is the weather like right now in New York? Also what time is it there now?"
421+
"content": "What is the weather like right now in New York? Also what time is it there now?",
404422
}
405-
]
423+
],
406424
)
407425
try:
408426
client.messages.create(
@@ -428,69 +446,92 @@ def test_anthropic_tools(exporter, reader):
428446

429447
# verify request and inputs
430448
assert (
431-
anthropic_span.attributes["gen_ai.prompt.0.content"] ==
432-
"What is the weather like right now in New York? Also what time is it there now?"
449+
anthropic_span.attributes["gen_ai.prompt.0.content"]
450+
== "What is the weather like right now in New York? Also what time is it there now?"
433451
)
434452
assert anthropic_span.attributes["gen_ai.prompt.0.role"] == "user"
435-
assert (
436-
anthropic_span.attributes["llm.request.functions.0.name"] == "get_weather"
437-
)
453+
assert anthropic_span.attributes["llm.request.functions.0.name"] == "get_weather"
438454
assert (
439455
anthropic_span.attributes["llm.request.functions.0.description"]
440456
== "Get the current weather in a given location"
441457
)
442-
assert (
443-
anthropic_span.attributes["llm.request.functions.0.input_schema"]
444-
== json.dumps({
458+
assert anthropic_span.attributes[
459+
"llm.request.functions.0.parameters"
460+
] == json.dumps(
461+
{
445462
"type": "object",
446463
"properties": {
447464
"location": {
448465
"type": "string",
449-
"description": "The city and state, e.g. San Francisco, CA"
466+
"description": "The city and state, e.g. San Francisco, CA",
450467
},
451468
"unit": {
452469
"type": "string",
453470
"enum": ["celsius", "fahrenheit"],
454-
"description": "The unit of temperature, either 'celsius' or 'fahrenheit'"
455-
}
471+
"description": "The unit of temperature, either 'celsius' or 'fahrenheit'",
472+
},
456473
},
457-
"required": ["location"]
458-
})
474+
"required": ["location"],
475+
}
459476
)
460477
assert anthropic_span.attributes["llm.request.functions.1.name"] == "get_time"
461478
assert (
462479
anthropic_span.attributes["llm.request.functions.1.description"]
463480
== "Get the current time in a given time zone"
464481
)
465-
assert (
466-
anthropic_span.attributes["llm.request.functions.1.input_schema"]
467-
== json.dumps({
482+
assert anthropic_span.attributes[
483+
"llm.request.functions.1.parameters"
484+
] == json.dumps(
485+
{
468486
"type": "object",
469487
"properties": {
470488
"timezone": {
471489
"type": "string",
472-
"description": "The IANA time zone name, e.g. America/Los_Angeles"
490+
"description": "The IANA time zone name, e.g. America/Los_Angeles",
473491
}
474492
},
475-
"required": ["timezone"]
476-
})
493+
"required": ["timezone"],
494+
}
477495
)
478496

479497
# verify response and output
480-
assert anthropic_span.attributes["gen_ai.completion.0.finish_reason"] == response.stop_reason
481-
assert anthropic_span.attributes["gen_ai.completion.0.content"] == response.content[0].text
498+
assert (
499+
anthropic_span.attributes["gen_ai.completion.0.finish_reason"]
500+
== response.stop_reason
501+
)
502+
assert (
503+
anthropic_span.attributes["gen_ai.completion.0.content"]
504+
== response.content[0].text
505+
)
482506
assert anthropic_span.attributes["gen_ai.completion.0.role"] == "assistant"
483507

484-
assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.0.id"]) == response.content[1].id
485-
assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.0.name"]) == response.content[1].name
508+
assert (
509+
anthropic_span.attributes["gen_ai.completion.0.tool_calls.0.id"]
510+
) == response.content[1].id
511+
assert (
512+
anthropic_span.attributes["gen_ai.completion.0.tool_calls.0.name"]
513+
) == response.content[1].name
486514
response_input = json.dumps(response.content[1].input)
487-
assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.0.arguments"] == response_input)
515+
assert (
516+
anthropic_span.attributes["gen_ai.completion.0.tool_calls.0.arguments"]
517+
== response_input
518+
)
488519

489-
assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.id"]) == response.content[2].id
490-
assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.name"]) == response.content[2].name
520+
assert (
521+
anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.id"]
522+
) == response.content[2].id
523+
assert (
524+
anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.name"]
525+
) == response.content[2].name
491526
response_input = json.dumps(response.content[2].input)
492-
assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.arguments"] == response_input)
493-
assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01RBkXFe9TmDNNWThMz2HmGt"
527+
assert (
528+
anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.arguments"]
529+
== response_input
530+
)
531+
assert (
532+
anthropic_span.attributes.get("gen_ai.response.id")
533+
== "msg_01RBkXFe9TmDNNWThMz2HmGt"
534+
)
494535

495536
# verify metrics
496537
metrics_data = reader.get_metrics_data()

0 commit comments

Comments
 (0)