Skip to content

Commit ac39251

Browse files
committed
reasoning
1 parent 23c1a09 commit ac39251

File tree

4 files changed

+250
-27
lines changed

4 files changed

+250
-27
lines changed

dd-java-agent/instrumentation/openai-java/openai-java-3.0/src/main/java/datadog/trace/instrumentation/openai_java/OpenAiDecorator.java

Lines changed: 63 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import com.openai.core.http.Headers;
66
import com.openai.core.http.HttpResponse;
77
import com.openai.helpers.ChatCompletionAccumulator;
8+
import com.openai.models.Reasoning;
89
import com.openai.models.ResponsesModel;
910
import com.openai.models.chat.completions.ChatCompletion;
1011
import com.openai.models.chat.completions.ChatCompletionChunk;
@@ -24,7 +25,9 @@
2425
import com.openai.models.responses.ResponseOutputItem;
2526
import com.openai.models.responses.ResponseOutputMessage;
2627
import com.openai.models.responses.ResponseOutputText;
28+
import com.openai.models.responses.ResponseReasoningItem;
2729
import com.openai.models.responses.ResponseStreamEvent;
30+
import datadog.json.JsonWriter;
2831
import datadog.trace.api.DDSpanId;
2932
import datadog.trace.api.llmobs.LLMObs;
3033
import datadog.trace.api.llmobs.LLMObsContext;
@@ -366,21 +369,15 @@ public void withResponseCreateParams(AgentSpan span, ResponseCreateParams params
366369
String modelName = extractResponseModel(params._model());
367370
span.setTag(REQUEST_MODEL, modelName);
368371

369-
// Set model_name and model_provider as fallback (will be overridden by withResponse if called)
370-
// span.setTag("_ml_obs_tag.model_name", modelName);
371-
// span.setTag("_ml_obs_tag.model_provider", "openai");
372-
373372
List<LLMObs.LLMMessage> inputMessages = new ArrayList<>();
374373

375-
// Add instructions as system message first (if present)
376374
params
377375
.instructions()
378376
.ifPresent(
379377
instructions -> {
380378
inputMessages.add(LLMObs.LLMMessage.from("system", instructions));
381379
});
382380

383-
// Add user input message
384381
Optional<String> textOpt = params._input().asString();
385382
if (textOpt.isPresent()) {
386383
inputMessages.add(LLMObs.LLMMessage.from("user", textOpt.get()));
@@ -389,6 +386,43 @@ public void withResponseCreateParams(AgentSpan span, ResponseCreateParams params
389386
if (!inputMessages.isEmpty()) {
390387
span.setTag("_ml_obs_tag.input", inputMessages);
391388
}
389+
390+
extractReasoningFromParams(params)
391+
.ifPresent(reasoningMap -> span.setTag("_ml_obs_request.reasoning", reasoningMap));
392+
}
393+
394+
private Optional<Map<String, String>> extractReasoningFromParams(ResponseCreateParams params) {
395+
com.openai.core.JsonField<Reasoning> reasoningField = params._reasoning();
396+
if (reasoningField.isMissing()) {
397+
return Optional.empty();
398+
}
399+
400+
Map<String, String> reasoningMap = new HashMap<>();
401+
402+
Optional<Reasoning> knownReasoning = reasoningField.asKnown();
403+
if (knownReasoning.isPresent()) {
404+
Reasoning reasoning = knownReasoning.get();
405+
reasoning.effort().ifPresent(effort -> reasoningMap.put("effort", effort.asString()));
406+
reasoning.summary().ifPresent(summary -> reasoningMap.put("summary", summary.asString()));
407+
} else {
408+
Optional<Map<String, com.openai.core.JsonValue>> rawObject = reasoningField.asObject();
409+
if (rawObject.isPresent()) {
410+
Map<String, com.openai.core.JsonValue> obj = rawObject.get();
411+
com.openai.core.JsonValue effortVal = obj.get("effort");
412+
if (effortVal != null) {
413+
effortVal.asString().ifPresent(v -> reasoningMap.put("effort", String.valueOf(v)));
414+
}
415+
com.openai.core.JsonValue summaryVal = obj.get("summary");
416+
if (summaryVal == null) {
417+
summaryVal = obj.get("generate_summary");
418+
}
419+
if (summaryVal != null) {
420+
summaryVal.asString().ifPresent(v -> reasoningMap.put("summary", String.valueOf(v)));
421+
}
422+
}
423+
}
424+
425+
return reasoningMap.isEmpty() ? Optional.empty() : Optional.of(reasoningMap);
392426
}
393427

394428
public void withResponse(AgentSpan span, Response response) {
@@ -423,11 +457,15 @@ private void withResponse(AgentSpan span, Response response, boolean stream) {
423457

424458
Map<String, Object> metadata = new HashMap<>();
425459

460+
Object reasoningTag = span.getTag("_ml_obs_request.reasoning");
461+
if (reasoningTag != null) {
462+
metadata.put("reasoning", reasoningTag);
463+
}
464+
426465
response.maxOutputTokens().ifPresent(v -> metadata.put("max_output_tokens", v));
427466
response.temperature().ifPresent(v -> metadata.put("temperature", v));
428467
response.topP().ifPresent(v -> metadata.put("top_p", v));
429468

430-
// Extract tool_choice as string
431469
Response.ToolChoice toolChoice = response.toolChoice();
432470
if (toolChoice.isOptions()) {
433471
metadata.put("tool_choice", toolChoice.asOptions()._value().asString().orElse(null));
@@ -437,14 +475,12 @@ private void withResponse(AgentSpan span, Response response, boolean stream) {
437475
metadata.put("tool_choice", "function");
438476
}
439477

440-
// Extract truncation as string
441478
response
442479
.truncation()
443480
.ifPresent(
444481
(Response.Truncation t) ->
445482
metadata.put("truncation", t._value().asString().orElse(null)));
446483

447-
// Extract text format
448484
response
449485
.text()
450486
.ifPresent(
@@ -491,24 +527,35 @@ private void withResponse(AgentSpan span, Response response, boolean stream) {
491527

492528
/**
 * Converts response output items into LLMObs messages, emitting one message per recognized item
 * in the order the items appear.
 *
 * <ul>
 *   <li>Function-call items become an {@code "assistant"} message with {@code null} content and a
 *       single tool call; items for which no tool call could be extracted are skipped.
 *   <li>Message items become a message carrying the extracted text content, using the item's role
 *       when it is available as a string and {@code "assistant"} otherwise.
 *   <li>Reasoning items become a {@code "reasoning"} message whose content is a JSON object with
 *       {@code summary} (text of the first summary entry, when any exists), {@code
 *       encrypted_content} (when present) and {@code id}.
 * </ul>
 *
 * Items of any other kind produce no message.
 *
 * @param output the output items of a response
 * @return the messages built from {@code output}; empty when no item was recognized
 */
private List<LLMObs.LLMMessage> extractResponseOutputMessages(List<ResponseOutputItem> output) {
493529
List<LLMObs.LLMMessage> messages = new ArrayList<>();
494-
List<LLMObs.ToolCall> toolCalls = new ArrayList<>();
495-
String textContent = null;
496530

497531
for (ResponseOutputItem item : output) {
498532
if (item.isFunctionCall()) {
499533
ResponseFunctionToolCall functionCall = item.asFunctionCall();
500534
LLMObs.ToolCall toolCall = ToolCallExtractor.getToolCall(functionCall);
501535
if (toolCall != null) {
502-
toolCalls.add(toolCall);
536+
List<LLMObs.ToolCall> toolCalls = Collections.singletonList(toolCall);
537+
messages.add(LLMObs.LLMMessage.from("assistant", null, toolCalls));
503538
}
504539
} else if (item.isMessage()) {
505540
ResponseOutputMessage message = item.asMessage();
506-
textContent = extractMessageContent(message);
541+
String textContent = extractMessageContent(message);
542+
Optional<String> roleOpt = message._role().asString();
543+
String role = roleOpt.orElse("assistant");
544+
messages.add(LLMObs.LLMMessage.from(role, textContent));
545+
} else if (item.isReasoning()) {
546+
ResponseReasoningItem reasoning = item.asReasoning();
547+
try (JsonWriter writer = new JsonWriter()) {
548+
writer.beginObject();
549+
if (!reasoning.summary().isEmpty()) {
550+
writer.name("summary").value(reasoning.summary().get(0).text());
551+
}
552+
reasoning.encryptedContent().ifPresent(v -> writer.name("encrypted_content").value(v));
553+
writer.name("id").value(reasoning.id());
554+
writer.endObject();
555+
messages.add(LLMObs.LLMMessage.from("reasoning", writer.toString()));
556+
}
507557
}
508558
}
509-
510-
messages.add(LLMObs.LLMMessage.from("assistant", textContent, toolCalls));
511-
512559
return messages;
513560
}
514561

dd-java-agent/instrumentation/openai-java/openai-java-3.0/src/test/groovy/OpenAiTest.groovy

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@ import com.openai.core.JsonValue
99
import com.openai.models.ChatModel
1010
import com.openai.models.FunctionDefinition
1111
import com.openai.models.FunctionParameters
12+
import com.openai.models.Reasoning
13+
import com.openai.models.ReasoningEffort
1214
import com.openai.models.chat.completions.ChatCompletionCreateParams
1315
import com.openai.models.chat.completions.ChatCompletionFunctionTool
1416
import com.openai.models.completions.CompletionCreateParams
1517
import com.openai.models.embeddings.EmbeddingCreateParams
1618
import com.openai.models.embeddings.EmbeddingModel
1719
import com.openai.models.responses.ResponseCreateParams
20+
import com.openai.models.responses.ResponseIncludable
1821
import datadog.trace.agent.test.server.http.TestHttpServer
1922
import datadog.trace.core.util.LRUCache
2023
import datadog.trace.llmobs.LlmObsSpecification
@@ -146,6 +149,23 @@ abstract class OpenAiTest extends LlmObsSpecification {
146149
.build()
147150
}
148151

152+
/**
 * Builds "o4-mini" response parameters that request encrypted reasoning content and set
 * reasoning to medium effort with a detailed summary.
 *
 * @param json when true, reasoning is supplied as a raw JSON value; otherwise through the typed
 *     Reasoning builder
 */
ResponseCreateParams responseCreateParamsWithReasoning(boolean json) {
  ResponseCreateParams.Builder paramsBuilder = ResponseCreateParams.builder()
    .model("o4-mini")
    .input("If one plus a number is 10, what is the number?")
    .include(Collections.singletonList(ResponseIncludable.REASONING_ENCRYPTED_CONTENT)) // TODO "include":["reasoning.encrypted_content"]

  if (json) {
    paramsBuilder.reasoning(JsonValue.from([effort: "medium", summary: "detailed"]))
  } else {
    paramsBuilder.reasoning(Reasoning.builder().effort(ReasoningEffort.MEDIUM).summary(Reasoning.Summary.DETAILED).build())
  }

  return paramsBuilder.build()
}
168+
149169
ChatCompletionCreateParams chatCompletionCreateParamsWithTools() {
150170
ChatCompletionCreateParams.builder()
151171
.model(ChatModel.GPT_4O_MINI)

dd-java-agent/instrumentation/openai-java/openai-java-3.0/src/test/groovy/ResponseServiceTest.groovy

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class ResponseServiceTest extends OpenAiTest {
2222
expect:
2323
resp != null
2424
and:
25-
assertResponseTrace(false)
25+
assertResponseTrace(false, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
2626
}
2727

2828
def "create response test withRawResponse"() {
@@ -34,7 +34,7 @@ class ResponseServiceTest extends OpenAiTest {
3434
resp.statusCode() == 200
3535
resp.parse().valid // force response parsing, so it sets all the tags
3636
and:
37-
assertResponseTrace(false)
37+
assertResponseTrace(false, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
3838
}
3939

4040
def "create streaming response test (#scenario)"() {
@@ -48,14 +48,31 @@ class ResponseServiceTest extends OpenAiTest {
4848
}
4949
5050
expect:
51-
assertResponseTrace(true)
51+
assertResponseTrace(true, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
5252
5353
where:
5454
scenario | params
5555
"complete" | responseCreateParams()
5656
"incomplete" | responseCreateParamsWithMaxOutputTokens()
5757
}
5858
59+
// Streams a reasoning-enabled response under a "parent" span, drains the stream, and then checks
// the resulting trace for request model "o4-mini", response model "o4-mini-2025-04-16" and the
// reasoning tag [effort: "medium", summary: "detailed"].
// The feature runs twice: once with parameters from responseCreateParamsWithReasoning(false) and
// once with parameters from responseCreateParamsWithReasoning(true).
def "create streaming response test (reasoning)"() {
60+
runnableUnderTrace("parent") {
61+
StreamResponse<ResponseStreamEvent> streamResponse = openAiClient.responses().createStreaming(responseCreateParams)
62+
try (Stream stream = streamResponse.stream()) {
63+
stream.forEach {
64+
// consume the stream
65+
}
66+
}
67+
}
68+
69+
expect:
70+
assertResponseTrace(true, "o4-mini", "o4-mini-2025-04-16", [effort: "medium", summary: "detailed"])
71+
72+
where:
73+
responseCreateParams << [responseCreateParamsWithReasoning(false), responseCreateParamsWithReasoning(true)]
74+
}
75+
5976
def "create streaming response test withRawResponse"() {
6077
runnableUnderTrace("parent") {
6178
HttpResponseFor<StreamResponse<ResponseStreamEvent>> streamResponse = openAiClient.responses().withRawResponse().createStreaming(responseCreateParams())
@@ -67,7 +84,7 @@ class ResponseServiceTest extends OpenAiTest {
6784
}
6885
6986
expect:
70-
assertResponseTrace(true)
87+
assertResponseTrace(true, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
7188
}
7289
7390
def "create async response test"() {
@@ -78,7 +95,7 @@ class ResponseServiceTest extends OpenAiTest {
7895
responseFuture.get()
7996
8097
expect:
81-
assertResponseTrace(false)
98+
assertResponseTrace(false, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
8299
}
83100
84101
def "create async response test withRawResponse"() {
@@ -90,7 +107,7 @@ class ResponseServiceTest extends OpenAiTest {
90107
resp.parse().valid // force response parsing, so it sets all the tags
91108
92109
expect:
93-
assertResponseTrace(false)
110+
assertResponseTrace(false, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
94111
}
95112
96113
def "create streaming async response test"() {
@@ -102,7 +119,7 @@ class ResponseServiceTest extends OpenAiTest {
102119
}
103120
asyncResp.onCompleteFuture().get()
104121
expect:
105-
assertResponseTrace(true)
122+
assertResponseTrace(true, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
106123
}
107124
108125
def "create streaming async response test withRawResponse"() {
@@ -117,10 +134,10 @@ class ResponseServiceTest extends OpenAiTest {
117134
}
118135
expect:
119136
resp.statusCode() == 200
120-
assertResponseTrace(true)
137+
assertResponseTrace(true, "gpt-3.5-turbo", "gpt-3.5-turbo-0125", null)
121138
}
122139
123-
private void assertResponseTrace(boolean isStreaming) {
140+
private void assertResponseTrace(boolean isStreaming, String reqModel, String respModel, Map reasoning) {
124141
assertTraces(1) {
125142
trace(3) {
126143
sortSpansByStart()
@@ -146,18 +163,21 @@ class ResponseServiceTest extends OpenAiTest {
146163
"_ml_obs_metric.total_tokens" Long
147164
"_ml_obs_metric.cache_read_input_tokens" Long
148165
"_ml_obs_tag.parent_id" "undefined"
166+
if (reasoning != null) {
167+
"_ml_obs_request.reasoning" reasoning
168+
}
149169
"openai.request.method" "POST"
150170
"openai.request.endpoint" "v1/responses"
151171
"openai.api_base" openAiBaseApi
152-
"$OpenAiDecorator.RESPONSE_MODEL" "gpt-3.5-turbo-0125"
172+
"$OpenAiDecorator.RESPONSE_MODEL" respModel
153173
if (!isStreaming) {
154174
"openai.organization.ratelimit.requests.limit" 10000
155175
"openai.organization.ratelimit.requests.remaining" Integer
156176
"openai.organization.ratelimit.tokens.limit" 50000000
157177
"openai.organization.ratelimit.tokens.remaining" Integer
158178
}
159179
"$OpenAiDecorator.OPENAI_ORGANIZATION_NAME" "datadog-staging"
160-
"$OpenAiDecorator.REQUEST_MODEL" "gpt-3.5-turbo"
180+
"$OpenAiDecorator.REQUEST_MODEL" reqModel
161181
"$Tags.COMPONENT" "openai"
162182
"$Tags.SPAN_KIND" Tags.SPAN_KIND_CLIENT
163183
defaultTags()

0 commit comments

Comments
 (0)