feat(client): support completions streaming structured outputs (#528)

damo · stainless-app[bot] · commit 1102eadbbdbe · 2025-07-15T14:51:34.000Z
* structured-streaming-495: updates, example and docs for streaming with structured outputs.

* structured-streaming-495: improved accumulator API, partial support for Responses API.

* structured-streaming-495: added links to SDK docs.
diff --git a/README.md b/README.md
@@ -508,6 +508,31 @@ the latter when `ResponseCreateParams.Builder.text(Class<T>)` is called.
 For a full example of the usage of _Structured Outputs_ with the Responses API, see
 [`ResponsesStructuredOutputsExample`](openai-java-example/src/main/java/com/openai/example/ResponsesStructuredOutputsExample.java).
 
+### Usage with streaming
+
+_Structured Outputs_ can also be used with [Streaming](#streaming) and the Chat Completions API. As
+responses are returned in "chunks", the chunks must first be accumulated so that their JSON string
+fragments can be concatenated and then converted into an instance of your Java class. Normal
+streaming operations can be performed while accumulating the JSON strings.
+
+Use the [`ChatCompletionAccumulator`](openai-java-core/src/main/kotlin/com/openai/helpers/ChatCompletionAccumulator.kt)
+as described in the section on [Streaming helpers](#streaming-helpers) to accumulate the JSON
+strings. Once accumulated, use `ChatCompletionAccumulator.chatCompletion(Class<T>)` to convert the
+accumulated `ChatCompletion` into a
+[`StructuredChatCompletion`](openai-java-core/src/main/kotlin/com/openai/models/chat/completions/StructuredChatCompletion.kt).
+The `StructuredChatCompletion` can then automatically deserialize the JSON strings into instances of
+your Java class.
+
+For a full example of the usage of _Structured Outputs_ with Streaming and the Chat Completions API,
+see
+[`StructuredOutputsStreamingExample`](openai-java-example/src/main/java/com/openai/example/StructuredOutputsStreamingExample.java).
+
+At present, there is no accumulator for streaming responses using the Responses API. It is still
+possible to derive a JSON schema from a Java class and create a streaming response for a
+[`StructuredResponseCreateParams`](openai-java-core/src/main/kotlin/com/openai/models/responses/StructuredResponseCreateParams.kt)
+object, but there is no helper for deserialization of the response to an instance of that Java
+class.
+
 ### Defining JSON schema properties
 
 When a JSON schema is derived from your Java classes, all properties represented by `public` fields
@@ -594,13 +619,13 @@ import io.swagger.v3.oas.annotations.media.ArraySchema;
 class Article {
     @ArraySchema(minItems = 1, maxItems = 10)
     public List<String> authors;
- 
+
     @Schema(pattern = "^[A-Za-z ]+$")
     public String title;
-    
+
     @Schema(format = "date")
     public String publicationDate;
-    
+
     @Schema(minimum = "1")
     public int pageCount;
 }
diff --git a/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt b/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt
@@ -218,8 +218,8 @@ internal fun extractSchema(type: Class<*>): ObjectNode {
 }
 
 /**
- * Creates an instance of a Java class using data from a JSON. The JSON data should conform to the
- * JSON schema previously extracted from the Java class.
+ * Creates an instance of a Java class using data from a JSON string. The JSON data should conform
+ * to the JSON schema previously extracted from the Java class.
  *
  * @throws OpenAIInvalidDataException If the JSON data cannot be parsed to an instance of the
  *   [responseType] class.
diff --git a/openai-java-core/src/main/kotlin/com/openai/helpers/ChatCompletionAccumulator.kt b/openai-java-core/src/main/kotlin/com/openai/helpers/ChatCompletionAccumulator.kt
@@ -7,6 +7,7 @@ import com.openai.models.chat.completions.ChatCompletion
 import com.openai.models.chat.completions.ChatCompletionChunk
 import com.openai.models.chat.completions.ChatCompletionMessage
 import com.openai.models.chat.completions.ChatCompletionMessageToolCall
+import com.openai.models.chat.completions.StructuredChatCompletion
 import java.util.Optional
 import kotlin.jvm.optionals.getOrNull
 
@@ -122,6 +123,23 @@ class ChatCompletionAccumulator private constructor() {
     fun chatCompletion(): ChatCompletion =
         checkNotNull(chatCompletion) { "Final chat completion chunk(s) not yet received." }
 
+    /**
+     * Gets the final accumulated chat completion with support for structured outputs. Until the
+     * last chunk has been accumulated, a [StructuredChatCompletion] will not be available. Wait
+     * until all chunks have been handled by [accumulate] before calling this method. See that
+     * method for more details on how the last chunk is detected. See the SDK documentation on
+     * _Structured Outputs_ for more details and example code.
+     *
+     * @param responseType The Java class from which the JSON schema in the request was derived. The
+     *   output JSON conforming to that schema can be converted automatically back to an instance of
+     *   that Java class by the [StructuredChatCompletion].
+     * @throws IllegalStateException If called before the last chunk has been accumulated.
+     * @throws OpenAIInvalidDataException If the JSON data cannot be parsed to an instance of the
+     *   [responseType] class.
+     */
+    fun <T : Any> chatCompletion(responseType: Class<T>): StructuredChatCompletion<T> =
+        StructuredChatCompletion(responseType, chatCompletion())
+
     /**
      * Accumulates a streamed chunk and uses it to construct a [ChatCompletion]. When all chunks
      * have been accumulated, the chat completion can be retrieved by calling [chatCompletion].
diff --git a/openai-java-core/src/main/kotlin/com/openai/services/blocking/ResponseService.kt b/openai-java-core/src/main/kotlin/com/openai/services/blocking/ResponseService.kt
@@ -116,6 +116,26 @@ interface ResponseService {
     fun createStreaming(requestOptions: RequestOptions): StreamResponse<ResponseStreamEvent> =
         createStreaming(ResponseCreateParams.none(), requestOptions)
 
+    /**
+     * Creates a streaming model response for the given response conversation. The input parameters
+     * can define a JSON schema derived automatically from an arbitrary class to request a
+     * structured output in JSON form. However, that structured output is split over multiple
+     * streamed events, so it will not be deserialized automatically into an instance of that class.
+     * See the [SDK documentation](https://github.com/openai/openai-java/#usage-with-streaming) for
+     * full details.
+     */
+    @MustBeClosed
+    fun createStreaming(
+        params: StructuredResponseCreateParams<*>
+    ): StreamResponse<ResponseStreamEvent> = createStreaming(params, RequestOptions.none())
+
+    /** @see [createStreaming] */
+    @MustBeClosed
+    fun createStreaming(
+        params: StructuredResponseCreateParams<*>,
+        requestOptions: RequestOptions = RequestOptions.none(),
+    ): StreamResponse<ResponseStreamEvent> = createStreaming(params.rawParams, requestOptions)
+
     /** Retrieves a model response with the given ID. */
     fun retrieve(responseId: String): Response = retrieve(responseId, ResponseRetrieveParams.none())
 
diff --git a/openai-java-core/src/main/kotlin/com/openai/services/blocking/chat/ChatCompletionService.kt b/openai-java-core/src/main/kotlin/com/openai/services/blocking/chat/ChatCompletionService.kt
@@ -117,6 +117,28 @@ interface ChatCompletionService {
         requestOptions: RequestOptions = RequestOptions.none(),
     ): StreamResponse<ChatCompletionChunk>
 
+    /**
+     * Creates a streaming model response for the given chat conversation. The input parameters can
+     * define a JSON schema derived automatically from an arbitrary class to request a structured
+     * output in JSON form. However, that structured output is split over multiple streamed events,
+     * so it will not be deserialized automatically into an instance of that class. To deserialize
+     * the output, first accumulate the stream of events into a single output value using a
+     * [com.openai.helpers.ChatCompletionAccumulator]. See the
+     * [SDK documentation](https://github.com/openai/openai-java/#usage-with-streaming) for full
+     * details.
+     */
+    @MustBeClosed
+    fun createStreaming(
+        params: StructuredChatCompletionCreateParams<*>
+    ): StreamResponse<ChatCompletionChunk> = createStreaming(params, RequestOptions.none())
+
+    /** @see [createStreaming] */
+    @MustBeClosed
+    fun createStreaming(
+        params: StructuredChatCompletionCreateParams<*>,
+        requestOptions: RequestOptions = RequestOptions.none(),
+    ): StreamResponse<ChatCompletionChunk> = createStreaming(params.rawParams, requestOptions)
+
     /**
      * Get a stored chat completion. Only Chat Completions that have been created with the `store`
      * parameter set to `true` will be returned.
diff --git a/openai-java-example/src/main/java/com/openai/example/StructuredOutputsStreamingExample.java b/openai-java-example/src/main/java/com/openai/example/StructuredOutputsStreamingExample.java
@@ -0,0 +1,91 @@
+package com.openai.example;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+import com.openai.client.OpenAIClient;
+import com.openai.client.okhttp.OpenAIOkHttpClient;
+import com.openai.core.http.StreamResponse;
+import com.openai.helpers.ChatCompletionAccumulator;
+import com.openai.models.ChatModel;
+import com.openai.models.chat.completions.ChatCompletionChunk;
+import com.openai.models.chat.completions.ChatCompletionCreateParams;
+import com.openai.models.chat.completions.StructuredChatCompletionCreateParams;
+import io.swagger.v3.oas.annotations.media.ArraySchema;
+import io.swagger.v3.oas.annotations.media.Schema;
+import java.util.List;
+
+public final class StructuredOutputsStreamingExample {
+
+    public static class Person {
+        @JsonPropertyDescription("The first name and surname of the person.")
+        public String name;
+
+        public int birthYear;
+
+        @JsonPropertyDescription("The year the person died, or 'present' if the person is living.")
+        public String deathYear;
+
+        @Override
+        public String toString() {
+            return name + " (" + birthYear + '-' + deathYear + ')';
+        }
+    }
+
+    public static class Book {
+        public String title;
+
+        public Person author;
+
+        @JsonPropertyDescription("The year in which the book was first published.")
+        @Schema(minimum = "1500")
+        public int publicationYear;
+
+        public String genre;
+
+        @JsonIgnore
+        public String isbn;
+
+        @Override
+        public String toString() {
+            return '"' + title + "\" (" + publicationYear + ") [" + genre + "] by " + author;
+        }
+    }
+
+    public static class BookList { // root class given to responseFormat(...) and chatCompletion(...) below
+        @ArraySchema(maxItems = 100)
+        public List<Book> books;
+    }
+
+    private StructuredOutputsStreamingExample() {}
+
+    public static void main(String[] args) {
+        // Configures using one of:
+        // - The `OPENAI_API_KEY` environment variable
+        // - The `OPENAI_BASE_URL` and `AZURE_OPENAI_KEY` environment variables
+        OpenAIClient client = OpenAIOkHttpClient.fromEnv();
+
+        StructuredChatCompletionCreateParams<BookList> createParams = ChatCompletionCreateParams.builder()
+                .model(ChatModel.GPT_4O_MINI)
+                .maxCompletionTokens(2048)
+                .responseFormat(BookList.class) // the requested JSON schema is derived from BookList
+                .addUserMessage("List some famous late twentieth century novels.")
+                .build();
+
+        ChatCompletionAccumulator accumulator = ChatCompletionAccumulator.create(); // gathers the streamed chunks
+
+        try (StreamResponse<ChatCompletionChunk> streamResponse =
+                client.chat().completions().createStreaming(createParams)) {
+            streamResponse.stream()
+                    .peek(accumulator::accumulate) // every chunk is also handed to the accumulator
+                    .flatMap(completion -> completion.choices().stream())
+                    .flatMap(choice -> choice.delta().content().stream())
+                    .forEach(System.out::print); // echo the raw JSON fragments as they arrive
+            System.out.println();
+        }
+
+        accumulator.chatCompletion(BookList.class).choices().stream() // called only after the stream is consumed
+                .flatMap(choice -> choice.message().content().stream())
+                .flatMap(bookList -> bookList.books.stream())
+                .forEach(book -> System.out.println(" - " + book));
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -218,8 +218,8 @@ internal fun extractSchema(type: Class<*>): ObjectNode {`
`218`	`218`	`}`
`219`	`219`
`220`	`220`	`/**`
`221`		`- * Creates an instance of a Java class using data from a JSON. The JSON data should conform to the`
`222`		`- * JSON schema previously extracted from the Java class.`
	`221`	`+ * Creates an instance of a Java class using data from a JSON string. The JSON data should conform`
	`222`	`+ * to the JSON schema previously extracted from the Java class.`
`223`	`223`	`*`
`224`	`224`	`* @throws OpenAIInvalidDataException If the JSON data cannot be parsed to an instance of the`
`225`	`225`	`* [responseType] class.`