Skip to content

Commit 1e0ab8f

Browse files
committed
feat: add OpenAI gpt-image-1 and gpt-image-1-mini models with new parameters
Add support for OpenAI's new GPT Image models (gpt-image-1 and gpt-image-1-mini) with all new model-specific parameters according to the official API specification.

Changes:
- Add GPT_IMAGE_1 and GPT_IMAGE_1_MINI to ImageModel enum
- Update default image model from DALL_E_3 to GPT_IMAGE_1_MINI
- Add 6 new gpt-image-1 specific parameters to OpenAiImageRequest:
  * background: transparency control (transparent/opaque/auto)
  * moderation: content moderation level (low/auto)
  * outputCompression: compression level 0-100% for webp/jpeg
  * outputFormat: output format (png/jpeg/webp)
  * partialImages: streaming partial images support (0-3)
  * stream: enable streaming mode
- Update OpenAiImageOptions with new fields, getters/setters, and builder methods
- Update documentation to reflect model-specific parameter support
- Add comprehensive integration tests (OpenAiImageApiIT) with parameterized tests
- Update existing tests to use new default model

Breaking Changes:
- Default image model changed from DALL_E_3 to GPT_IMAGE_1_MINI
- OpenAiImageRequest constructor signature updated with 6 new parameters

Reference: https://platform.openai.com/docs/models

Signed-off-by: Alexandros Pappas <[email protected]>
1 parent 4bd7d3e commit 1e0ab8f

File tree

6 files changed

+346
-23
lines changed

6 files changed

+346
-23
lines changed

auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageProperties.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class OpenAiImageProperties extends OpenAiParentProperties {
3737

3838
private String imagesPath = DEFAULT_IMAGES_PATH;
3939

40-
public static final String DEFAULT_IMAGE_MODEL = OpenAiImageApi.ImageModel.DALL_E_3.getValue();
40+
public static final String DEFAULT_IMAGE_MODEL = OpenAiImageApi.ImageModel.GPT_IMAGE_1_MINI.getValue();
4141

4242
/**
4343
* Options for OpenAI Image API.

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageOptions.java

Lines changed: 155 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,27 +67,31 @@ public class OpenAiImageOptions implements ImageOptions {
6767
private Integer height;
6868

6969
/**
70-
* The quality of the image that will be generated. hd creates images with finer
71-
* details and greater consistency across the image. This param is only supported for
72-
* dall-e-3.
70+
* The quality of the image that will be generated. auto (default value) will
71+
* automatically select the best quality for the given model. high, medium and low are
72+
* supported for gpt-image-1. hd and standard are supported for dall-e-3. standard is
73+
* the only option for dall-e-2.
7374
*/
7475
@JsonProperty("quality")
7576
private String quality;
7677

7778
/**
78-
* The format in which the generated images are returned. Must be one of url or
79-
* b64_json.
79+
* The format in which generated images with dall-e-2 and dall-e-3 are returned. Must
80+
* be one of url or b64_json. URLs are only valid for 60 minutes after the image has
81+
* been generated. This parameter isn't supported for gpt-image-1 which will always
82+
* return base64-encoded images.
8083
*/
8184
@JsonProperty("response_format")
8285
private String responseFormat;
8386

8487
/**
85-
* The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for
86-
* dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models.
87-
* This property is automatically computed when both width and height are set,
88-
* following the format "widthxheight". When setting this property directly, it must
89-
* follow the format "WxH" where W and H are valid integers. Invalid formats will
90-
* result in null width and height values.
88+
* The size of the generated images. Must be one of 1024x1024, 1536x1024 (landscape),
89+
* 1024x1536 (portrait), or auto (default value) for gpt-image-1. Must be one of
90+
* 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024,
91+
* or 1024x1792 for dall-e-3. This property is automatically computed when both width
92+
* and height are set, following the format "widthxheight". When setting this property
93+
* directly, it must follow the format "WxH" where W and H are valid integers. Invalid
94+
* formats will result in null width and height values.
9195
*/
9296
@JsonProperty("size")
9397
private String size;
@@ -108,6 +112,52 @@ public class OpenAiImageOptions implements ImageOptions {
108112
@JsonProperty("user")
109113
private String user;
110114

115+
/**
116+
* Sets the transparency for the background of the generated image(s). This
117+
* parameter is only supported for gpt-image-1. Must be one of transparent, opaque or
118+
* auto (default value). When auto is used, the model will automatically determine the
119+
* best background for the image.
120+
*/
121+
@JsonProperty("background")
122+
private String background;
123+
124+
/**
125+
* Controls the content-moderation level for images generated by gpt-image-1. Must be
126+
* either low for less restrictive filtering or auto (default value).
127+
*/
128+
@JsonProperty("moderation")
129+
private String moderation;
130+
131+
/**
132+
* The compression level (0-100%) for the generated images. This parameter is only
133+
* supported for gpt-image-1 with the webp or jpeg output formats, and defaults to
134+
* 100.
135+
*/
136+
@JsonProperty("output_compression")
137+
private Integer outputCompression;
138+
139+
/**
140+
* The format in which the generated images are returned. This parameter is only
141+
* supported for gpt-image-1. Must be one of png, jpeg, or webp.
142+
*/
143+
@JsonProperty("output_format")
144+
private String outputFormat;
145+
146+
/**
147+
* The number of partial images to generate. This parameter is used for streaming
148+
* responses that return partial images. Value must be between 0 and 3. When set to 0,
149+
* the response will be a single image sent in one streaming event.
150+
*/
151+
@JsonProperty("partial_images")
152+
private Integer partialImages;
153+
154+
/**
155+
* Generate the image in streaming mode. Defaults to false. This parameter is only
156+
* supported for gpt-image-1.
157+
*/
158+
@JsonProperty("stream")
159+
private Boolean stream;
160+
111161
public static Builder builder() {
112162
return new Builder();
113163
}
@@ -128,6 +178,12 @@ public static OpenAiImageOptions fromOptions(OpenAiImageOptions fromOptions) {
128178
options.size = fromOptions.size;
129179
options.style = fromOptions.style;
130180
options.user = fromOptions.user;
181+
options.background = fromOptions.background;
182+
options.moderation = fromOptions.moderation;
183+
options.outputCompression = fromOptions.outputCompression;
184+
options.outputFormat = fromOptions.outputFormat;
185+
options.partialImages = fromOptions.partialImages;
186+
options.stream = fromOptions.stream;
131187
return options;
132188
}
133189

@@ -262,6 +318,54 @@ public void setSize(String size) {
262318
}
263319
}
264320

321+
public String getBackground() {
322+
return this.background;
323+
}
324+
325+
public void setBackground(String background) {
326+
this.background = background;
327+
}
328+
329+
public String getModeration() {
330+
return this.moderation;
331+
}
332+
333+
public void setModeration(String moderation) {
334+
this.moderation = moderation;
335+
}
336+
337+
public Integer getOutputCompression() {
338+
return this.outputCompression;
339+
}
340+
341+
public void setOutputCompression(Integer outputCompression) {
342+
this.outputCompression = outputCompression;
343+
}
344+
345+
public String getOutputFormat() {
346+
return this.outputFormat;
347+
}
348+
349+
public void setOutputFormat(String outputFormat) {
350+
this.outputFormat = outputFormat;
351+
}
352+
353+
public Integer getPartialImages() {
354+
return this.partialImages;
355+
}
356+
357+
public void setPartialImages(Integer partialImages) {
358+
this.partialImages = partialImages;
359+
}
360+
361+
public Boolean getStream() {
362+
return this.stream;
363+
}
364+
365+
public void setStream(Boolean stream) {
366+
this.stream = stream;
367+
}
368+
265369
@Override
266370
public boolean equals(Object o) {
267371
if (this == o) {
@@ -274,21 +378,28 @@ public boolean equals(Object o) {
274378
&& Objects.equals(this.width, that.width) && Objects.equals(this.height, that.height)
275379
&& Objects.equals(this.quality, that.quality)
276380
&& Objects.equals(this.responseFormat, that.responseFormat) && Objects.equals(this.size, that.size)
277-
&& Objects.equals(this.style, that.style) && Objects.equals(this.user, that.user);
381+
&& Objects.equals(this.style, that.style) && Objects.equals(this.user, that.user)
382+
&& Objects.equals(this.background, that.background) && Objects.equals(this.moderation, that.moderation)
383+
&& Objects.equals(this.outputCompression, that.outputCompression)
384+
&& Objects.equals(this.outputFormat, that.outputFormat)
385+
&& Objects.equals(this.partialImages, that.partialImages) && Objects.equals(this.stream, that.stream);
278386
}
279387

280388
@Override
281389
public int hashCode() {
282390
return Objects.hash(this.n, this.model, this.width, this.height, this.quality, this.responseFormat, this.size,
283-
this.style, this.user);
391+
this.style, this.user, this.background, this.moderation, this.outputCompression, this.outputFormat,
392+
this.partialImages, this.stream);
284393
}
285394

286395
@Override
287396
public String toString() {
288397
return "OpenAiImageOptions{" + "n=" + this.n + ", model='" + this.model + '\'' + ", width=" + this.width
289398
+ ", height=" + this.height + ", quality='" + this.quality + '\'' + ", responseFormat='"
290399
+ this.responseFormat + '\'' + ", size='" + this.size + '\'' + ", style='" + this.style + '\''
291-
+ ", user='" + this.user + '\'' + '}';
400+
+ ", user='" + this.user + '\'' + ", background='" + this.background + '\'' + ", moderation='"
401+
+ this.moderation + '\'' + ", outputCompression=" + this.outputCompression + ", outputFormat='"
402+
+ this.outputFormat + '\'' + ", partialImages=" + this.partialImages + ", stream=" + this.stream + '}';
292403
}
293404

294405
/**
@@ -351,6 +462,36 @@ public Builder user(String user) {
351462
return this;
352463
}
353464

465+
public Builder background(String background) {
466+
this.options.setBackground(background);
467+
return this;
468+
}
469+
470+
public Builder moderation(String moderation) {
471+
this.options.setModeration(moderation);
472+
return this;
473+
}
474+
475+
public Builder outputCompression(Integer outputCompression) {
476+
this.options.setOutputCompression(outputCompression);
477+
return this;
478+
}
479+
480+
public Builder outputFormat(String outputFormat) {
481+
this.options.setOutputFormat(outputFormat);
482+
return this;
483+
}
484+
485+
public Builder partialImages(Integer partialImages) {
486+
this.options.setPartialImages(partialImages);
487+
return this;
488+
}
489+
490+
public Builder stream(Boolean stream) {
491+
this.options.setStream(stream);
492+
return this;
493+
}
494+
354495
public OpenAiImageOptions build() {
355496
return this.options;
356497
}

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
*/
4646
public class OpenAiImageApi {
4747

48-
public static final String DEFAULT_IMAGE_MODEL = ImageModel.DALL_E_3.getValue();
48+
public static final String DEFAULT_IMAGE_MODEL = ImageModel.GPT_IMAGE_1_MINI.getValue();
4949

5050
private final RestClient restClient;
5151

@@ -99,10 +99,22 @@ public static Builder builder() {
9999

100100
/**
101101
* OpenAI Image API model.
102-
* <a href="https://platform.openai.com/docs/models/dall-e">DALL·E</a>
102+
* <a href="https://platform.openai.com/docs/models">Models</a>
103103
*/
104104
public enum ImageModel {
105105

106+
/**
107+
* Multimodal language model that accepts both text and image inputs, and produces
108+
* image outputs.
109+
*/
110+
GPT_IMAGE_1("gpt-image-1"),
111+
112+
/**
113+
* A cost-efficient version of GPT Image 1. It is a natively multimodal language
114+
* model that accepts both text and image inputs, and produces image outputs.
115+
*/
116+
GPT_IMAGE_1_MINI("gpt-image-1-mini"),
117+
106118
/**
107119
* The latest DALL·E model released in Nov 2023.
108120
*/
@@ -137,10 +149,16 @@ public record OpenAiImageRequest(
137149
@JsonProperty("response_format") String responseFormat,
138150
@JsonProperty("size") String size,
139151
@JsonProperty("style") String style,
140-
@JsonProperty("user") String user) {
152+
@JsonProperty("user") String user,
153+
@JsonProperty("background") String background,
154+
@JsonProperty("moderation") String moderation,
155+
@JsonProperty("output_compression") Integer outputCompression,
156+
@JsonProperty("output_format") String outputFormat,
157+
@JsonProperty("partial_images") Integer partialImages,
158+
@JsonProperty("stream") Boolean stream) {
141159

142160
public OpenAiImageRequest(String prompt, String model) {
143-
this(prompt, model, null, null, null, null, null, null);
161+
this(prompt, model, null, null, null, null, null, null, null, null, null, null, null, null);
144162
}
145163
}
146164

0 commit comments

Comments
 (0)