Skip to content

Commit 0116f29

Browse files
authored
feat: add support for image generation using gpt-image-1 (#971)
* feat: add gpt-image-1 support
* feat: add example to generate image using gpt-image-1 model
* style: missing period in comments
* feat: add missing fields to example
* docs: add GPT Image 1 to README
* revert: keep `examples/images/main.go` unchanged
* docs: remove unnecessary newline from example in README file
1 parent 8ba38f6 commit 0116f29

File tree

3 files changed

+126
-13
lines changed

3 files changed

+126
-13
lines changed

README.md

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ This library provides unofficial Go clients for [OpenAI API](https://platform.op
77

88
* ChatGPT 4o, o1
99
* GPT-3, GPT-4
10-
* DALL·E 2, DALL·E 3
10+
* DALL·E 2, DALL·E 3, GPT Image 1
1111
* Whisper
1212

1313
## Installation
@@ -357,6 +357,66 @@ func main() {
357357
```
358358
</details>
359359

360+
<details>
361+
<summary>GPT Image 1 image generation</summary>
362+
363+
```go
364+
package main
365+
366+
import (
367+
"context"
368+
"encoding/base64"
369+
"fmt"
370+
"os"
371+
372+
openai "github.com/sashabaranov/go-openai"
373+
)
374+
375+
func main() {
376+
c := openai.NewClient("your token")
377+
ctx := context.Background()
378+
379+
req := openai.ImageRequest{
380+
Prompt: "Parrot on a skateboard performing a trick. Large bold text \"SKATE MASTER\" banner at the bottom of the image. Cartoon style, natural light, high detail, 1:1 aspect ratio.",
381+
Background: openai.CreateImageBackgroundOpaque,
382+
Model: openai.CreateImageModelGptImage1,
383+
Size: openai.CreateImageSize1024x1024,
384+
N: 1,
385+
Quality: openai.CreateImageQualityLow,
386+
OutputCompression: 100,
387+
OutputFormat: openai.CreateImageOutputFormatJPEG,
388+
// Moderation: openai.CreateImageModerationLow,
389+
// User: "",
390+
}
391+
392+
resp, err := c.CreateImage(ctx, req)
393+
if err != nil {
394+
fmt.Printf("Image creation error: %v\n", err)
395+
return
396+
}
397+
398+
fmt.Println("Image Base64:", resp.Data[0].B64JSON)
399+
400+
// Decode the base64 data
401+
imgBytes, err := base64.StdEncoding.DecodeString(resp.Data[0].B64JSON)
402+
if err != nil {
403+
fmt.Printf("Base64 decode error: %v\n", err)
404+
return
405+
}
406+
407+
// Write image to file
408+
outputPath := "generated_image.jpg"
409+
err = os.WriteFile(outputPath, imgBytes, 0644)
410+
if err != nil {
411+
fmt.Printf("Failed to write image file: %v\n", err)
412+
return
413+
}
414+
415+
fmt.Printf("The image was saved as %s\n", outputPath)
416+
}
417+
```
418+
</details>
419+
360420
<details>
361421
<summary>Configuring proxy</summary>
362422

examples/images/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ func main() {
2525
return
2626
}
2727
fmt.Println(respUrl.Data[0].URL)
28-
}
28+
}

image.go

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,51 +13,101 @@ const (
1313
CreateImageSize256x256 = "256x256"
1414
CreateImageSize512x512 = "512x512"
1515
CreateImageSize1024x1024 = "1024x1024"
16+
1617
// dall-e-3 supported only.
1718
CreateImageSize1792x1024 = "1792x1024"
1819
CreateImageSize1024x1792 = "1024x1792"
20+
21+
// gpt-image-1 supported only.
22+
CreateImageSize1536x1024 = "1536x1024" // Landscape
23+
CreateImageSize1024x1536 = "1024x1536" // Portrait
1924
)
2025

2126
const (
22-
CreateImageResponseFormatURL = "url"
27+
// dall-e-2 and dall-e-3 only.
2328
CreateImageResponseFormatB64JSON = "b64_json"
29+
CreateImageResponseFormatURL = "url"
2430
)
2531

2632
const (
27-
CreateImageModelDallE2 = "dall-e-2"
28-
CreateImageModelDallE3 = "dall-e-3"
33+
CreateImageModelDallE2 = "dall-e-2"
34+
CreateImageModelDallE3 = "dall-e-3"
35+
CreateImageModelGptImage1 = "gpt-image-1"
2936
)
3037

3138
const (
3239
CreateImageQualityHD = "hd"
3340
CreateImageQualityStandard = "standard"
41+
42+
// gpt-image-1 only.
43+
CreateImageQualityHigh = "high"
44+
CreateImageQualityMedium = "medium"
45+
CreateImageQualityLow = "low"
3446
)
3547

3648
const (
49+
// dall-e-3 only.
3750
CreateImageStyleVivid = "vivid"
3851
CreateImageStyleNatural = "natural"
3952
)
4053

54+
const (
55+
// gpt-image-1 only.
56+
CreateImageBackgroundTransparent = "transparent"
57+
CreateImageBackgroundOpaque = "opaque"
58+
)
59+
60+
const (
61+
// gpt-image-1 only.
62+
CreateImageModerationLow = "low"
63+
)
64+
65+
const (
66+
// gpt-image-1 only.
67+
CreateImageOutputFormatPNG = "png"
68+
CreateImageOutputFormatJPEG = "jpeg"
69+
CreateImageOutputFormatWEBP = "webp"
70+
)
71+
4172
// ImageRequest represents the request structure for the image API.
4273
type ImageRequest struct {
43-
Prompt string `json:"prompt,omitempty"`
44-
Model string `json:"model,omitempty"`
45-
N int `json:"n,omitempty"`
46-
Quality string `json:"quality,omitempty"`
47-
Size string `json:"size,omitempty"`
48-
Style string `json:"style,omitempty"`
49-
ResponseFormat string `json:"response_format,omitempty"`
50-
User string `json:"user,omitempty"`
74+
Prompt string `json:"prompt,omitempty"`
75+
Model string `json:"model,omitempty"`
76+
N int `json:"n,omitempty"`
77+
Quality string `json:"quality,omitempty"`
78+
Size string `json:"size,omitempty"`
79+
Style string `json:"style,omitempty"`
80+
ResponseFormat string `json:"response_format,omitempty"`
81+
User string `json:"user,omitempty"`
82+
Background string `json:"background,omitempty"`
83+
Moderation string `json:"moderation,omitempty"`
84+
OutputCompression int `json:"output_compression,omitempty"`
85+
OutputFormat string `json:"output_format,omitempty"`
5186
}
5287

5388
// ImageResponse represents a response structure for image API.
5489
type ImageResponse struct {
5590
Created int64 `json:"created,omitempty"`
5691
Data []ImageResponseDataInner `json:"data,omitempty"`
92+
Usage ImageResponseUsage `json:"usage,omitempty"`
5793

5894
httpHeader
5995
}
6096

97+
// ImageResponseInputTokensDetails represents the token breakdown for input tokens.
98+
type ImageResponseInputTokensDetails struct {
99+
TextTokens int `json:"text_tokens,omitempty"`
100+
ImageTokens int `json:"image_tokens,omitempty"`
101+
}
102+
103+
// ImageResponseUsage represents the token usage information for image API.
104+
type ImageResponseUsage struct {
105+
TotalTokens int `json:"total_tokens,omitempty"`
106+
InputTokens int `json:"input_tokens,omitempty"`
107+
OutputTokens int `json:"output_tokens,omitempty"`
108+
InputTokensDetails ImageResponseInputTokensDetails `json:"input_tokens_details,omitempty"`
109+
}
110+
61111
// ImageResponseDataInner represents a response data structure for image API.
62112
type ImageResponseDataInner struct {
63113
URL string `json:"url,omitempty"`
@@ -91,6 +141,8 @@ type ImageEditRequest struct {
91141
N int `json:"n,omitempty"`
92142
Size string `json:"size,omitempty"`
93143
ResponseFormat string `json:"response_format,omitempty"`
144+
Quality string `json:"quality,omitempty"`
145+
User string `json:"user,omitempty"`
94146
}
95147

96148
// CreateEditImage - API call to edit an image. This is the image editing endpoint of the DALL-E API.
@@ -159,6 +211,7 @@ type ImageVariRequest struct {
159211
N int `json:"n,omitempty"`
160212
Size string `json:"size,omitempty"`
161213
ResponseFormat string `json:"response_format,omitempty"`
214+
User string `json:"user,omitempty"`
162215
}
163216

164217
// CreateVariImage - API call to create an image variation. This is the main endpoint of the DALL-E API.

0 commit comments

Comments
 (0)