feat: add support for image generation using gpt-image-1 (#971)

PChaparro · web-flow · commit 0116f2994de0 · 2025-05-13T12:51:08.000+01:00
* feat: add gpt-image-1 support

* feat: add example to generate image using gpt-image-1 model

* style: missing period in comments

* feat: add missing fields to example

* docs: add GPT Image 1 to README

* revert: keep `examples/images/main.go` unchanged

* docs: remove unnecessary newline from example in README file
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ This library provides unofficial Go clients for [OpenAI API](https://platform.op
 
 * ChatGPT 4o, o1
 * GPT-3, GPT-4
-* DALL·E 2, DALL·E 3
+* DALL·E 2, DALL·E 3, GPT Image 1
 * Whisper
 
 ## Installation
@@ -357,6 +357,66 @@ func main() {
 ```
 </details>
 
+<details>
+<summary>GPT Image 1 image generation</summary>
+
+```go
+package main
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"os"
+
+	openai "github.com/sashabaranov/go-openai"
+)
+
+func main() {
+	c := openai.NewClient("your token")
+	ctx := context.Background()
+
+	req := openai.ImageRequest{
+		Prompt:            "Parrot on a skateboard performing a trick. Large bold text \"SKATE MASTER\" banner at the bottom of the image. Cartoon style, natural light, high detail, 1:1 aspect ratio.",
+		Background:        openai.CreateImageBackgroundOpaque,
+		Model:             openai.CreateImageModelGptImage1,
+		Size:              openai.CreateImageSize1024x1024,
+		N:                 1,
+		Quality:           openai.CreateImageQualityLow,
+		OutputCompression: 100,
+		OutputFormat:      openai.CreateImageOutputFormatJPEG,
+		// Moderation: openai.CreateImageModerationLow,
+		// User:       "",
+	}
+
+	resp, err := c.CreateImage(ctx, req)
+	if err != nil {
+		fmt.Printf("Image creation error: %v\n", err)
+		return
+	}
+
+	fmt.Println("Image Base64:", resp.Data[0].B64JSON)
+
+	// Decode the base64 data
+	imgBytes, err := base64.StdEncoding.DecodeString(resp.Data[0].B64JSON)
+	if err != nil {
+		fmt.Printf("Base64 decode error: %v\n", err)
+		return
+	}
+
+	// Write image to file
+	outputPath := "generated_image.jpg"
+	err = os.WriteFile(outputPath, imgBytes, 0644)
+	if err != nil {
+		fmt.Printf("Failed to write image file: %v\n", err)
+		return
+	}
+
+	fmt.Printf("The image was saved as %s\n", outputPath)
+}
+```
+</details>
+
 <details>
 <summary>Configuring proxy</summary>
 
diff --git a/examples/images/main.go b/examples/images/main.go
@@ -25,4 +25,4 @@ func main() {
 		return
 	}
 	fmt.Println(respUrl.Data[0].URL)
-}
+}
diff --git a/image.go b/image.go
@@ -13,51 +13,101 @@ const (
 	CreateImageSize256x256   = "256x256"
 	CreateImageSize512x512   = "512x512"
 	CreateImageSize1024x1024 = "1024x1024"
+
 	// dall-e-3 supported only.
 	CreateImageSize1792x1024 = "1792x1024"
 	CreateImageSize1024x1792 = "1024x1792"
+
+	// gpt-image-1 supported only.
+	CreateImageSize1536x1024 = "1536x1024" // Landscape
+	CreateImageSize1024x1536 = "1024x1536" // Portrait
 )
 
 const (
-	CreateImageResponseFormatURL     = "url"
+	// dall-e-2 and dall-e-3 only.
 	CreateImageResponseFormatB64JSON = "b64_json"
+	CreateImageResponseFormatURL     = "url"
 )
 
 const (
-	CreateImageModelDallE2 = "dall-e-2"
-	CreateImageModelDallE3 = "dall-e-3"
+	CreateImageModelDallE2    = "dall-e-2"
+	CreateImageModelDallE3    = "dall-e-3"
+	CreateImageModelGptImage1 = "gpt-image-1"
 )
 
 const (
 	CreateImageQualityHD       = "hd"
 	CreateImageQualityStandard = "standard"
+
+	// gpt-image-1 only.
+	CreateImageQualityHigh   = "high"
+	CreateImageQualityMedium = "medium"
+	CreateImageQualityLow    = "low"
 )
 
 const (
+	// dall-e-3 only.
 	CreateImageStyleVivid   = "vivid"
 	CreateImageStyleNatural = "natural"
 )
 
+const (
+	// gpt-image-1 only.
+	CreateImageBackgroundTransparent = "transparent"
+	CreateImageBackgroundOpaque      = "opaque"
+)
+
+const (
+	// gpt-image-1 only.
+	CreateImageModerationLow = "low"
+)
+
+const (
+	// gpt-image-1 only.
+	CreateImageOutputFormatPNG  = "png"
+	CreateImageOutputFormatJPEG = "jpeg"
+	CreateImageOutputFormatWEBP = "webp"
+)
+
 // ImageRequest represents the request structure for the image API.
 type ImageRequest struct {
-	Prompt         string `json:"prompt,omitempty"`
-	Model          string `json:"model,omitempty"`
-	N              int    `json:"n,omitempty"`
-	Quality        string `json:"quality,omitempty"`
-	Size           string `json:"size,omitempty"`
-	Style          string `json:"style,omitempty"`
-	ResponseFormat string `json:"response_format,omitempty"`
-	User           string `json:"user,omitempty"`
+	Prompt            string `json:"prompt,omitempty"`
+	Model             string `json:"model,omitempty"`
+	N                 int    `json:"n,omitempty"`
+	Quality           string `json:"quality,omitempty"`
+	Size              string `json:"size,omitempty"`
+	Style             string `json:"style,omitempty"`
+	ResponseFormat    string `json:"response_format,omitempty"`
+	User              string `json:"user,omitempty"`
+	Background        string `json:"background,omitempty"`
+	Moderation        string `json:"moderation,omitempty"`
+	OutputCompression int    `json:"output_compression,omitempty"`
+	OutputFormat      string `json:"output_format,omitempty"`
 }
 
 // ImageResponse represents a response structure for image API.
 type ImageResponse struct {
 	Created int64                    `json:"created,omitempty"`
 	Data    []ImageResponseDataInner `json:"data,omitempty"`
+	Usage   ImageResponseUsage       `json:"usage,omitempty"`
 
 	httpHeader
 }
 
+// ImageResponseInputTokensDetails represents the token breakdown for input tokens.
+type ImageResponseInputTokensDetails struct {
+	TextTokens  int `json:"text_tokens,omitempty"`
+	ImageTokens int `json:"image_tokens,omitempty"`
+}
+
+// ImageResponseUsage represents the token usage information for image API.
+type ImageResponseUsage struct {
+	TotalTokens        int                             `json:"total_tokens,omitempty"`
+	InputTokens        int                             `json:"input_tokens,omitempty"`
+	OutputTokens       int                             `json:"output_tokens,omitempty"`
+	InputTokensDetails ImageResponseInputTokensDetails `json:"input_tokens_details,omitempty"`
+}
+
 // ImageResponseDataInner represents a response data structure for image API.
 type ImageResponseDataInner struct {
 	URL           string `json:"url,omitempty"`
@@ -91,6 +141,8 @@ type ImageEditRequest struct {
 	N              int      `json:"n,omitempty"`
 	Size           string   `json:"size,omitempty"`
 	ResponseFormat string   `json:"response_format,omitempty"`
+	Quality        string   `json:"quality,omitempty"`
+	User           string   `json:"user,omitempty"`
 }
 
 // CreateEditImage - API call to create an image. This is the main endpoint of the DALL-E API.
@@ -159,6 +211,7 @@ type ImageVariRequest struct {
 	N              int      `json:"n,omitempty"`
 	Size           string   `json:"size,omitempty"`
 	ResponseFormat string   `json:"response_format,omitempty"`
+	User           string   `json:"user,omitempty"`
 }
 
 // CreateVariImage - API call to create an image variation. This is the main endpoint of the DALL-E API.

Original file line number	Diff line number	Diff line change
`@@ -25,4 +25,4 @@ func main() {`
`25`	`25`	`return`
`26`	`26`	`}`
`27`	`27`	`fmt.Println(respUrl.Data[0].URL)`
`28`		`-}`
	`28`	`+}`