Skip to content

Commit e22f59e

Browse files
committed
feat(gemini): update request structures for Veo predictLongRunning
- Refactored the request URL and body construction methods to align with the Veo predictLongRunning endpoint.
- Introduced new data structures for Veo instances and parameters, replacing the previous Gemini video generation configurations.
- Updated the Vertex adaptor to use the new Veo request payload format.
1 parent 8103b4b commit e22f59e

File tree

3 files changed

+51
-67
lines changed

3 files changed

+51
-67
lines changed

relay/channel/task/gemini/adaptor.go

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@ func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycom
4444
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate)
4545
}
4646

47-
// BuildRequestURL constructs the Gemini API generateVideos endpoint.
47+
// BuildRequestURL constructs the Gemini API predictLongRunning endpoint for Veo.
4848
func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) {
4949
modelName := info.UpstreamModelName
5050
version := model_setting.GetGeminiVersionSetting(modelName)
5151

5252
return fmt.Sprintf(
53-
"%s/%s/models/%s:generateVideos",
53+
"%s/%s/models/%s:predictLongRunning",
5454
a.baseURL,
5555
version,
5656
modelName,
@@ -65,7 +65,7 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
6565
return nil
6666
}
6767

68-
// BuildRequestBody converts request into the Gemini API generateVideos format.
68+
// BuildRequestBody converts request into the Veo predictLongRunning format.
6969
func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) {
7070
v, ok := c.Get("task_request")
7171
if !ok {
@@ -76,34 +76,36 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
7676
return nil, fmt.Errorf("unexpected task_request type")
7777
}
7878

79-
body := GeminiVideoPayload{
80-
Prompt: req.Prompt,
81-
Config: &GeminiVideoGenerationConfig{},
82-
}
83-
79+
instance := VeoInstance{Prompt: req.Prompt}
8480
if img := ExtractMultipartImage(c, info); img != nil {
85-
body.Image = img
81+
instance.Image = img
8682
} else if len(req.Images) > 0 {
8783
if parsed := ParseImageInput(req.Images[0]); parsed != nil {
88-
body.Image = parsed
84+
instance.Image = parsed
8985
info.Action = constant.TaskActionGenerate
9086
}
9187
}
9288

93-
if err := taskcommon.UnmarshalMetadata(req.Metadata, body.Config); err != nil {
89+
params := &VeoParameters{}
90+
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
9491
return nil, errors.Wrap(err, "unmarshal metadata failed")
9592
}
96-
if body.Config.DurationSeconds == 0 && req.Duration > 0 {
97-
body.Config.DurationSeconds = req.Duration
93+
if params.DurationSeconds == 0 && req.Duration > 0 {
94+
params.DurationSeconds = req.Duration
9895
}
99-
if body.Config.Resolution == "" && req.Size != "" {
100-
body.Config.Resolution = SizeToVeoResolution(req.Size)
96+
if params.Resolution == "" && req.Size != "" {
97+
params.Resolution = SizeToVeoResolution(req.Size)
10198
}
102-
if body.Config.AspectRatio == "" && req.Size != "" {
103-
body.Config.AspectRatio = SizeToVeoAspectRatio(req.Size)
99+
if params.AspectRatio == "" && req.Size != "" {
100+
params.AspectRatio = SizeToVeoAspectRatio(req.Size)
101+
}
102+
params.Resolution = strings.ToLower(params.Resolution)
103+
params.SampleCount = 1
104+
105+
body := VeoRequestPayload{
106+
Instances: []VeoInstance{instance},
107+
Parameters: params,
104108
}
105-
body.Config.Resolution = strings.ToLower(body.Config.Resolution)
106-
body.Config.NumberOfVideos = 1
107109

108110
data, err := common.Marshal(body)
109111
if err != nil {

relay/channel/task/gemini/dto.go

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,42 @@
11
package gemini
22

3-
// GeminiVideoGenerationConfig represents the Gemini API GenerateVideosConfig.
4-
// Reference: https://ai.google.dev/gemini-api/docs/video
5-
type GeminiVideoGenerationConfig struct {
6-
AspectRatio string `json:"aspectRatio,omitempty"`
7-
DurationSeconds int `json:"durationSeconds,omitempty"`
8-
NegativePrompt string `json:"negativePrompt,omitempty"`
9-
PersonGeneration string `json:"personGeneration,omitempty"`
10-
Resolution string `json:"resolution,omitempty"`
11-
NumberOfVideos int `json:"numberOfVideos,omitempty"`
12-
}
13-
143
// VeoImageInput represents an image input for Veo image-to-video.
154
// Used by both Gemini and Vertex adaptors.
165
type VeoImageInput struct {
176
BytesBase64Encoded string `json:"bytesBase64Encoded"`
187
MimeType string `json:"mimeType"`
198
}
209

21-
// GeminiVideoPayload is the top-level request body for the Gemini API
22-
// models/{model}:generateVideos endpoint.
23-
type GeminiVideoPayload struct {
24-
Model string `json:"model,omitempty"`
25-
Prompt string `json:"prompt"`
26-
Image *VeoImageInput `json:"image,omitempty"`
27-
Config *GeminiVideoGenerationConfig `json:"config,omitempty"`
10+
// VeoInstance represents a single instance in the Veo predictLongRunning request.
11+
type VeoInstance struct {
12+
Prompt string `json:"prompt"`
13+
Image *VeoImageInput `json:"image,omitempty"`
2814
// TODO: support referenceImages (style/asset references, up to 3 images)
2915
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
3016
}
3117

18+
// VeoParameters represents the parameters block for Veo predictLongRunning.
19+
type VeoParameters struct {
20+
SampleCount int `json:"sampleCount"`
21+
DurationSeconds int `json:"durationSeconds,omitempty"`
22+
AspectRatio string `json:"aspectRatio,omitempty"`
23+
Resolution string `json:"resolution,omitempty"`
24+
NegativePrompt string `json:"negativePrompt,omitempty"`
25+
PersonGeneration string `json:"personGeneration,omitempty"`
26+
StorageUri string `json:"storageUri,omitempty"`
27+
CompressionQuality string `json:"compressionQuality,omitempty"`
28+
ResizeMode string `json:"resizeMode,omitempty"`
29+
Seed *int `json:"seed,omitempty"`
30+
GenerateAudio *bool `json:"generateAudio,omitempty"`
31+
}
32+
33+
// VeoRequestPayload is the top-level request body for the Veo
34+
// predictLongRunning endpoint (used by both Gemini and Vertex).
35+
type VeoRequestPayload struct {
36+
Instances []VeoInstance `json:"instances"`
37+
Parameters *VeoParameters `json:"parameters,omitempty"`
38+
}
39+
3240
type submitResponse struct {
3341
Name string `json:"name"`
3442
}

relay/channel/task/vertex/adaptor.go

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -27,32 +27,6 @@ import (
2727
// Request / Response structures
2828
// ============================
2929

30-
type veoInstance struct {
31-
Prompt string `json:"prompt"`
32-
Image *geminitask.VeoImageInput `json:"image,omitempty"`
33-
// TODO: support referenceImages (style/asset references, up to 3 images)
34-
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
35-
}
36-
37-
type veoParameters struct {
38-
SampleCount int `json:"sampleCount"`
39-
DurationSeconds int `json:"durationSeconds,omitempty"`
40-
AspectRatio string `json:"aspectRatio,omitempty"`
41-
Resolution string `json:"resolution,omitempty"`
42-
NegativePrompt string `json:"negativePrompt,omitempty"`
43-
PersonGeneration string `json:"personGeneration,omitempty"`
44-
StorageUri string `json:"storageUri,omitempty"`
45-
CompressionQuality string `json:"compressionQuality,omitempty"`
46-
ResizeMode string `json:"resizeMode,omitempty"`
47-
Seed *int `json:"seed,omitempty"`
48-
GenerateAudio *bool `json:"generateAudio,omitempty"`
49-
}
50-
51-
type requestPayload struct {
52-
Instances []veoInstance `json:"instances"`
53-
Parameters *veoParameters `json:"parameters,omitempty"`
54-
}
55-
5630
type fetchOperationPayload struct {
5731
OperationName string `json:"operationName"`
5832
}
@@ -186,7 +160,7 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
186160
}
187161
req := v.(relaycommon.TaskSubmitReq)
188162

189-
instance := veoInstance{Prompt: req.Prompt}
163+
instance := geminitask.VeoInstance{Prompt: req.Prompt}
190164
if img := geminitask.ExtractMultipartImage(c, info); img != nil {
191165
instance.Image = img
192166
} else if len(req.Images) > 0 {
@@ -196,7 +170,7 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
196170
}
197171
}
198172

199-
params := &veoParameters{}
173+
params := &geminitask.VeoParameters{}
200174
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
201175
return nil, fmt.Errorf("unmarshal metadata failed: %w", err)
202176
}
@@ -212,8 +186,8 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
212186
params.Resolution = strings.ToLower(params.Resolution)
213187
params.SampleCount = 1
214188

215-
body := requestPayload{
216-
Instances: []veoInstance{instance},
189+
body := geminitask.VeoRequestPayload{
190+
Instances: []geminitask.VeoInstance{instance},
217191
Parameters: params,
218192
}
219193

0 commit comments

Comments
 (0)