envoyproxy
diff --git a/internal/apischema/gcp/gcp.go b/internal/apischema/gcp/gcp.go
Lines changed: 44 additions & 7 deletions
diff --git a/internal/apischema/openai/openai.go b/internal/apischema/openai/openai.go
Lines changed: 33 additions & 97 deletions
diff --git a/internal/apischema/openai/union.go b/internal/apischema/openai/union.go
Lines changed: 115 additions & 1 deletion
@@ -5,7 +5,11 @@
 
 package gcp
 
-import "google.golang.org/genai"
+import (
+	"google.golang.org/genai"
+
+	"github.com/envoyproxy/ai-gateway/internal/apischema/openai"
+)
 
 type GenerateContentRequest struct {
 	// Contains the multipart content of a message.
@@ -37,14 +41,47 @@ type GenerateContentRequest struct {
 	SafetySettings []*genai.SafetySetting `json:"safetySettings,omitempty"`
 }
 
-type EmbedContentRequest struct {
-	// Content to be embedded. Only text content is supported for embeddings.
-	Content *genai.Content `json:"content"`
+// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#syntax
+type Instance struct {
+	// The text that you want to generate embeddings for.
+	Content string `json:"content"`
+
+	// Used to convey intended downstream application to help the model produce better embeddings. If left blank, the default used is RETRIEVAL_QUERY.
+	// For more information about task types, see https://docs.cloud.google.com/vertex-ai/generative-ai/docs/embeddings/task-types
+	// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#task_type
+	TaskType openai.EmbeddingTaskType `json:"task_type,omitempty"`
+
+	// Used to help the model produce better embeddings. Only valid with task_type=RETRIEVAL_DOCUMENT.
+	Title string `json:"title,omitempty"`
+}
+
+// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#parameter-list
+type Parameters struct {
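+	// When set to true, input text will be truncated. When set to false, an error is returned if the input text is longer than the maximum length supported by the model. Defaults to true. NOTE(review): under encoding/json semantics, omitempty drops a false bool, so false is never sent and the server default (true) applies; a *bool would be needed to disable truncation.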
+	AutoTruncate bool `json:"auto_truncate,omitempty"`
+
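+	// Used to specify output embedding size. If set, output embeddings will be truncated to the size specified. NOTE(review): the JSON key below is out_dimensionality while the field is OutputDimensionality; confirm the key against the parameter list linked above.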
+	OutputDimensionality int `json:"out_dimensionality,omitempty"`
+}
+
+// https://github.com/googleapis/python-aiplatform/blob/30e41d01f3fd0ef08da6ad6eb7f83df34476105e/google/cloud/aiplatform_v1/types/prediction_service.py#L63
+type PredictRequest struct {
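+	// Instances lists the inputs to embed, one Instance per text.
+	// Each Instance carries its own content plus optional task type and title.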
+	Instances []*Instance `json:"instances"`
 
 	// Optional configuration for the embedding request.
 	// Uses the official genai library configuration structure.
-	Config *genai.EmbedContentConfig `json:"config,omitempty"`
+	Parameters Parameters `json:"parameters,omitempty"`
 }
 
-// Note: We now use genai.EmbedContentResponse directly instead of defining our own.
-// This provides better compatibility and includes metadata like token usage.
+// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#response_body
+type Prediction struct {
+	// The result generated from input text.
+	Embeddings genai.ContentEmbedding `json:"embeddings"`
+}
+
+// https://github.com/googleapis/python-aiplatform/blob/30e41d01f3fd0ef08da6ad6eb7f83df34476105e/google/cloud/aiplatform_v1/types/prediction_service.py#L117
+type PredictResponse struct {
+	Predictions []*Prediction `json:"predictions"`
+}
@@ -310,6 +310,13 @@ func (c ContentUnion) MarshalJSON() ([]byte, error) {
 	return json.Marshal(c.Value)
 }
 
+// EmbeddingInputItem represents a single embedding input with optional metadata
+type EmbeddingInputItem struct {
+	Content  string            `json:"content"`             // The actual text content
+	TaskType EmbeddingTaskType `json:"task_type,omitempty"` // Optional task type
+	Title    string            `json:"title,omitempty"`     // Optional title
+}
+
 // EmbeddingRequestInput is the EmbeddingRequest.Input type.
 type EmbeddingRequestInput struct {
 	Value any
@@ -1498,8 +1505,8 @@ type Model struct {
 	OwnedBy string `json:"owned_by"`
 }
 
-// EmbeddingCompletionRequest represents a request structure for embeddings API.
-type EmbeddingCompletionRequest struct {
+// EmbeddingRequest represents a request structure for embeddings API.
+type EmbeddingRequest struct {
 	// Input: Input text to embed, encoded as a string or array of tokens.
 	// To embed multiple inputs in a single request, pass an array of strings or array of token arrays.
 	// The input must not exceed the max input tokens for the model (8192 tokens for text-embedding-ada-002),
@@ -1524,101 +1531,33 @@ type EmbeddingCompletionRequest struct {
 	// Docs: https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-user
 	User *string `json:"user,omitempty"`
 
-	// GCPVertexAIEmbeddingVendorFields configures the GCP VertexAI specific fields during schema translation.
+	// GCPVertexAIEmbeddingVendorFields configures the GCP VertexAI specific fields for embedding during schema translation.
 	*GCPVertexAIEmbeddingVendorFields `json:",inline,omitempty"`
 }
 
-// GetModel implements ModelName interface
-func (e *EmbeddingCompletionRequest) GetModel() string {
-	return e.Model
-}
-
-// EmbeddingChatRequest represents a request structure for embeddings API. This is not a standard openai, but just extend the request to have messages/chat like completion requests
-type EmbeddingChatRequest struct {
-	// Messages: A list of messages comprising the conversation so far.
-	// Depending on the model you use, different message types (modalities) are supported,
-	// like text, images, and audio.
-	Messages []ChatCompletionMessageParamUnion `json:"messages"`
-
-	// Model: ID of the model to use.
-	// Docs: https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-model
-	Model string `json:"model"`
-
-	// EncodingFormat: The format to return the embeddings in. Can be either float or base64.
-	// Docs: https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-encoding_format
-	EncodingFormat *string `json:"encoding_format,omitempty"` //nolint:tagliatelle //follow openai api
-
-	// Dimensions: The number of dimensions the resulting output embeddings should have.
-	// Only supported in text-embedding-3 and later models.
-	// Docs: https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-dimensions
-	Dimensions *int `json:"dimensions,omitempty"`
-
-	// User: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
-	// Docs: https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-user
-	User *string `json:"user,omitempty"`
-
-	// GCPVertexAIEmbeddingVendorFields configures the GCP VertexAI specific fields during schema translation.
-	*GCPVertexAIEmbeddingVendorFields `json:",inline,omitempty"`
-}
+type EmbeddingTaskType string
 
-// GetModel implements ModelProvider interface
-func (e *EmbeddingChatRequest) GetModel() string {
-	return e.Model
-}
-
-// EmbeddingRequest is a union type that can handle both EmbeddingCompletionRequest and EmbeddingChatRequest.
-type EmbeddingRequest struct {
-	OfCompletion *EmbeddingCompletionRequest `json:",omitzero,inline"`
-	OfChat       *EmbeddingChatRequest       `json:",omitzero,inline"`
-}
-
-// UnmarshalJSON implements json.Unmarshaler to handle both EmbeddingCompletionRequest and EmbeddingChatRequest.
-func (e *EmbeddingRequest) UnmarshalJSON(data []byte) error {
-	// Check for Messages field to distinguish EmbeddingChatRequest
-	messagesResult := gjson.GetBytes(data, "messages")
-	if messagesResult.Exists() {
-		var chatReq EmbeddingChatRequest
-		if err := json.Unmarshal(data, &chatReq); err != nil {
-			return err
-		}
-		e.OfChat = &chatReq
-		return nil
-	}
-
-	// Check for Input field to distinguish EmbeddingCompletionRequest
-	inputResult := gjson.GetBytes(data, "input")
-	if inputResult.Exists() {
-		var completionReq EmbeddingCompletionRequest
-		if err := json.Unmarshal(data, &completionReq); err != nil {
-			return err
-		}
-		e.OfCompletion = &completionReq
-		return nil
-	}
+const (
+	EmbeddingTaskTypeRetrievalQuery     EmbeddingTaskType = "RETRIEVAL_QUERY"
+	EmbeddingTaskTypeRetrievalDocument  EmbeddingTaskType = "RETRIEVAL_DOCUMENT"
+	EmbeddingTaskTypeSemanticSimilarity EmbeddingTaskType = "SEMANTIC_SIMILARITY"
+	EmbeddingTaskTypeClassification     EmbeddingTaskType = "CLASSIFICATION"
+	EmbeddingTaskTypeClustering         EmbeddingTaskType = "CLUSTERING"
+	EmbeddingTaskTypeQuestionAnswering  EmbeddingTaskType = "QUESTION_ANSWERING"
+	EmbeddingTaskTypeFactVerification   EmbeddingTaskType = "FACT_VERIFICATION"
+	EmbeddingTaskTypeCodeRetrievalQuery EmbeddingTaskType = "CODE_RETRIEVAL_QUERY"
+)
 
-	return errors.New("embedding request must have either 'input' field (EmbeddingCompletionRequest) or 'messages' field (EmbeddingChatRequest)")
-}
+// GCPVertexAIEmbeddingVendorFields contains GCP Vertex AI (Gemini) vendor-specific fields for embeddings.
+type GCPVertexAIEmbeddingVendorFields struct {
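+	// When set to true, input text will be truncated. When set to false, an error is returned if the input text is longer than the maximum length supported by the model. Defaults to true. NOTE(review): with a plain bool and omitempty, an explicit false cannot be told apart from an unset field; confirm that callers can still disable truncation.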
+	// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#parameter-list
 
-// MarshalJSON implements json.Marshaler.
-func (e EmbeddingRequest) MarshalJSON() ([]byte, error) {
-	if e.OfCompletion != nil {
-		return json.Marshal(e.OfCompletion)
-	}
-	if e.OfChat != nil {
-		return json.Marshal(e.OfChat)
-	}
-	return nil, errors.New("no embedding request to marshal")
-}
+	AutoTruncate bool `json:"auto_truncate,omitempty"`
 
-// GetModelFromEmbeddingRequest extracts the model name from any EmbeddingRequest type
-func GetModelFromEmbeddingRequest(req *EmbeddingRequest) string {
-	if req.OfCompletion != nil {
-		return req.OfCompletion.GetModel()
-	}
-	if req.OfChat != nil {
-		return req.OfChat.GetModel()
-	}
-	return ""
+	// TaskType sets the task type globally for the request, as a convenience for users. If left blank, the default used is RETRIEVAL_QUERY.
+	// For more information about task types, see https://docs.cloud.google.com/vertex-ai/generative-ai/docs/embeddings/task-types
+	TaskType EmbeddingTaskType `json:"task_type,omitempty"`
 }
 
 // EmbeddingResponse represents a response from /v1/embeddings.
@@ -1653,6 +1592,10 @@ type Embedding struct {
 
 	// Index: The index of the embedding in the list of embeddings.
 	Index int `json:"index"`
+
+	// Truncated reports whether the input text was truncated because it exceeded the maximum allowed input length.
+	// https://github.com/googleapis/go-genai/blob/cb486e101dc66794d52125dd22ff43ff4c0e76a6/types.go#L2807
+	Truncated bool `json:"truncated,omitempty"`
 }
 
 // EmbeddingUnion is a union type that can handle both []float64 and string formats.
@@ -1694,13 +1637,6 @@ type EmbeddingUsage struct {
 	TotalTokens int `json:"total_tokens"` //nolint:tagliatelle //follow openai api
 }
 
-// GCPVertexAIEmbeddingVendorFields contains GCP Vertex AI (Gemini) vendor-specific fields for embedding requests.
-type GCPVertexAIEmbeddingVendorFields struct {
-	// Type of task for which the embedding will be used.
-	// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/embeddings/task-types#supported_task_types
-	TaskType string `json:"task_type,omitempty"`
-}
-
 // JSONUNIXTime is a helper type to marshal/unmarshal time.Time UNIX timestamps.
 type JSONUNIXTime time.Time
 
 
@@ -23,6 +23,19 @@ func unmarshalJSONNestedUnion(typ string, data []byte) (interface{}, error) {
 	case '"':
 		return unquoteOrUnmarshalJSONString(typ, data)
 
+	case '{':
+		// Single object with content/task_type/title
+		var item EmbeddingInputItem
+		err = json.Unmarshal(data, &item)
+		if err != nil {
+			return nil, fmt.Errorf("cannot unmarshal %s as EmbeddingInputItem: %w", typ, err)
+		}
+		// Validate that the content field is not empty
+		if item.Content == "" {
+			return nil, fmt.Errorf("invalid %s type (must be string, object, or array)", typ)
+		}
+		return item, nil
+
 	case '[':
 		// Array: skip to first element
 		idx++
@@ -38,13 +51,31 @@ func unmarshalJSONNestedUnion(typ string, data []byte) (interface{}, error) {
 		// Determine element type
 		switch data[idx] {
 		case '"':
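+			// Check if this is a mixed array (strings and objects). NOTE(review): this check only runs when the first element is a string; an array that starts with an object and also contains strings takes the '{' branch below and fails to unmarshal as []EmbeddingInputItem.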
+			if isMixedArray(data) {
+				return unmarshalMixedArray(typ, data)
+			}
 			// []string
 			var strs []string
 			if err := json.Unmarshal(data, &strs); err != nil {
 				return nil, fmt.Errorf("cannot unmarshal %s as []string: %w", typ, err)
 			}
 			return strs, nil
 
+		case '{':
+			// []EmbeddingInputItem
+			var items []EmbeddingInputItem
+			if err := json.Unmarshal(data, &items); err != nil {
+				return nil, fmt.Errorf("cannot unmarshal %s as []EmbeddingInputItem: %w", typ, err)
+			}
+			// Validate that all items have non-empty content
+			for _, item := range items {
+				if item.Content == "" {
+					return nil, fmt.Errorf("invalid %s array element", typ)
+				}
+			}
+			return items, nil
+
 		case '[':
 			// [][]int64
 			var intArrays [][]int64
@@ -60,7 +91,7 @@ func unmarshalJSONNestedUnion(typ string, data []byte) (interface{}, error) {
 		}
 
 	default:
-		return nil, fmt.Errorf("invalid %s type (must be string or array)", typ)
+		return nil, fmt.Errorf("invalid %s type (must be string, object, or array)", typ)
 	}
 }
 
@@ -101,3 +132,86 @@ func unquoteOrUnmarshalJSONString(typ string, data []byte) (string, error) {
 	}
 	return str, nil
 }
+
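+// isMixedArray reports whether data is a JSON array containing at least one string element and at least one object element; it returns false if data cannot be decoded as an array.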
+func isMixedArray(data []byte) bool {
+	var arr []json.RawMessage
+	if err := json.Unmarshal(data, &arr); err != nil {
+		return false
+	}
+
+	hasString := false
+	hasObject := false
+
+	for _, item := range arr {
+		trimmed := item
+		// Skip leading whitespace
+		idx := 0
+		for idx < len(trimmed) && (trimmed[idx] == ' ' || trimmed[idx] == '\t' || trimmed[idx] == '\n' || trimmed[idx] == '\r') {
+			idx++
+		}
+		if idx >= len(trimmed) {
+			continue
+		}
+
+		switch trimmed[idx] {
+		case '"':
+			hasString = true
+		case '{':
+			hasObject = true
+		}
+
+		// If we have both types, it's a mixed array
+		if hasString && hasObject {
+			return true
+		}
+	}
+
+	return false
+}
+
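+// unmarshalMixedArray decodes a JSON array of strings and EmbeddingInputItem objects into a []interface{} in input order, rejecting any other element kind and any object whose content is empty.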
+func unmarshalMixedArray(typ string, data []byte) (interface{}, error) {
+	var arr []json.RawMessage
+	if err := json.Unmarshal(data, &arr); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal %s as mixed array: %w", typ, err)
+	}
+
+	result := make([]interface{}, len(arr))
+
+	for i, item := range arr {
+		// Skip leading whitespace
+		idx := 0
+		for idx < len(item) && (item[idx] == ' ' || item[idx] == '\t' || item[idx] == '\n' || item[idx] == '\r') {
+			idx++
+		}
+		if idx >= len(item) {
+			return nil, fmt.Errorf("empty element in mixed %s array", typ)
+		}
+
+		switch item[idx] {
+		case '"':
+			// String element
+			var str string
+			if err := json.Unmarshal(item, &str); err != nil {
+				return nil, fmt.Errorf("cannot unmarshal string element in mixed %s array: %w", typ, err)
+			}
+			result[i] = str
+		case '{':
+			// Object element
+			var embeddingItem EmbeddingInputItem
+			if err := json.Unmarshal(item, &embeddingItem); err != nil {
+				return nil, fmt.Errorf("cannot unmarshal object element in mixed %s array: %w", typ, err)
+			}
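+			// Validate that the content field is not empty. NOTE(review): the error below reports an invalid element type, although the actual failure is empty or missing content.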
+			if embeddingItem.Content == "" {
+				return nil, fmt.Errorf("invalid element type in mixed %s array", typ)
+			}
+			result[i] = embeddingItem
+		default:
+			return nil, fmt.Errorf("invalid element type in mixed %s array", typ)
+		}
+	}
+
+	return result, nil
+}