envoyproxy
diff --git a/‎.github/workflows/build_and_test.yaml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/build_and_test.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎api/v1alpha1/shared_types.go‎
Lines changed: 5 additions & 1 deletion b/‎api/v1alpha1/shared_types.go‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎cmd/extproc/mainlib/main.go‎
Lines changed: 2 additions & 0 deletions b/‎cmd/extproc/mainlib/main.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/basic/README.md‎
Lines changed: 1 addition & 0 deletions b/‎examples/basic/README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/basic/cohere.yaml‎
Lines changed: 87 additions & 0 deletions b/‎examples/basic/cohere.yaml‎
Lines changed: 87 additions & 0 deletions
diff --git a/‎go.mod‎
Lines changed: 1 addition & 0 deletions b/‎go.mod‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎go.sum‎
Lines changed: 2 additions & 0 deletions b/‎go.sum‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎internal/apischema/cohere/rerank_v2.go‎
Lines changed: 98 additions & 0 deletions b/‎internal/apischema/cohere/rerank_v2.go‎
Lines changed: 98 additions & 0 deletions
@@ -160,6 +160,7 @@ jobs:
           TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BEDROCK_USER_AWS_SECRET_ACCESS_KEY }}
           TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }}
           TEST_GEMINI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GEMINI_API_KEY }}
+          TEST_COHERE_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_COHERE_API_KEY }}
           TEST_GROQ_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GROQ_API_KEY }}
           TEST_GROK_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GROK_API_KEY }}
           TEST_SAMBANOVA_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_SAMBANOVA_API_KEY }}
@@ -207,6 +208,7 @@ jobs:
           TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }}
           TEST_ANTHROPIC_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_ANTHROPIC_API_KEY }}
           TEST_GEMINI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GEMINI_API_KEY }}
+          TEST_COHERE_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_COHERE_API_KEY }}
         run: make test-e2e
 
   test_e2e_upgrade:
 
@@ -15,7 +15,7 @@ package v1alpha1
 type VersionedAPISchema struct {
 	// Name is the name of the API schema of the AIGatewayRoute or AIServiceBackend.
 	//
-	// +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic
+	// +kubebuilder:validation:Enum=OpenAI;Cohere;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic
 	Name APISchema `json:"name"`
 
 	// Version is the version of the API schema.
@@ -43,6 +43,10 @@ const (
 	//
 	// https://github.com/openai/openai-openapi
 	APISchemaOpenAI APISchema = "OpenAI"
+	// APISchemaCohere is the Cohere schema.
+	//
+	// https://docs.cohere.com/v2
+	APISchemaCohere APISchema = "Cohere"
 	// APISchemaAWSBedrock is the AWS Bedrock schema.
 	//
 	// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html
 
@@ -235,6 +235,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 	completionMetrics := metrics.NewCompletionFactory(meter, metricsRequestHeaderAttributes)
 	embeddingsMetrics := metrics.NewEmbeddingsFactory(meter, metricsRequestHeaderAttributes)
 	imageGenerationMetrics := metrics.NewImageGenerationFactory(meter, metricsRequestHeaderAttributes)()
+	rerankMetrics := metrics.NewRerankFactory(meter, metricsRequestHeaderAttributes)
 	mcpMetrics := metrics.NewMCP(meter, metricsRequestHeaderAttributes)
 
 	tracing, err := tracing.NewTracingFromEnv(ctx, os.Stdout, spanRequestHeaderAttributes)
@@ -250,6 +251,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 	server.Register(path.Join(flags.rootPrefix, "/v1/completions"), extproc.CompletionsProcessorFactory(completionMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/embeddings"), extproc.EmbeddingsProcessorFactory(embeddingsMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/images/generations"), extproc.ImageGenerationProcessorFactory(imageGenerationMetrics))
+	server.Register(path.Join(flags.rootPrefix, "/cohere/v2/rerank"), extproc.RerankProcessorFactory(rerankMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/models"), extproc.NewModelsProcessor)
 	server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"), extproc.MessagesProcessorFactory(messagesMetrics))
 
 
@@ -11,6 +11,7 @@ traffic for various AI providers.
 - `azure_openai.yaml` - Azure OpenAI integration
 - `gcp_vertex.yaml` - GCP Vertex AI integration
 - `tars.yaml` - TARS integration
+- `cohere.yaml` - Cohere integration
 
 For AWS Bedrock, we recommend using either `aws-pod-identity.yaml` (EKS 1.24+) or
 `aws-irsa.yaml` (all EKS versions) for production deployments instead of static credentials. [Docs](https://docs.aws.amazon.com/eks/latest/best-practices/identity-and-access-management.html#_identities_and_credentials_for_eks_pods)
@@ -0,0 +1,87 @@
+# Copyright Envoy AI Gateway Authors
+# SPDX-License-Identifier: Apache-2.0
+# The full text of the Apache license is available in the LICENSE file at
+# the root of the repo.
+
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: AIGatewayRoute
+metadata:
+  name: envoy-ai-gateway-basic-cohere
+  namespace: default
+spec:
+  parentRefs:
+    - name: envoy-ai-gateway-basic
+      kind: Gateway
+      group: gateway.networking.k8s.io
+  rules:
+    - matches:
+        - headers:
+            - type: Exact
+              name: x-ai-eg-model
+              value: rerank-english-v3.0
+      backendRefs:
+        - name: envoy-ai-gateway-basic-cohere
+---
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: AIServiceBackend
+metadata:
+  name: envoy-ai-gateway-basic-cohere
+  namespace: default
+spec:
+  schema:
+    name: Cohere
+    version: v2
+  backendRef:
+    name: envoy-ai-gateway-basic-cohere
+    kind: Backend
+    group: gateway.envoyproxy.io
+---
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: BackendSecurityPolicy
+metadata:
+  name: envoy-ai-gateway-basic-cohere-apikey
+  namespace: default
+spec:
+  targetRefs:
+    - group: aigateway.envoyproxy.io
+      kind: AIServiceBackend
+      name: envoy-ai-gateway-basic-cohere
+  type: APIKey
+  apiKey:
+    secretRef:
+      name: envoy-ai-gateway-basic-cohere-apikey
+      namespace: default
+---
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: Backend
+metadata:
+  name: envoy-ai-gateway-basic-cohere
+  namespace: default
+spec:
+  endpoints:
+    - fqdn:
+        hostname: api.cohere.com
+        port: 443
+---
+apiVersion: gateway.networking.k8s.io/v1alpha3
+kind: BackendTLSPolicy
+metadata:
+  name: envoy-ai-gateway-basic-cohere-tls
+  namespace: default
+spec:
+  targetRefs:
+    - group: "gateway.envoyproxy.io"
+      kind: Backend
+      name: envoy-ai-gateway-basic-cohere
+  validation:
+    wellKnownCACertificates: "System"
+    hostname: api.cohere.com
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: envoy-ai-gateway-basic-cohere-apikey
+  namespace: default
+type: Opaque
+stringData:
+  apiKey: COHERE_API_KEY # Replace with your Cohere API key.
@@ -16,6 +16,7 @@ require (
 	github.com/aws/aws-sdk-go-v2/service/sts v1.38.9
 	github.com/cenkalti/backoff/v4 v4.3.0
 	github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443
+	github.com/cohere-ai/cohere-go/v2 v2.15.3
 	github.com/coreos/go-oidc/v3 v3.16.0
 	github.com/docker/docker v28.5.1+incompatible
 	github.com/envoyproxy/gateway v1.6.0-rc.0.0.20251028174200-282c916a47e1
 
@@ -93,6 +93,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls=
 github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
+github.com/cohere-ai/cohere-go/v2 v2.15.3 h1:d6m4mspLmviA5OcJzY4wRmugQhcWP1iOPjSkgyZImhs=
+github.com/cohere-ai/cohere-go/v2 v2.15.3/go.mod h1:MuiJkCxlR18BDV2qQPbz2Yb/OCVphT1y6nD2zYaKeR0=
 github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
 github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
 github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
 
@@ -0,0 +1,98 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+// Package cohere contains Cohere API schema definitions.
+package cohere
+
+// RerankV2Request represents the JSON request body for the Cohere Rerank API v2.
+// Docs: https://docs.cohere.com/reference/rerank
+type RerankV2Request struct {
+	// Model is the identifier of the model to use, e.g. "rerank-v3.5".
+	Model string `json:"model"`
+	// Query is the query string that the documents are ranked against.
+	Query string `json:"query"`
+	// Documents is the list of texts to compare with the query. For best performance, send fewer
+	// than 1000 documents. Long documents may be truncated server-side by max_tokens_per_doc.
+	Documents []string `json:"documents"`
+	// TopN optionally limits the returned results to the top N; nil omits top_n from the JSON.
+	TopN *int `json:"top_n,omitempty"`
+	// MaxTokensPerDoc optionally truncates long documents to this many tokens (default: 4096); nil omits it.
+	MaxTokensPerDoc *int `json:"max_tokens_per_doc,omitempty"`
+}
+
+// RerankV2Response represents the JSON response body from the Cohere Rerank API v2.
+// Docs: https://docs.cohere.com/reference/rerank
+type RerankV2Response struct {
+	// Results is the ordered list of ranked documents with their scores.
+	Results []*RerankV2Result `json:"results"`
+	// ID is the unique request ID; nil omits id from the JSON.
+	ID *string `json:"id,omitempty"`
+	// Meta is additional metadata, including API version and billing; nil omits meta from the JSON.
+	Meta *RerankV2Meta `json:"meta,omitempty"`
+}
+
+// RerankV2Result is a single ranked item in RerankV2Response.Results.
+type RerankV2Result struct {
+	// Index is the position of the matched item in the input documents slice; no omitempty, so 0 is still emitted.
+	Index int `json:"index"`
+	// RelevanceScore is the model-assigned score indicating how well the
+	// document matches the query (higher means more relevant); no omitempty, so 0 is still emitted.
+	RelevanceScore float64 `json:"relevance_score"`
+}
+
+// RerankV2Meta contains the metadata carried in RerankV2Response.Meta; every field is tagged omitempty and may be absent.
+type RerankV2Meta struct {
+	// APIVersion contains the version information for the API that processed the request.
+	APIVersion *RerankV2APIVersion `json:"api_version,omitempty"`
+	// BilledUnits reports the billed resource usage for this request.
+	BilledUnits *RerankV2BilledUnits `json:"billed_units,omitempty"`
+	// Tokens provides the token usage breakdown for the request/response.
+	Tokens *RerankV2Tokens `json:"tokens,omitempty"`
+	// CachedTokens is the number of prompt tokens that hit the inference cache (nullable).
+	CachedTokens *float64 `json:"cached_tokens,omitempty"`
+	// Warnings contains any non-fatal warnings generated while processing the request; omitted when empty.
+	Warnings []string `json:"warnings,omitempty"`
+}
+
+// RerankV2APIVersion describes the API version details carried in RerankV2Meta.APIVersion.
+type RerankV2APIVersion struct {
+	// Version is the API version string (e.g., "2"); it has no omitempty, so it is serialized even when empty.
+	Version string `json:"version"`
+	// IsDeprecated indicates whether this API version is deprecated (nullable; nil is omitted from the JSON).
+	IsDeprecated *bool `json:"is_deprecated,omitempty"`
+	// IsExperimental indicates whether this API version is experimental (nullable; nil is omitted from the JSON).
+	IsExperimental *bool `json:"is_experimental,omitempty"`
+}
+
+// RerankV2BilledUnits contains the billed usage carried in RerankV2Meta.BilledUnits; counts are pointers, so nil (absent) is distinct from 0.
+type RerankV2BilledUnits struct {
+	// Images is the number of billed images (nullable).
+	Images *float64 `json:"images,omitempty"`
+	// InputTokens is the number of billed input tokens (nullable).
+	InputTokens *float64 `json:"input_tokens,omitempty"`
+	// OutputTokens is the number of billed output tokens (nullable).
+	OutputTokens *float64 `json:"output_tokens,omitempty"`
+	// SearchUnits is the number of billed search units (nullable).
+	SearchUnits *float64 `json:"search_units,omitempty"`
+	// Classifications is the number of billed classification units (nullable).
+	Classifications *float64 `json:"classifications,omitempty"`
+}
+
+// RerankV2Tokens captures token accounting for the request, carried in RerankV2Meta.Tokens.
+// Docs: https://docs.cohere.com/reference/rerank#response.body.meta.tokens
+type RerankV2Tokens struct {
+	// InputTokens is the number of tokens used as input to the model (nullable; nil is omitted from the JSON).
+	InputTokens *float64 `json:"input_tokens,omitempty"`
+	// OutputTokens is the number of tokens produced by the model (nullable; nil is omitted from the JSON).
+	OutputTokens *float64 `json:"output_tokens,omitempty"`
+}
+
+// RerankV2Error describes a Cohere v2 error; both fields are optional and omitted from the JSON when nil.
+type RerankV2Error struct {
+	// ID is a unique identifier for the error (nullable).
+	ID *string `json:"id,omitempty"`
+	// Message is a human-readable description of the error (nullable).
+	Message *string `json:"message,omitempty"`
+}