Skip to content

Commit c87e367

Browse files
feat: add passthrough cohere v2 rerank endpoint (#1433)
**Description** This PR adds complete support for Cohere's rerank endpoint (/v2/rerank) across the Envoy AI Gateway. It introduces the apischema, processor, translator, and metrics instrumentation, and updates the documentation to include the rerank endpoint. **Related Issues/PRs (if applicable)** #957 --------- Signed-off-by: ayush <[email protected]>
1 parent df87d58 commit c87e367

File tree

26 files changed

+1951
-25
lines changed

26 files changed

+1951
-25
lines changed

.github/workflows/build_and_test.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ jobs:
160160
TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BEDROCK_USER_AWS_SECRET_ACCESS_KEY }}
161161
TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }}
162162
TEST_GEMINI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GEMINI_API_KEY }}
163+
TEST_COHERE_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_COHERE_API_KEY }}
163164
TEST_GROQ_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GROQ_API_KEY }}
164165
TEST_GROK_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GROK_API_KEY }}
165166
TEST_SAMBANOVA_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_SAMBANOVA_API_KEY }}
@@ -207,6 +208,7 @@ jobs:
207208
TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }}
208209
TEST_ANTHROPIC_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_ANTHROPIC_API_KEY }}
209210
TEST_GEMINI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GEMINI_API_KEY }}
211+
TEST_COHERE_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_COHERE_API_KEY }}
210212
run: make test-e2e
211213

212214
test_e2e_upgrade:

api/v1alpha1/shared_types.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ package v1alpha1
1515
type VersionedAPISchema struct {
1616
// Name is the name of the API schema of the AIGatewayRoute or AIServiceBackend.
1717
//
18-
// +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic
18+
// +kubebuilder:validation:Enum=OpenAI;Cohere;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic;AWSAnthropic
1919
Name APISchema `json:"name"`
2020

2121
// Version is the version of the API schema.
@@ -43,6 +43,10 @@ const (
4343
//
4444
// https://github.com/openai/openai-openapi
4545
APISchemaOpenAI APISchema = "OpenAI"
46+
// APISchemaCohere is the Cohere schema.
47+
//
48+
// https://docs.cohere.com/v2
49+
APISchemaCohere APISchema = "Cohere"
4650
// APISchemaAWSBedrock is the AWS Bedrock schema.
4751
//
4852
// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html

cmd/extproc/mainlib/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
235235
completionMetrics := metrics.NewCompletionFactory(meter, metricsRequestHeaderAttributes)
236236
embeddingsMetrics := metrics.NewEmbeddingsFactory(meter, metricsRequestHeaderAttributes)
237237
imageGenerationMetrics := metrics.NewImageGenerationFactory(meter, metricsRequestHeaderAttributes)()
238+
rerankMetrics := metrics.NewRerankFactory(meter, metricsRequestHeaderAttributes)
238239
mcpMetrics := metrics.NewMCP(meter, metricsRequestHeaderAttributes)
239240

240241
tracing, err := tracing.NewTracingFromEnv(ctx, os.Stdout, spanRequestHeaderAttributes)
@@ -250,6 +251,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
250251
server.Register(path.Join(flags.rootPrefix, "/v1/completions"), extproc.CompletionsProcessorFactory(completionMetrics))
251252
server.Register(path.Join(flags.rootPrefix, "/v1/embeddings"), extproc.EmbeddingsProcessorFactory(embeddingsMetrics))
252253
server.Register(path.Join(flags.rootPrefix, "/v1/images/generations"), extproc.ImageGenerationProcessorFactory(imageGenerationMetrics))
254+
server.Register(path.Join(flags.rootPrefix, "/cohere/v2/rerank"), extproc.RerankProcessorFactory(rerankMetrics))
253255
server.Register(path.Join(flags.rootPrefix, "/v1/models"), extproc.NewModelsProcessor)
254256
server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"), extproc.MessagesProcessorFactory(messagesMetrics))
255257

examples/basic/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ traffic for various AI providers.
1111
- `azure_openai.yaml` - Azure OpenAI integration
1212
- `gcp_vertex.yaml` - GCP Vertex AI integration
1313
- `tars.yaml` - TARS integration
14+
- `cohere.yaml` - Cohere integration
1415

1516
For AWS Bedrock, we recommend using either `aws-pod-identity.yaml` (EKS 1.24+) or
1617
`aws-irsa.yaml` (all EKS versions) for production deployments instead of static credentials. [Docs](https://docs.aws.amazon.com/eks/latest/best-practices/identity-and-access-management.html#_identities_and_credentials_for_eks_pods)

examples/basic/cohere.yaml

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright Envoy AI Gateway Authors
2+
# SPDX-License-Identifier: Apache-2.0
3+
# The full text of the Apache license is available in the LICENSE file at
4+
# the root of the repo.
5+
6+
apiVersion: aigateway.envoyproxy.io/v1alpha1
7+
kind: AIGatewayRoute
8+
metadata:
9+
name: envoy-ai-gateway-basic-cohere
10+
namespace: default
11+
spec:
12+
parentRefs:
13+
- name: envoy-ai-gateway-basic
14+
kind: Gateway
15+
group: gateway.networking.k8s.io
16+
rules:
17+
- matches:
18+
- headers:
19+
- type: Exact
20+
name: x-ai-eg-model
21+
value: rerank-english-v3.0
22+
backendRefs:
23+
- name: envoy-ai-gateway-basic-cohere
24+
---
25+
apiVersion: aigateway.envoyproxy.io/v1alpha1
26+
kind: AIServiceBackend
27+
metadata:
28+
name: envoy-ai-gateway-basic-cohere
29+
namespace: default
30+
spec:
31+
schema:
32+
name: Cohere
33+
version: v2
34+
backendRef:
35+
name: envoy-ai-gateway-basic-cohere
36+
kind: Backend
37+
group: gateway.envoyproxy.io
38+
---
39+
apiVersion: aigateway.envoyproxy.io/v1alpha1
40+
kind: BackendSecurityPolicy
41+
metadata:
42+
name: envoy-ai-gateway-basic-cohere-apikey
43+
namespace: default
44+
spec:
45+
targetRefs:
46+
- group: aigateway.envoyproxy.io
47+
kind: AIServiceBackend
48+
name: envoy-ai-gateway-basic-cohere
49+
type: APIKey
50+
apiKey:
51+
secretRef:
52+
name: envoy-ai-gateway-basic-cohere-apikey
53+
namespace: default
54+
---
55+
apiVersion: gateway.envoyproxy.io/v1alpha1
56+
kind: Backend
57+
metadata:
58+
name: envoy-ai-gateway-basic-cohere
59+
namespace: default
60+
spec:
61+
endpoints:
62+
- fqdn:
63+
hostname: api.cohere.com
64+
port: 443
65+
---
66+
apiVersion: gateway.networking.k8s.io/v1alpha3
67+
kind: BackendTLSPolicy
68+
metadata:
69+
name: envoy-ai-gateway-basic-cohere-tls
70+
namespace: default
71+
spec:
72+
targetRefs:
73+
- group: "gateway.envoyproxy.io"
74+
kind: Backend
75+
name: envoy-ai-gateway-basic-cohere
76+
validation:
77+
wellKnownCACertificates: "System"
78+
hostname: api.cohere.com
79+
---
80+
apiVersion: v1
81+
kind: Secret
82+
metadata:
83+
name: envoy-ai-gateway-basic-cohere-apikey
84+
namespace: default
85+
type: Opaque
86+
stringData:
87+
apiKey: COHERE_API_KEY # Replace with your Cohere API key.

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ require (
1616
github.com/aws/aws-sdk-go-v2/service/sts v1.38.9
1717
github.com/cenkalti/backoff/v4 v4.3.0
1818
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443
19+
github.com/cohere-ai/cohere-go/v2 v2.15.3
1920
github.com/coreos/go-oidc/v3 v3.16.0
2021
github.com/docker/docker v28.5.1+incompatible
2122
github.com/envoyproxy/gateway v1.6.0-rc.0.0.20251028174200-282c916a47e1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
9393
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
9494
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls=
9595
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
96+
github.com/cohere-ai/cohere-go/v2 v2.15.3 h1:d6m4mspLmviA5OcJzY4wRmugQhcWP1iOPjSkgyZImhs=
97+
github.com/cohere-ai/cohere-go/v2 v2.15.3/go.mod h1:MuiJkCxlR18BDV2qQPbz2Yb/OCVphT1y6nD2zYaKeR0=
9698
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
9799
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
98100
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Copyright Envoy AI Gateway Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
// The full text of the Apache license is available in the LICENSE file at
4+
// the root of the repo.
5+
6+
// Package cohere contains Cohere API schema definitions.
7+
package cohere
8+
9+
// RerankV2Request represents the request body for Cohere Rerank API v2.
//
// Model, Query and Documents are tagged without omitempty, so their keys are
// always present in the encoded JSON (a nil Documents slice encodes as null).
// TopN and MaxTokensPerDoc are pointers tagged omitempty, so a nil value
// leaves the key out of the payload entirely.
10+
// Docs: https://docs.cohere.com/reference/rerank
11+
type RerankV2Request struct {
12+
// Model is the identifier of the model to use, e.g. "rerank-v3.5".
13+
Model string `json:"model"`
14+
// Query is the text the documents are ranked against.
15+
Query string `json:"query"`
16+
// Documents are the texts to be compared with the query. For best performance, keep under 1000.
17+
// Long documents may be truncated server-side by max_tokens_per_doc.
18+
Documents []string `json:"documents"`
19+
// TopN is optional and limits the returned results to top_n; nil omits the field.
20+
TopN *int `json:"top_n,omitempty"`
21+
// MaxTokensPerDoc is optional and truncates long documents to this many tokens
// (default: 4096); nil omits the field.
22+
MaxTokensPerDoc *int `json:"max_tokens_per_doc,omitempty"`
23+
}
24+
25+
// RerankV2Response represents the response from Cohere Rerank API v2.
//
// Results is always encoded; ID and Meta are optional pointers tagged
// omitempty and are dropped from the encoded JSON when nil.
26+
// Docs: https://docs.cohere.com/reference/rerank
27+
type RerankV2Response struct {
28+
// Results is the ordered list of ranked documents with scores.
29+
Results []*RerankV2Result `json:"results"`
30+
// ID is the unique request ID.
31+
ID *string `json:"id,omitempty"`
32+
// Meta carries additional metadata including API version and billing.
33+
Meta *RerankV2Meta `json:"meta,omitempty"`
34+
}
35+
36+
// RerankV2Result is a single ranked item in the response.
//
// Both fields are plain values without omitempty, so a zero Index or a zero
// RelevanceScore is still written to the encoded JSON.
37+
type RerankV2Result struct {
38+
// Index is the position of the matched item in the input documents slice.
39+
Index int `json:"index"`
40+
// RelevanceScore is the model-assigned score indicating how well the
41+
// document matches the query (higher means more relevant).
42+
RelevanceScore float64 `json:"relevance_score"`
43+
}
44+
45+
// RerankV2Meta contains metadata returned by the API.
//
// Every field is tagged omitempty: nil pointers and an empty Warnings slice
// are left out of the encoded JSON.
46+
type RerankV2Meta struct {
47+
// APIVersion contains the version information for the API that processed the request.
48+
APIVersion *RerankV2APIVersion `json:"api_version,omitempty"`
49+
// BilledUnits reports the billed resource usage for this request.
50+
BilledUnits *RerankV2BilledUnits `json:"billed_units,omitempty"`
51+
// Tokens provides the token usage breakdown for the request/response.
52+
Tokens *RerankV2Tokens `json:"tokens,omitempty"`
53+
// CachedTokens is the number of prompt tokens that hit the inference cache.
54+
CachedTokens *float64 `json:"cached_tokens,omitempty"`
55+
// Warnings contains any non-fatal warnings generated while processing the request.
56+
Warnings []string `json:"warnings,omitempty"`
57+
}
58+
59+
// RerankV2APIVersion describes the API version details in the response meta.
//
// Version is always encoded. IsDeprecated and IsExperimental are *bool so that
// a missing value (nil) can be told apart from an explicit false.
60+
type RerankV2APIVersion struct {
61+
// Version is the API version string (e.g., "2").
62+
Version string `json:"version"`
63+
// IsDeprecated indicates whether this API version is deprecated (nullable).
64+
IsDeprecated *bool `json:"is_deprecated,omitempty"`
65+
// IsExperimental indicates whether this API version is experimental (nullable).
66+
IsExperimental *bool `json:"is_experimental,omitempty"`
67+
}
68+
69+
// RerankV2BilledUnits contains usage metrics related to the request.
//
// Every counter is an optional *float64 tagged omitempty: when decoding, a key
// that is absent or null leaves the pointer nil, and when encoding, a nil
// pointer omits the key.
70+
type RerankV2BilledUnits struct {
71+
// Images is the number of billed images (nullable).
72+
Images *float64 `json:"images,omitempty"`
73+
// InputTokens is the number of billed input tokens (nullable).
74+
InputTokens *float64 `json:"input_tokens,omitempty"`
75+
// OutputTokens is the number of billed output tokens (nullable).
76+
OutputTokens *float64 `json:"output_tokens,omitempty"`
77+
// SearchUnits is the number of billed search units (nullable).
78+
SearchUnits *float64 `json:"search_units,omitempty"`
79+
// Classifications is the number of billed classification units (nullable).
80+
Classifications *float64 `json:"classifications,omitempty"`
81+
}
82+
83+
// RerankV2Tokens captures token accounting for the request.
//
// Both counters are optional *float64 values tagged omitempty, so a nil
// pointer is left out of the encoded JSON.
84+
// Docs: https://docs.cohere.com/reference/rerank#response.body.meta.tokens
85+
type RerankV2Tokens struct {
86+
// InputTokens is the number of tokens used as input to the model (nullable).
87+
InputTokens *float64 `json:"input_tokens,omitempty"`
88+
// OutputTokens is the number of tokens produced by the model (nullable).
89+
OutputTokens *float64 `json:"output_tokens,omitempty"`
90+
}
91+
92+
// RerankV2Error describes a Cohere v2 error.
//
// Both fields are optional pointers tagged omitempty, so either may be nil
// after decoding and is omitted from the encoded JSON when nil.
93+
type RerankV2Error struct {
94+
// ID is a unique identifier for the error (nullable).
95+
ID *string `json:"id,omitempty"`
96+
// Message is a human-readable description of the error (nullable).
97+
Message *string `json:"message,omitempty"`
98+
}

0 commit comments

Comments
 (0)