Skip to content

Commit 989d82a

Browse files
committed
Merge branch 'main' of ssh://github.com/envoyproxy/ai-gateway
2 parents 4f644e6 + 9bdd87e commit 989d82a

File tree

102 files changed

+2437
-1570
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+2437
-1570
lines changed

.github/workflows/build_and_test.yaml

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -67,32 +67,7 @@ jobs:
6767
~/go/pkg/mod
6868
~/go/bin
6969
key: unittest-${{ hashFiles('**/go.mod', '**/go.sum', '**/Makefile') }}-${{ matrix.os }}
70-
71-
# This runs ollama server to be used in `aigw run` end-to-end tests.
72-
# The test case using it will be skipped if ollama is not available.
73-
# Since installing it and pulling the model takes a while, we do it only for Linux runners.
74-
- name: Start Ollama server
75-
if: matrix.os == 'ubuntu-latest'
76-
run: |
77-
curl -fsSL https://ollama.com/install.sh | sh && sudo systemctl stop ollama
78-
nohup ollama serve > ollama.log 2>&1 &
79-
timeout 30 sh -c 'until nc -z localhost 11434; do sleep 1; done'
80-
grep _MODEL .env.ollama | cut -d= -f2 | xargs -I{} ollama pull {}
81-
env:
82-
OLLAMA_CONTEXT_LENGTH: 131072 # Larger context for goose
83-
OLLAMA_HOST: 0.0.0.0
84-
# Download Envoy via func-e using implicit default version `aigw` would
85-
# otherwise need to download during test runs.
86-
- name: Download Envoy via func-e
87-
run: go tool -modfile=tools/go.mod func-e run --version
88-
env:
89-
FUNC_E_HOME: /tmp/envoy-gateway # hard-coded directory in EG
90-
- env:
91-
TEST_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_BEDROCK_USER_AWS_ACCESS_KEY_ID }}
92-
TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BEDROCK_USER_AWS_SECRET_ACCESS_KEY }}
93-
TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }}
94-
TEST_GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
95-
run: make test-coverage
70+
- run: make test-coverage
9671
- if: failure()
9772
run: cat ollama.log || true
9873
- name: Upload coverage to Codecov
@@ -252,6 +227,7 @@ jobs:
252227
TEST_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_BEDROCK_USER_AWS_ACCESS_KEY_ID }}
253228
TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BEDROCK_USER_AWS_SECRET_ACCESS_KEY }}
254229
TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }}
230+
TEST_ANTHROPIC_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_ANTHROPIC_API_KEY }}
255231
TEST_GEMINI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_GEMINI_API_KEY }}
256232
run: make test-e2e
257233

@@ -388,10 +364,14 @@ jobs:
388364
FUNC_E_HOME: /tmp/envoy-gateway # hard-coded directory in EG
389365
- name: Install Goose
390366
env:
391-
GOOSE_VERSION: v1.8.0
367+
GOOSE_VERSION: v1.10.0
368+
OS: Linux
392369
run: |
393370
curl -fsSL https://github.com/block/goose/releases/download/stable/download_cli.sh | CONFIGURE=false bash
394-
- run: make test-e2e-aigw
371+
- env:
372+
# This is used to access the GitHub MCP server.
373+
TEST_GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
374+
run: make test-e2e-aigw
395375
- if: failure()
396376
run: cat ollama.log || true
397377

api/v1alpha1/ai_gateway_route.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ type AIGatewayRouteSpec struct {
103103
// type: OutputToken
104104
// - metadataKey: llm_total_token
105105
// type: TotalToken
106+
// - metadataKey: llm_cached_input_token
107+
// type: CachedInputToken
106108
// ```
107109
// Then, with the following BackendTrafficPolicy of Envoy Gateway, you can have three
108110
// rate limit buckets for each unique x-user-id header value. One bucket is for the input token,

api/v1alpha1/backendsecurity_policy.go

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ const (
1919
BackendSecurityPolicyTypeAPIKey BackendSecurityPolicyType = "APIKey"
2020
BackendSecurityPolicyTypeAWSCredentials BackendSecurityPolicyType = "AWSCredentials"
2121
BackendSecurityPolicyTypeAzureAPIKey BackendSecurityPolicyType = "AzureAPIKey"
22+
BackendSecurityPolicyTypeAnthropicAPIKey BackendSecurityPolicyType = "AnthropicAPIKey" // #nosec G101
2223
BackendSecurityPolicyTypeAzureCredentials BackendSecurityPolicyType = "AzureCredentials"
2324
BackendSecurityPolicyTypeGCPCredentials BackendSecurityPolicyType = "GCPCredentials"
2425
)
@@ -43,11 +44,12 @@ type BackendSecurityPolicy struct {
4344
//
4445
// Only one type of BackendSecurityPolicy can be defined.
4546
// +kubebuilder:validation:MaxProperties=3
46-
// +kubebuilder:validation:XValidation:rule="self.type == 'APIKey' ? (has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.gcpCredentials)) : true",message="When type is APIKey, only apiKey field should be set"
47-
// +kubebuilder:validation:XValidation:rule="self.type == 'AWSCredentials' ? (has(self.awsCredentials) && !has(self.apiKey) && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.gcpCredentials)) : true",message="When type is AWSCredentials, only awsCredentials field should be set"
48-
// +kubebuilder:validation:XValidation:rule="self.type == 'AzureAPIKey' ? (has(self.azureAPIKey) && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureCredentials) && !has(self.gcpCredentials)) : true",message="When type is AzureAPIKey, only azureAPIKey field should be set"
49-
// +kubebuilder:validation:XValidation:rule="self.type == 'AzureCredentials' ? (has(self.azureCredentials) && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) && !has(self.gcpCredentials)) : true",message="When type is AzureCredentials, only azureCredentials field should be set"
50-
// +kubebuilder:validation:XValidation:rule="self.type == 'GCPCredentials' ? (has(self.gcpCredentials) && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) && !has(self.azureCredentials)) : true",message="When type is GCPCredentials, only gcpCredentials field should be set"
47+
// +kubebuilder:validation:XValidation:rule="self.type == 'APIKey' ? (has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true",message="When type is APIKey, only apiKey field should be set"
48+
// +kubebuilder:validation:XValidation:rule="self.type == 'AWSCredentials' ? (has(self.awsCredentials) && !has(self.apiKey) && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true",message="When type is AWSCredentials, only awsCredentials field should be set"
49+
// +kubebuilder:validation:XValidation:rule="self.type == 'AzureAPIKey' ? (has(self.azureAPIKey) && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureCredentials) && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true",message="When type is AzureAPIKey, only azureAPIKey field should be set"
50+
// +kubebuilder:validation:XValidation:rule="self.type == 'AzureCredentials' ? (has(self.azureCredentials) && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true",message="When type is AzureCredentials, only azureCredentials field should be set"
51+
// +kubebuilder:validation:XValidation:rule="self.type == 'GCPCredentials' ? (has(self.gcpCredentials) && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.anthropicAPIKey)) : true",message="When type is GCPCredentials, only gcpCredentials field should be set"
52+
// +kubebuilder:validation:XValidation:rule="self.type == 'AnthropicAPIKey' ? (has(self.anthropicAPIKey) && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.gcpCredentials)) : true",message="When type is AnthropicAPIKey, only anthropicAPIKey field should be set"
5153
type BackendSecurityPolicySpec struct {
5254
// TargetRefs are the names of the AIServiceBackend resources this BackendSecurityPolicy is being attached to.
5355
// Attaching multiple BackendSecurityPolicies to the same AIServiceBackend is invalid and will result in an error
@@ -60,7 +62,7 @@ type BackendSecurityPolicySpec struct {
6062

6163
// Type specifies the type of the backend security policy.
6264
//
63-
// +kubebuilder:validation:Enum=APIKey;AWSCredentials;AzureAPIKey;AzureCredentials;GCPCredentials
65+
// +kubebuilder:validation:Enum=APIKey;AWSCredentials;AzureAPIKey;AzureCredentials;GCPCredentials;AnthropicAPIKey
6466
Type BackendSecurityPolicyType `json:"type"`
6567

6668
// APIKey is a mechanism to access a backend(s). The API key will be injected into the Authorization header.
@@ -82,10 +84,17 @@ type BackendSecurityPolicySpec struct {
8284
//
8385
// +optional
8486
AzureCredentials *BackendSecurityPolicyAzureCredentials `json:"azureCredentials,omitempty"`
87+
8588
// GCPCredentials is a mechanism to access a backend(s). GCP specific logic will be applied.
8689
//
8790
// +optional
8891
GCPCredentials *BackendSecurityPolicyGCPCredentials `json:"gcpCredentials,omitempty"`
92+
93+
// AnthropicAPIKey is a mechanism to access Anthropic backend(s). The API key will be injected into the "x-api-key" header.
94+
// https://docs.claude.com/en/api/overview#authentication
95+
//
96+
// +optional
97+
AnthropicAPIKey *BackendSecurityPolicyAnthropicAPIKey `json:"anthropicAPIKey,omitempty"`
8998
}
9099

91100
// BackendSecurityPolicyList contains a list of BackendSecurityPolicy
@@ -314,3 +323,11 @@ type GCPCredentialsFile struct {
314323
// The secret should contain the GCP service account credentials file keyed on "service_account.json".
315324
SecretRef *gwapiv1.SecretObjectReference `json:"secretRef"`
316325
}
326+
327+
// BackendSecurityPolicyAnthropicAPIKey specifies the Anthropic API key.
328+
type BackendSecurityPolicyAnthropicAPIKey struct {
329+
// SecretRef is the reference to the secret containing the Anthropic API key.
330+
// ai-gateway must be given the permission to read this secret.
331+
// The key of the secret should be "apiKey".
332+
SecretRef *gwapiv1.SecretObjectReference `json:"secretRef"`
333+
}

api/v1alpha1/shared_types.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ package v1alpha1
1515
type VersionedAPISchema struct {
1616
// Name is the name of the API schema of the AIGatewayRoute or AIServiceBackend.
1717
//
18-
// +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic
18+
// +kubebuilder:validation:Enum=OpenAI;AWSBedrock;AzureOpenAI;GCPVertexAI;GCPAnthropic;Anthropic
1919
Name APISchema `json:"name"`
2020

2121
// Version is the version of the API schema.
@@ -62,6 +62,9 @@ const (
6262
//
6363
// https://docs.anthropic.com/en/api/claude-on-vertex-ai
6464
APISchemaGCPAnthropic APISchema = "GCPAnthropic"
65+
// APISchemaAnthropic is the native Anthropic API schema.
66+
// https://docs.claude.com/en/home
67+
APISchemaAnthropic APISchema = "Anthropic"
6568
)
6669

6770
const (
@@ -80,7 +83,7 @@ type LLMRequestCost struct {
8083
// and it uses "output token" as the cost. The other types are "InputToken", "TotalToken",
8184
// and "CEL".
8285
//
83-
// +kubebuilder:validation:Enum=OutputToken;InputToken;TotalToken;CEL
86+
// +kubebuilder:validation:Enum=OutputToken;InputToken;CachedInputToken;TotalToken;CEL
8487
Type LLMRequestCostType `json:"type"`
8588
// CEL is the CEL expression to calculate the cost of the request.
8689
// The CEL expression must return a signed or unsigned integer. If the
@@ -91,13 +94,15 @@ type LLMRequestCost struct {
9194
// * model: the model name extracted from the request content. Type: string.
9295
// * backend: the backend name in the form of "name.namespace". Type: string.
9396
// * input_tokens: the number of input tokens. Type: unsigned integer.
97+
// * cached_input_tokens: the number of cached input tokens. Type: unsigned integer.
9498
// * output_tokens: the number of output tokens. Type: unsigned integer.
9599
// * total_tokens: the total number of tokens. Type: unsigned integer.
96100
//
97101
// For example, the following expressions are valid:
98102
//
99103
// * "model == 'llama' ? input_tokens + output_tokens * 0.5 : total_tokens"
100104
// * "backend == 'foo.default' ? input_tokens + output_tokens : total_tokens"
105+
// * "backend == 'bar.default' ? (input_tokens - cached_input_tokens) + cached_input_tokens * 0.1 + output_tokens : total_tokens"
101106
// * "input_tokens + output_tokens + total_tokens"
102107
// * "input_tokens * output_tokens"
103108
//
@@ -111,6 +116,8 @@ type LLMRequestCostType string
111116
const (
112117
// LLMRequestCostTypeInputToken is the cost type of the input token.
113118
LLMRequestCostTypeInputToken LLMRequestCostType = "InputToken"
119+
// LLMRequestCostTypeCachedInputToken is the cost type of the cached input token.
120+
LLMRequestCostTypeCachedInputToken LLMRequestCostType = "CachedInputToken"
114121
// LLMRequestCostTypeOutputToken is the cost type of the output token.
115122
LLMRequestCostTypeOutputToken LLMRequestCostType = "OutputToken"
116123
// LLMRequestCostTypeTotalToken is the cost type of the total token.

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/aigw/healthcheck.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
"log/slog"
1212
"time"
1313

14-
"github.com/envoyproxy/ai-gateway/internal/aigw"
14+
"github.com/tetratelabs/func-e/experimental/admin"
1515
)
1616

1717
// healthcheck looks up the Envoy subprocess, gets its admin port,
@@ -27,13 +27,12 @@ func healthcheck(ctx context.Context, _, stderr io.Writer) error {
2727
}
2828

2929
func doHealthcheck(ctx context.Context, aigwPid int, logger *slog.Logger) error {
30-
envoyAdmin, err := aigw.NewEnvoyAdminClient(ctx, aigwPid, 0)
31-
if err != nil {
30+
if adminClient, err := admin.NewAdminClient(ctx, aigwPid); err != nil {
3231
logger.Error("Failed to find Envoy admin server", "error", err)
3332
return err
34-
} else if err = envoyAdmin.IsReady(ctx); err != nil {
35-
logger.Error("Envoy admin server is not ready", "adminPort", envoyAdmin.Port(), "error", err)
33+
} else if err = adminClient.IsReady(ctx); err != nil {
34+
logger.Error("Envoy admin server is not ready", "adminPort", adminClient.Port(), "error", err)
3635
return err
3736
}
38-
return err
37+
return nil
3938
}

cmd/aigw/healthcheck_test.go

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,16 @@ package main
88
import (
99
"bytes"
1010
"context"
11-
"fmt"
11+
"io"
1212
"log/slog"
13-
"net/http"
14-
"net/http/httptest"
15-
"net/url"
1613
"os"
17-
"os/exec"
18-
"path/filepath"
19-
"strconv"
2014
"testing"
2115
"time"
2216

2317
"github.com/stretchr/testify/require"
18+
func_e "github.com/tetratelabs/func-e"
19+
"github.com/tetratelabs/func-e/api"
20+
"github.com/tetratelabs/func-e/experimental/admin"
2421
)
2522

2623
func Test_healthcheck(t *testing.T) {
@@ -38,38 +35,32 @@ func Test_healthcheck(t *testing.T) {
3835
})
3936

4037
t.Run("returns nil when ready", func(t *testing.T) {
41-
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
42-
require.Equal(t, "/ready", r.URL.Path)
43-
w.WriteHeader(http.StatusOK)
44-
_, _ = w.Write([]byte("live"))
45-
}))
46-
defer server.Close()
47-
48-
u, err := url.Parse(server.URL)
49-
require.NoError(t, err)
50-
port, err := strconv.Atoi(u.Port())
51-
require.NoError(t, err)
52-
53-
adminFile := filepath.Join(t.TempDir(), "admin-address.txt")
54-
require.NoError(t, os.WriteFile(adminFile, []byte(fmt.Sprintf("127.0.0.1:%d", port)), 0o600))
55-
5638
ctx, cancel := context.WithCancel(t.Context())
5739
defer cancel()
5840

59-
cmdStr := fmt.Sprintf("sleep 30 && echo -- --admin-address-path %s", adminFile)
60-
cmd := exec.CommandContext(ctx, "sh", "-c", cmdStr)
61-
require.NoError(t, cmd.Start())
62-
defer func() {
63-
_ = cmd.Process.Kill()
64-
_, _ = cmd.Process.Wait()
65-
}()
41+
var healthCheckErr error
42+
var log bytes.Buffer
6643

67-
time.Sleep(100 * time.Millisecond)
44+
// Even though AdminClient.IsReady exists, we don't have it injected in
45+
// Docker. This intentionally ignores the parameter.
46+
startupHook := func(ctx context.Context, _ admin.AdminClient, _ string) error {
47+
logger := slog.New(slog.NewTextHandler(&log, nil))
48+
healthCheckErr = doHealthcheck(ctx, pid, logger)
49+
// Cancel immediately to stop Envoy and complete test quickly
50+
cancel()
51+
return nil
52+
}
6853

69-
var buf bytes.Buffer
70-
logger := slog.New(slog.NewTextHandler(&buf, nil))
71-
err = doHealthcheck(t.Context(), pid, logger)
54+
// Run with minimal Envoy config
55+
err := func_e.Run(ctx, []string{
56+
"--config-yaml",
57+
"admin: {address: {socket_address: {address: '127.0.0.1', port_value: 0}}}",
58+
}, api.Out(io.Discard), api.EnvoyOut(io.Discard), api.EnvoyErr(io.Discard), admin.WithStartupHook(startupHook))
59+
60+
// Expect nil error since Run returns nil on context cancellation (documented behavior)
7261
require.NoError(t, err)
73-
require.Empty(t, buf)
62+
63+
require.NoError(t, healthCheckErr)
64+
require.Empty(t, log)
7465
})
7566
}

0 commit comments

Comments
 (0)