From 328c7d76a49dd31d40fcb3a91957e08ac5ae3884 Mon Sep 17 00:00:00 2001
From: Adrian Cole
Date: Tue, 14 Oct 2025 13:01:57 -0400
Subject: [PATCH 1/2] refactor: separate XDG directories for config, data, state and runtime

**Description**

This refactors aigw to use distinct directories following XDG Base Directory Specification conventions:

- --config-home/$AIGW_CONFIG_HOME: Configuration files (default: ~/.config/aigw)
- --data-home/$AIGW_DATA_HOME: Envoy binaries via func-e (default: ~/.local/share/aigw)
- --state-home/$AIGW_STATE_HOME: Run logs and state (default: ~/.local/state/aigw)
- --runtime-dir/$AIGW_RUNTIME_DIR: Ephemeral files like UDS (default: /tmp/aigw-${UID})

This separation aligns with XDG principles where configuration, data, state, and runtime files are independently configurable for different storage tiers. This is particularly useful for Docker deployments to map volumes appropriately.

This also adds --run-id/$AIGW_RUN_ID to override the default YYYYMMDD_HHMMSS_UUU timestamp format with a custom identifier. Setting this to '0' enables predictable paths for Docker/Kubernetes single-run scenarios.

Signed-off-by: Adrian Cole --- .github/workflows/build_and_test.yaml | 29 ++- Dockerfile | 28 ++- cmd/aigw/config.go | 28 +++ cmd/aigw/config_test.go | 67 +++++++ cmd/aigw/main.go | 90 ++++++++- cmd/aigw/main_test.go | 276 +++++++++++++++++++++++--- cmd/aigw/run.go | 101 +++++----- cmd/aigw/run_test.go | 28 ++- cmd/aigw/runopts.go | 95 +++++++++ cmd/aigw/runopts_test.go | 204 +++++++++++++++++++ go.mod | 2 +- go.sum | 5 +- internal/xdg/directories.go | 34 ++++ site/docs/cli/installation.md | 25 ++- site/docs/cli/run.md | 57 ++++++ 15 files changed, 964 insertions(+), 105 deletions(-) create mode 100644 cmd/aigw/runopts.go create mode 100644 cmd/aigw/runopts_test.go create mode 100644 internal/xdg/directories.go diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 3050cb7c27..d69b9b299b 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -67,7 +67,32 @@ jobs: ~/go/pkg/mod ~/go/bin key: unittest-${{ hashFiles('**/go.mod', '**/go.sum', '**/Makefile') }}-${{ matrix.os }} - - run: make test-coverage + + # This runs ollama server to be used in `aigw run` end-to-end tests. + # The test case using it will be skipped if ollama is not available. + # Since installing it and pulling the model takes a while, we do it only for Linux runners. + - name: Start Ollama server + if: matrix.os == 'ubuntu-latest' + run: | + curl -fsSL https://ollama.com/install.sh | sh && sudo systemctl stop ollama + nohup ollama serve > ollama.log 2>&1 & + timeout 30 sh -c 'until nc -z localhost 11434; do sleep 1; done' + grep _MODEL .env.ollama | cut -d= -f2 | xargs -I{} ollama pull {} + env: + OLLAMA_CONTEXT_LENGTH: 131072 # Larger context for goose + OLLAMA_HOST: 0.0.0.0 + # Download Envoy via func-e using implicit default version `aigw` would + # otherwise need to download during test runs. 
+ - name: Download Envoy via func-e + run: go tool -modfile=tools/go.mod func-e run --version + env: + FUNC_E_DATA_HOME: ~/.local/share/aigw + - env: + TEST_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_BEDROCK_USER_AWS_ACCESS_KEY_ID }} + TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BEDROCK_USER_AWS_SECRET_ACCESS_KEY }} + TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }} + TEST_GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: make test-coverage - if: failure() run: cat ollama.log || true - name: Upload coverage to Codecov @@ -360,7 +385,7 @@ jobs: - name: Download Envoy via func-e run: go tool -modfile=tools/go.mod func-e run --version env: - FUNC_E_HOME: /tmp/envoy-gateway # hard-coded directory in EG + FUNC_E_DATA_HOME: ~/.local/share/aigw - name: Install Goose env: GOOSE_VERSION: v1.10.0 diff --git a/Dockerfile b/Dockerfile index 2041140add..6660699a7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,30 +15,44 @@ FROM golang:1.25 AS envoy-downloader ARG TARGETOS ARG TARGETARCH ARG COMMAND_NAME -# Hard-coded directory for envoy-gateway resources -# See https://github.com/envoyproxy/gateway/blob/d95ce4ce564cfff47ed1fd6c97e29c1058aa4a61/internal/infrastructure/host/proxy_infra.go#L16 -WORKDIR /tmp/envoy-gateway +# Download Envoy binary to AIGW_DATA_HOME for the nonroot user +WORKDIR /build RUN if [ "$COMMAND_NAME" = "aigw" ]; then \ go install github.com/tetratelabs/func-e/cmd/func-e@latest && \ - func-e --platform ${TARGETOS}/${TARGETARCH} --home-dir . run --version; \ + FUNC_E_DATA_HOME=/home/nonroot/.local/share/aigw func-e --platform ${TARGETOS}/${TARGETARCH} run --version; \ fi \ - && mkdir -p certs \ - && chown -R 65532:65532 . \ - && chmod -R 755 . 
+ # Create directories for the nonroot user + && mkdir -p /home/nonroot /tmp/envoy-gateway/certs \ + && chown -R 65532:65532 /home/nonroot /tmp/envoy-gateway \ + && chmod -R 755 /home/nonroot /tmp/envoy-gateway FROM gcr.io/distroless/${VARIANT}-debian12:nonroot ARG COMMAND_NAME ARG TARGETOS ARG TARGETARCH +# Copy pre-downloaded Envoy binary and EG certs directory +COPY --from=envoy-downloader /home/nonroot /home/nonroot COPY --from=envoy-downloader /tmp/envoy-gateway /tmp/envoy-gateway COPY ./out/${COMMAND_NAME}-${TARGETOS}-${TARGETARCH} /app USER nonroot:nonroot +# Set AIGW_RUN_ID=0 for predictable file paths in containers. +# This creates the following directory structure: +# ~/.config/aigw/ - XDG config (e.g., envoy-version preference) +# ~/.local/share/aigw/ - XDG data (downloaded Envoy binaries via func-e) +# ~/.local/state/aigw/runs/0/ - XDG state (aigw.log, envoy-gateway-config.yaml, extproc-config.yaml, resources/) +# ~/.local/state/aigw/envoy-runs/0/ - XDG state (func-e stdout.log, stderr.log) +# /tmp/aigw-0/ - XDG runtime (uds.sock, admin-address.txt) +ENV AIGW_RUN_ID=0 + # The healthcheck subcommand performs an HTTP GET to localhost:1064/healthlthy for "aigw run". # NOTE: This is only for aigw in practice since this is ignored by Kubernetes. HEALTHCHECK --interval=10s --timeout=5s --start-period=5s --retries=3 \ CMD ["/app", "healthcheck"] ENTRYPOINT ["/app"] + +# Default CMD for aigw - uses AIGW_RUN_ID from environment +CMD ["run"] diff --git a/cmd/aigw/config.go b/cmd/aigw/config.go index b0bd553403..9d83f2017f 100644 --- a/cmd/aigw/config.go +++ b/cmd/aigw/config.go @@ -9,7 +9,9 @@ import ( "errors" "fmt" "os" + "path/filepath" "reflect" + "strings" "github.com/a8m/envsubst" @@ -59,3 +61,29 @@ func readConfig(path string, mcpServers *autoconfig.MCPServers, debug bool) (str } return envsubst.String(config) } + +// expandPath expands environment variables and tilde in paths, then converts to absolute path. +// Returns empty string if input is empty. 
+// Replaces ~/ with ${HOME}/ before expanding environment variables. +func expandPath(path string) string { + if path == "" { + return "" + } + + // Replace ~/ with ${HOME}/ + if strings.HasPrefix(path, "~/") { + path = "${HOME}/" + path[2:] + } + + // Expand environment variables + expanded := os.ExpandEnv(path) + + // Convert to absolute path + abs, err := filepath.Abs(expanded) + if err != nil { + // If we can't get absolute path, return expanded path + return expanded + } + + return abs +} diff --git a/cmd/aigw/config_test.go b/cmd/aigw/config_test.go index 81556ebb10..9d312972a6 100644 --- a/cmd/aigw/config_test.go +++ b/cmd/aigw/config_test.go @@ -116,6 +116,73 @@ func TestReadConfig(t *testing.T) { }) } +func TestExpandPath(t *testing.T) { + homeDir, err := os.UserHomeDir() + require.NoError(t, err) + + tests := []struct { + name string + path string + envVars map[string]string + expected string + }{ + { + name: "empty path returns empty", + path: "", + expected: "", + }, + { + name: "tilde path", + path: "~/test/file.txt", + expected: filepath.Join(homeDir, "test/file.txt"), + }, + { + name: "tilde slash returns HOME", + path: "~/", + expected: homeDir, + }, + { + name: "absolute path unchanged", + path: "/absolute/path/file.txt", + expected: "/absolute/path/file.txt", + }, + { + name: "env var expansion", + path: "${HOME}/test", + expected: filepath.Join(homeDir, "test"), + }, + { + name: "custom env var", + path: "${CUSTOM_DIR}/file.txt", + envVars: map[string]string{"CUSTOM_DIR": "/custom"}, + expected: "/custom/file.txt", + }, + { + name: "tilde with env var", + path: "~/test/${USER}", + envVars: map[string]string{"USER": "testuser"}, + expected: filepath.Join(homeDir, "test/testuser"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + for k, v := range tt.envVars { + t.Setenv(k, v) + } + + actual := expandPath(tt.path) + require.Equal(t, tt.expected, actual) + }) + } + t.Run("relative/path", func(t *testing.T) { + cwd, err 
:= os.Getwd() + require.NoError(t, err) + expected := filepath.Join(cwd, "relative/path") + actual := expandPath("relative/path") + require.Equal(t, expected, actual) + }) +} + func TestRecreateDir(t *testing.T) { tests := []struct { name string diff --git a/cmd/aigw/main.go b/cmd/aigw/main.go index 3d1413ff10..d54ce7de3a 100644 --- a/cmd/aigw/main.go +++ b/cmd/aigw/main.go @@ -12,6 +12,7 @@ import ( "io" "log" "os" + "time" "github.com/alecthomas/kong" ctrl "sigs.k8s.io/controller-runtime" @@ -19,11 +20,18 @@ import ( "github.com/envoyproxy/ai-gateway/cmd/extproc/mainlib" "github.com/envoyproxy/ai-gateway/internal/autoconfig" "github.com/envoyproxy/ai-gateway/internal/version" + "github.com/envoyproxy/ai-gateway/internal/xdg" ) type ( // cmd corresponds to the top-level `aigw` command. cmd struct { + // Global XDG flags + ConfigHome string `name:"config-home" env:"AIGW_CONFIG_HOME" help:"Configuration files directory. Defaults to ~/.config/aigw" type:"path"` + DataHome string `name:"data-home" env:"AIGW_DATA_HOME" help:"Downloaded Envoy binaries directory. Defaults to ~/.local/share/aigw" type:"path"` + StateHome string `name:"state-home" env:"AIGW_STATE_HOME" help:"Persistent state and logs directory. Defaults to ~/.local/state/aigw" type:"path"` + RuntimeDir string `name:"runtime-dir" env:"AIGW_RUNTIME_DIR" help:"Ephemeral runtime files directory. Defaults to /tmp/aigw-$UID" type:"path"` + // Version is the sub-command to show the version. Version struct{} `cmd:"" help:"Show version."` // Run is the sub-command parsed by the `cmdRun` struct. @@ -34,16 +42,74 @@ type ( // cmdRun corresponds to `aigw run` command. cmdRun struct { Debug bool `help:"Enable debug logging emitted to stderr."` - Path string `arg:"" name:"path" optional:"" help:"Path to the AI Gateway configuration yaml file. Optional when at least OPENAI_API_KEY or AZURE_OPENAI_API_KEY is set." 
type:"path"` + Path string `arg:"" name:"path" optional:"" help:"Path to the AI Gateway configuration yaml file. Defaults to $AIGW_CONFIG_HOME/config.yaml if exists, otherwise optional when at least OPENAI_API_KEY or AZURE_OPENAI_API_KEY is set." type:"path"` AdminPort int `help:"HTTP port for the admin server (serves /metrics and /health endpoints)." default:"1064"` McpConfig string `name:"mcp-config" help:"Path to MCP servers configuration file." type:"path"` McpJSON string `name:"mcp-json" help:"JSON string of MCP servers configuration."` + RunID string `name:"run-id" env:"AIGW_RUN_ID" help:"Run identifier for this invocation. Defaults to timestamp-based ID or $AIGW_RUN_ID. Use '0' for Docker/Kubernetes."` mcpConfig *autoconfig.MCPServers `kong:"-"` // Internal field: normalized MCP JSON data + dirs *xdg.Directories `kong:"-"` // Internal field: XDG directories, set by BeforeApply + runOpts *runOpts `kong:"-"` // Internal field: run options, set by Validate } // cmdHealthcheck corresponds to `aigw healthcheck` command. cmdHealthcheck struct{} ) +// BeforeApply is called by Kong before applying defaults to set XDG directory defaults. 
+func (c *cmd) BeforeApply(_ *kong.Context) error { + // Expand paths unconditionally (handles ~/, env vars, and converts to absolute) + // Set defaults only if not set (empty string) + if c.ConfigHome == "" { + c.ConfigHome = "~/.config/aigw" + } + c.ConfigHome = expandPath(c.ConfigHome) + + if c.DataHome == "" { + c.DataHome = "~/.local/share/aigw" + } + c.DataHome = expandPath(c.DataHome) + + if c.StateHome == "" { + c.StateHome = "~/.local/state/aigw" + } + c.StateHome = expandPath(c.StateHome) + + if c.RuntimeDir == "" { + c.RuntimeDir = "/tmp/aigw-${UID}" + } + c.RuntimeDir = expandPath(c.RuntimeDir) + + // Populate Run.dirs with expanded XDG directories for use in Run.BeforeApply + c.Run.dirs = &xdg.Directories{ + ConfigHome: c.ConfigHome, + DataHome: c.DataHome, + StateHome: c.StateHome, + RuntimeDir: c.RuntimeDir, + } + + return nil +} + +// BeforeApply is called by Kong before applying defaults to set computed default values. +func (c *cmdRun) BeforeApply(_ *kong.Context) error { + // Set RunID default if not provided + if c.RunID == "" { + c.RunID = generateRunID(time.Now()) + } + + // Set Path to default config.yaml if it exists and Path not provided + if c.Path == "" && c.dirs != nil { + defaultPath := c.dirs.ConfigHome + "/config.yaml" + if _, err := os.Stat(defaultPath); err == nil { + c.Path = defaultPath + } + } + // Expand Path (handles ~/, env vars, and converts to absolute) + c.Path = expandPath(c.Path) + + return nil +} + // Validate is called by Kong after parsing to validate the cmdRun arguments. 
func (c *cmdRun) Validate() error { if c.McpConfig != "" && c.McpJSON != "" { @@ -53,6 +119,8 @@ func (c *cmdRun) Validate() error { return fmt.Errorf("you must supply at least OPENAI_API_KEY or AZURE_OPENAI_API_KEY or a config file path") } + c.McpConfig = expandPath(c.McpConfig) + var mcpJSON string if c.McpConfig != "" { raw, err := os.ReadFile(c.McpConfig) @@ -71,11 +139,18 @@ func (c *cmdRun) Validate() error { } c.mcpConfig = &mcpConfig } + + opts, err := newRunOpts(c.dirs, c.RunID, c.Path, mainlib.Main) + if err != nil { + return fmt.Errorf("failed to create run options: %w", err) + } + c.runOpts = opts + return nil } type ( - runFn func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error + runFn func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error healthcheckFn func(context.Context, io.Writer, io.Writer) error ) @@ -106,11 +181,12 @@ func doMain(ctx context.Context, stdout, stderr io.Writer, args []string, exitFn } parsed, err := parser.Parse(args) parser.FatalIfErrorf(err) + switch parsed.Command() { case "version": _, _ = fmt.Fprintf(stdout, "Envoy AI Gateway CLI: %s\n", version.Version) case "run", "run ": - err = rf(ctx, c.Run, runOpts{extProcLauncher: mainlib.Main}, stdout, stderr) + err = rf(ctx, c.Run, c.Run.runOpts, stdout, stderr) if err != nil { log.Fatalf("Error running: %v", err) } @@ -123,3 +199,11 @@ func doMain(ctx context.Context, stdout, stderr io.Writer, args []string, exitFn panic("unreachable") } } + +// generateRunID generates a unique run identifier based on the current time. +// Defaults to the same convention as func-e: "YYYYMMDD_HHMMSS_UUU" format. +// Last 3 digits of microseconds to allow concurrent runs. 
+func generateRunID(now time.Time) string { + micro := now.Nanosecond() / 1000 % 1000 + return fmt.Sprintf("%s_%03d", now.Format("20060102_150405"), micro) +} diff --git a/cmd/aigw/main_test.go b/cmd/aigw/main_test.go index a663c6d5d3..7fffe32223 100644 --- a/cmd/aigw/main_test.go +++ b/cmd/aigw/main_test.go @@ -30,21 +30,29 @@ func Test_doMain(t *testing.T) { { name: "help", args: []string{"--help"}, - expOut: `Usage: aigw + expOut: `Usage: aigw [flags] Envoy AI Gateway CLI Flags: - -h, --help Show context-sensitive help. + -h, --help Show context-sensitive help. + --config-home=STRING Configuration files directory. Defaults to + ~/.config/aigw ($AIGW_CONFIG_HOME) + --data-home=STRING Downloaded Envoy binaries directory. Defaults to + ~/.local/share/aigw ($AIGW_DATA_HOME) + --state-home=STRING Persistent state and logs directory. Defaults to + ~/.local/state/aigw ($AIGW_STATE_HOME) + --runtime-dir=STRING Ephemeral runtime files directory. Defaults to + /tmp/aigw-$UID ($AIGW_RUNTIME_DIR) Commands: - version + version [flags] Show version. run [] [flags] Run the AI Gateway locally for given configuration. - healthcheck + healthcheck [flags] Docker HEALTHCHECK command. Run "aigw --help" for more information on a command. @@ -60,53 +68,73 @@ Run "aigw --help" for more information on a command. name: "version help", args: []string{"version", "--help"}, expPanicCode: ptr.To(0), - expOut: `Usage: aigw version + expOut: `Usage: aigw version [flags] Show version. Flags: - -h, --help Show context-sensitive help. + -h, --help Show context-sensitive help. + --config-home=STRING Configuration files directory. Defaults to + ~/.config/aigw ($AIGW_CONFIG_HOME) + --data-home=STRING Downloaded Envoy binaries directory. Defaults to + ~/.local/share/aigw ($AIGW_DATA_HOME) + --state-home=STRING Persistent state and logs directory. Defaults to + ~/.local/state/aigw ($AIGW_STATE_HOME) + --runtime-dir=STRING Ephemeral runtime files directory. 
Defaults to + /tmp/aigw-$UID ($AIGW_RUNTIME_DIR) `, }, { name: "run no arg", args: []string{"run"}, - rf: func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error { return nil }, + rf: func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error { return nil }, expPanicCode: ptr.To(80), }, { name: "run with OpenAI env", args: []string{"run"}, env: map[string]string{"OPENAI_API_KEY": "dummy-key"}, - rf: func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error { return nil }, + rf: func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error { return nil }, }, { name: "run help", args: []string{"run", "--help"}, - rf: func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error { return nil }, + rf: func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error { return nil }, expOut: `Usage: aigw run [] [flags] Run the AI Gateway locally for given configuration. Arguments: - [] Path to the AI Gateway configuration yaml file. Optional when at - least OPENAI_API_KEY or AZURE_OPENAI_API_KEY is set. + [] Path to the AI Gateway configuration yaml file. Defaults to + $AIGW_CONFIG_HOME/config.yaml if exists, otherwise optional when + at least OPENAI_API_KEY or AZURE_OPENAI_API_KEY is set. Flags: - -h, --help Show context-sensitive help. + -h, --help Show context-sensitive help. + --config-home=STRING Configuration files directory. Defaults to + ~/.config/aigw ($AIGW_CONFIG_HOME) + --data-home=STRING Downloaded Envoy binaries directory. Defaults to + ~/.local/share/aigw ($AIGW_DATA_HOME) + --state-home=STRING Persistent state and logs directory. Defaults to + ~/.local/state/aigw ($AIGW_STATE_HOME) + --runtime-dir=STRING Ephemeral runtime files directory. Defaults to + /tmp/aigw-$UID ($AIGW_RUNTIME_DIR) - --debug Enable debug logging emitted to stderr. - --admin-port=1064 HTTP port for the admin server (serves /metrics and - /health endpoints). - --mcp-config=STRING Path to MCP servers configuration file. 
- --mcp-json=STRING JSON string of MCP servers configuration. + --debug Enable debug logging emitted to stderr. + --admin-port=1064 HTTP port for the admin server (serves /metrics + and /health endpoints). + --mcp-config=STRING Path to MCP servers configuration file. + --mcp-json=STRING JSON string of MCP servers configuration. + --run-id=STRING Run identifier for this invocation. Defaults to + timestamp-based ID or $AIGW_RUN_ID. Use '0' for + Docker/Kubernetes ($AIGW_RUN_ID). `, expPanicCode: ptr.To(0), }, { name: "run with path", args: []string{"run", "./path"}, - rf: func(_ context.Context, c cmdRun, _ runOpts, _, _ io.Writer) error { + rf: func(_ context.Context, c cmdRun, _ *runOpts, _, _ io.Writer) error { abs, err := filepath.Abs("./path") require.NoError(t, err) require.Equal(t, abs, c.Path) @@ -132,41 +160,225 @@ Flags: } } +func TestCmd_BeforeApply(t *testing.T) { + tests := []struct { + name string + configHome string + dataHome string + stateHome string + runtimeDir string + envVars map[string]string + expectedConfig string + expectedData string + expectedState string + expectedRuntime string + }{ + { + name: "sets defaults when all empty", + configHome: "", + dataHome: "", + stateHome: "", + runtimeDir: "", + envVars: map[string]string{"HOME": "/home/test", "UID": "1000"}, + expectedConfig: "/home/test/.config/aigw", + expectedData: "/home/test/.local/share/aigw", + expectedState: "/home/test/.local/state/aigw", + expectedRuntime: "/tmp/aigw-1000", + }, + { + name: "preserves explicit values", + configHome: "/custom/config", + dataHome: "/custom/data", + stateHome: "/custom/state", + runtimeDir: "/custom/runtime", + expectedConfig: "/custom/config", + expectedData: "/custom/data", + expectedState: "/custom/state", + expectedRuntime: "/custom/runtime", + }, + { + name: "mixes defaults and explicit values", + configHome: "/custom/config", + dataHome: "", + stateHome: "/custom/state", + runtimeDir: "", + envVars: map[string]string{"HOME": "/home/test", 
"UID": "1000"}, + expectedConfig: "/custom/config", + expectedData: "/home/test/.local/share/aigw", + expectedState: "/custom/state", + expectedRuntime: "/tmp/aigw-1000", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + for k, v := range tt.envVars { + t.Setenv(k, v) + } + + c := cmd{ + ConfigHome: tt.configHome, + DataHome: tt.dataHome, + StateHome: tt.stateHome, + RuntimeDir: tt.runtimeDir, + } + + err := c.BeforeApply(nil) + require.NoError(t, err) + + require.Equal(t, tt.expectedConfig, c.ConfigHome) + require.Equal(t, tt.expectedData, c.DataHome) + require.Equal(t, tt.expectedState, c.StateHome) + require.Equal(t, tt.expectedRuntime, c.RuntimeDir) + + // Verify Run.dirs is populated + require.NotNil(t, c.Run.dirs) + require.Equal(t, tt.expectedConfig, c.Run.dirs.ConfigHome) + require.Equal(t, tt.expectedData, c.Run.dirs.DataHome) + require.Equal(t, tt.expectedState, c.Run.dirs.StateHome) + require.Equal(t, tt.expectedRuntime, c.Run.dirs.RuntimeDir) + }) + } +} + +func TestCmdRun_BeforeApply(t *testing.T) { + tests := []struct { + name string + path string + runID string + setupDirs func(t *testing.T, configHome string) + expectedPath string + expectedID string // empty means check it's generated + }{ + { + name: "generates runID when empty", + path: "", + runID: "", + expectedPath: "", + expectedID: "", // will verify it's non-empty + }, + { + name: "preserves explicit runID", + path: "", + runID: "my-custom-id", + expectedPath: "", + expectedID: "my-custom-id", + }, + { + name: "preserves explicit path", + path: "/explicit/config.yaml", + runID: "", + expectedPath: "/explicit/config.yaml", + expectedID: "", + }, + { + name: "sets path to default when config.yaml exists", + path: "", + runID: "", + setupDirs: func(t *testing.T, configHome string) { + err := os.WriteFile(filepath.Join(configHome, "config.yaml"), []byte("test"), 0o600) + require.NoError(t, err) + }, + expectedPath: "", // will be {configHome}/config.yaml + 
expectedID: "", + }, + { + name: "leaves path empty when config.yaml does not exist", + path: "", + runID: "", + expectedPath: "", + expectedID: "", + }, + { + name: "preserves explicit path even when config.yaml exists", + path: "/explicit/config.yaml", + runID: "", + setupDirs: func(t *testing.T, configHome string) { + err := os.WriteFile(filepath.Join(configHome, "config.yaml"), []byte("test"), 0o600) + require.NoError(t, err) + }, + expectedPath: "/explicit/config.yaml", + expectedID: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + configHome := t.TempDir() + + if tt.setupDirs != nil { + tt.setupDirs(t, configHome) + } + + dirs := newTempDirectories(t) + dirs.ConfigHome = configHome + + c := cmdRun{ + Path: tt.path, + RunID: tt.runID, + dirs: dirs, + } + + err := c.BeforeApply(nil) + require.NoError(t, err) + + // Check Path + if tt.expectedPath == "" && tt.path == "" && tt.setupDirs != nil { + // Special case: should be set to default + expected := filepath.Join(configHome, "config.yaml") + require.Equal(t, expected, c.Path) + } else { + require.Equal(t, tt.expectedPath, c.Path) + } + + // Check RunID + if tt.expectedID == "" && tt.runID == "" { + // Should be generated + require.NotEmpty(t, c.RunID) + // Verify format: YYYYMMDD_HHMMSS_UUU + require.Regexp(t, `^\d{8}_\d{6}_\d{3}$`, c.RunID) + } else { + require.Equal(t, tt.expectedID, c.RunID) + } + }) + } +} + func TestCmdRun_Validate(t *testing.T) { tests := []struct { name string - cmd cmdRun + path string envVars map[string]string expectedError string }{ { name: "no config and no env vars", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{}, expectedError: "you must supply at least OPENAI_API_KEY or AZURE_OPENAI_API_KEY or a config file path", }, { name: "config path provided", - cmd: cmdRun{Path: "/path/to/config.yaml"}, + path: "/path/to/config.yaml", envVars: map[string]string{}, }, { name: "OPENAI_API_KEY set", - cmd: cmdRun{Path: ""}, + path: "", 
envVars: map[string]string{ "OPENAI_API_KEY": "sk-test", }, }, { name: "AZURE_OPENAI_API_KEY set", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{ "AZURE_OPENAI_API_KEY": "azure-key", }, }, { name: "both API keys set", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{ "OPENAI_API_KEY": "sk-test", "AZURE_OPENAI_API_KEY": "azure-key", @@ -174,14 +386,14 @@ func TestCmdRun_Validate(t *testing.T) { }, { name: "config path and OPENAI_API_KEY both set", - cmd: cmdRun{Path: "/path/to/config.yaml"}, + path: "/path/to/config.yaml", envVars: map[string]string{ "OPENAI_API_KEY": "sk-test", }, }, { name: "config path and AZURE_OPENAI_API_KEY both set", - cmd: cmdRun{Path: "/path/to/config.yaml"}, + path: "/path/to/config.yaml", envVars: map[string]string{ "AZURE_OPENAI_API_KEY": "azure-key", }, @@ -194,12 +406,22 @@ func TestCmdRun_Validate(t *testing.T) { t.Setenv(k, v) } - err := tt.cmd.Validate() + cmd := cmdRun{ + Path: tt.path, + RunID: "test-run-id", + dirs: newTempDirectories(t), + } + + err := cmd.Validate() if tt.expectedError != "" { require.EqualError(t, err, tt.expectedError) + require.Nil(t, cmd.runOpts) } else { require.NoError(t, err) + require.NotNil(t, cmd.runOpts) + require.Equal(t, tt.path, cmd.runOpts.configPath) + require.Equal(t, "test-run-id", cmd.runOpts.runID) } }) } diff --git a/cmd/aigw/run.go b/cmd/aigw/run.go index dc2c9d4a38..6de96fe8db 100644 --- a/cmd/aigw/run.go +++ b/cmd/aigw/run.go @@ -39,6 +39,7 @@ import ( "github.com/envoyproxy/ai-gateway/internal/extensionserver" "github.com/envoyproxy/ai-gateway/internal/filterapi" "github.com/envoyproxy/ai-gateway/internal/internalapi" + "github.com/envoyproxy/ai-gateway/internal/xdg" ) // This is the template for the Envoy Gateway configuration where PLACEHOLDER_TMPDIR will be replaced with the temporary @@ -79,34 +80,20 @@ type runCmdContext struct { fakeClientSet *fake.Clientset } -// runOpts are the options for the run command. 
-type runOpts struct { - // extProcLauncher is the function used to launch the external processor. - extProcLauncher func(ctx context.Context, args []string, w io.Writer) error -} - // run starts the AI Gateway locally for a given configuration. // -// This will create a temporary directory and a file: -// 1. ${os.TempDir}/envoy-gateway-config.yaml: This contains the configuration for the Envoy Gateway agent to run, derived from envoyGatewayConfig. -// 2. ${os.TempDir}/envoy-ai-gateway-resources: This will contain the EG resource generated by the translation and deployed by EG. -func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) error { +// All files are written to XDG Base Directory locations: +// 1. State: $AIGW_STATE_HOME/runs/{runID}/ - configs, resources, logs +// 2. Runtime: $AIGW_RUNTIME_DIR/{runID}/ - ephemeral sockets +func run(ctx context.Context, c cmdRun, o *runOpts, stdout, stderr io.Writer) error { start := time.Now() - var debugLogger *slog.Logger - if c.Debug { - logHandler := slog.NewTextHandler(stderr, &slog.HandlerOptions{}) - ctrl.SetLogger(logr.FromSlogHandler(logHandler)) - debugLogger = slog.New(logHandler) - } else { - stdout = io.Discard // until GW logs are reasonable - debugLogger = slog.New(slog.NewTextHandler(io.Discard, nil)) - } // First, we need to create the self-signed certificates used for communication between the EG and Envoy. // Certificates will be placed at /tmp/envoy-gateway/certs, which is currently is not configurable: // https://github.com/envoyproxy/gateway/blob/779c0a6bbdf7dacbf25a730140a112f99c239f0e/internal/infrastructure/host/infra.go#L22-L23 // - // TODO: maybe make it skip if the certs are already there, but not sure if it's worth the complexity. 
+ // TODO: Override Envoy Gateway cert directory to use $AIGW_RUNTIME_DIR once possible via + // https://github.com/envoyproxy/gateway/pull/7225 certGenOut := &bytes.Buffer{} certGen := root.GetRootCommand() certGen.SetOut(certGenOut) @@ -116,30 +103,39 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err return fmt.Errorf("failed to execute certgen: %w: %s", err, certGenOut.String()) } - tmpdir := filepath.Join(os.TempDir(), "aigw-run") - if err := recreateDir(tmpdir); err != nil { - return fmt.Errorf("failed to create temporary directory %s: %w", tmpdir, err) + // Create aigw log file in run directory + aigwLogFile, err := os.Create(o.logPath) + if err != nil { + return fmt.Errorf("failed to create aigw log file %s: %w", o.logPath, err) } - egConfigPath := filepath.Join(tmpdir, "envoy-gateway-config.yaml") // 1. The path to the Envoy Gateway config. - resourcesTmpdir := filepath.Join(tmpdir, "/envoy-ai-gateway-resources") // 2. The path to the resources. - if err := recreateDir(resourcesTmpdir); err != nil { - return err + defer aigwLogFile.Close() + + // Tee debug logger to both stderr and log file + var logWriter io.Writer + if c.Debug { + logWriter = io.MultiWriter(stderr, aigwLogFile) + // keep stdout visible in debug mode + } else { + logWriter = aigwLogFile + stdout = io.Discard // until GW logs are reasonable } - // Write the Envoy Gateway config which points to the resourcesTmpdir to tell Envoy Gateway where to find the resources. - debugLogger.Info("Writing Envoy Gateway config", "path", egConfigPath) - err := os.WriteFile(egConfigPath, []byte(strings.ReplaceAll( - envoyGatewayConfigTemplate, "PLACEHOLDER_TMPDIR", resourcesTmpdir), + logHandler := slog.NewTextHandler(logWriter, &slog.HandlerOptions{}) + ctrl.SetLogger(logr.FromSlogHandler(logHandler)) + debugLogger := slog.New(logHandler) + + // Write the Envoy Gateway config which points to the resourcesDir to tell Envoy Gateway where to find the resources. 
+ resourcesDir := filepath.Dir(o.egResourcesPath) + debugLogger.Info("Writing Envoy Gateway config", "path", o.egConfigPath) + err = os.WriteFile(o.egConfigPath, []byte(strings.ReplaceAll( + envoyGatewayConfigTemplate, "PLACEHOLDER_TMPDIR", resourcesDir), ), 0o600) if err != nil { - return fmt.Errorf("failed to write file %s: %w", egConfigPath, err) + return fmt.Errorf("failed to write file %s: %w", o.egConfigPath, err) } - // Write the Envoy Gateway resources into a file under resourcesTmpdir. - resourceYamlPath := filepath.Join(resourcesTmpdir, "config.yaml") - debugLogger.Info("Creating Envoy Gateway resource file", "path", resourceYamlPath) - udsPath := filepath.Join(tmpdir, "uds.sock") - _ = os.Remove(udsPath) + // Write the Envoy Gateway resources into a file under resourcesDir. + debugLogger.Info("Creating Envoy Gateway resource file", "path", o.egResourcesPath) // Do the translation of the given AI Gateway resources Yaml into Envoy Gateway resources and write them to the file. resourcesBuf := &bytes.Buffer{} @@ -148,12 +144,12 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err envoyGatewayResourcesOut: resourcesBuf, stderrLogger: debugLogger, stderr: stderr, - tmpdir: tmpdir, - udsPath: udsPath, + tmpdir: filepath.Dir(o.logPath), // runDir + udsPath: o.extprocUDSPath, adminPort: c.AdminPort, extProcLauncher: o.extProcLauncher, } - aiGatewayResourcesYaml, err := readConfig(c.Path, c.mcpConfig, c.Debug) + aiGatewayResourcesYaml, err := readConfig(o.configPath, c.mcpConfig, c.Debug) if err != nil { return err } @@ -161,13 +157,13 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err if err != nil { return fmt.Errorf("failed to write envoy resources and run extproc: %w", err) } - err = os.WriteFile(resourceYamlPath, resourcesBuf.Bytes(), 0o600) + err = os.WriteFile(o.egResourcesPath, resourcesBuf.Bytes(), 0o600) if err != nil { - return fmt.Errorf("failed to write file %s: %w", resourceYamlPath, 
err) + return fmt.Errorf("failed to write file %s: %w", o.egResourcesPath, err) } - // Set up middleware with startup hook now that we know listenerPort - redirectEnvoyStdio := newEnvoyRunMiddleware(start, listenerPort, stdout, stderr) + // Set up middleware with XDG directories and startup hook now that we know listenerPort + redirectEnvoyStdio := newEnvoyRunMiddleware(&o.Directories, o.runID, start, listenerPort, stdout, stderr) ctx = middleware.WithRunMiddleware(ctx, redirectEnvoyStdio) lis, err := net.Listen("tcp", "localhost:1061") @@ -175,7 +171,7 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err return fmt.Errorf("failed to listen: %w", err) } s := grpc.NewServer() - extSrv := extensionserver.New(fakeClient, ctrl.Log, udsPath, true) + extSrv := extensionserver.New(fakeClient, ctrl.Log, o.extprocUDSPath, true) egextension.RegisterEnvoyGatewayExtensionServer(s, extSrv) grpc_health_v1.RegisterHealthServer(s, extSrv) @@ -212,7 +208,7 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err server.SetOut(io.Discard) server.SetErr(io.Discard) } - server.SetArgs([]string{"server", "--config-path", egConfigPath}) + server.SetArgs([]string{"server", "--config-path", o.egConfigPath}) // Start the gateway server. This will block until the server is stopped. // The startup hook (configured via middleware) will print the status message when Envoy is ready. @@ -223,8 +219,8 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err } // newEnvoyRunMiddleware sets options for running Envoy and returns a middleware -// that configures Envoy I/O and sets up a startup hook to print the ready message. -func newEnvoyRunMiddleware(start time.Time, listenerPort int, stdout, stderr io.Writer) func(next api.RunFunc) api.RunFunc { +// that configures Envoy I/O, XDG directories, and sets up a startup hook to print the ready message. 
+func newEnvoyRunMiddleware(dirs *xdg.Directories, runID string, start time.Time, listenerPort int, stdout, stderr io.Writer) func(next api.RunFunc) api.RunFunc { // Define startup hook that will be called when Envoy admin is ready startupHook := func(_ context.Context, adminClient admin.AdminClient, _ string) error { // Print a status message without any timestamp formatting @@ -234,13 +230,18 @@ func newEnvoyRunMiddleware(start time.Time, listenerPort int, stdout, stderr io. return nil } - // aigw is primarily an Envoy controller, so ensure its output is visible + // Override func-e XDG paths to use aigw's directories and add startup hook overrides := []api.RunOption{ api.EnvoyOut(stdout), api.EnvoyErr(stderr), + // func-e will use these paths instead of its defaults + api.ConfigHome(dirs.ConfigHome), + api.DataHome(dirs.DataHome), + api.StateHome(dirs.StateHome), + api.RuntimeDir(dirs.RuntimeDir), + api.RunID(runID), admin.WithStartupHook(startupHook), } - return func(next api.RunFunc) api.RunFunc { return func(ctx context.Context, args []string, options ...api.RunOption) error { return next(ctx, args, append(options, overrides...)...) 
diff --git a/cmd/aigw/run_test.go b/cmd/aigw/run_test.go index 23dfdc02f6..35430e40be 100644 --- a/cmd/aigw/run_test.go +++ b/cmd/aigw/run_test.go @@ -44,7 +44,7 @@ func TestRun(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) defer cleanupRun(t, cancel) - opts := runOpts{extProcLauncher: func(context.Context, []string, io.Writer) error { return nil }} + opts := testRunOpts(t, func(context.Context, []string, io.Writer) error { return nil }) require.NoError(t, run(ctx, cmdRun{Debug: true, AdminPort: adminPort}, opts, stdout, stderr)) } @@ -53,7 +53,9 @@ func cleanupRun(t testing.TB, cancel context.CancelFunc) { if err := internaltesting.AwaitPortClosed(1975, 10*time.Second); err != nil { t.Logf("Failed to close port 1975: %v", err) } - // Delete the hard-coded path to certs defined in Envoy AI Gateway + // Delete the hard-coded path to certs defined in Envoy Gateway + // TODO: Remove once EG supports configurable cert directory + // https://github.com/envoyproxy/gateway/pull/7225 if err := os.RemoveAll("/tmp/envoy-gateway/certs"); err != nil { t.Logf("Failed to delete envoy gateway certs: %v", err) } @@ -67,9 +69,8 @@ func TestRunExtprocStartFailure(t *testing.T) { errChan := make(chan error) mockErr := errors.New("mock extproc error") go func() { - errChan <- run(ctx, cmdRun{}, runOpts{ - extProcLauncher: func(context.Context, []string, io.Writer) error { return mockErr }, - }, os.Stdout, io.Discard) + opts := testRunOpts(t, func(context.Context, []string, io.Writer) error { return mockErr }) + errChan <- run(ctx, cmdRun{}, opts, os.Stdout, io.Discard) }() select { @@ -241,15 +242,16 @@ func Test_newEnvoyMiddleware(t *testing.T) { start := time.Now() listenerPort := 1975 - middleware := newEnvoyRunMiddleware(start, listenerPort, &stdout, &stderr) + dirs := newTempDirectories(t) + middleware := newEnvoyRunMiddleware(dirs, "test-run", start, listenerPort, &stdout, &stderr) require.NotNil(t, middleware) err := middleware(func(ctx context.Context, args 
[]string, options ...api.RunOption) error { require.Equal(t, t.Context(), ctx) require.Equal(t, []string{"test"}, args) - // 3 = EnvoyOut, EnvoyErr, StartupHook - require.Len(t, options, 3+len(tt.inputOptions)) + // 8 = EnvoyOut, EnvoyErr, ConfigHome, DataHome, StateHome, RuntimeDir, RunID, StartupHook + require.Len(t, options, 8+len(tt.inputOptions)) return nil })(t.Context(), []string{"test"}, tt.inputOptions...) require.NoError(t, err) @@ -265,3 +267,13 @@ func readFileFromProjectRoot(t *testing.T, file string) string { require.NoError(t, err) return string(b) } + +// testRunOpts creates runOpts for testing. +// This ensures test isolation by using t.TempDir() for all XDG directories. +func testRunOpts(t *testing.T, extProcLauncher func(context.Context, []string, io.Writer) error) *runOpts { + t.Helper() + dirs := newTempDirectories(t) + opts, err := newRunOpts(dirs, "test-run", "", extProcLauncher) + require.NoError(t, err) + return opts +} diff --git a/cmd/aigw/runopts.go b/cmd/aigw/runopts.go new file mode 100644 index 0000000000..baa0fe925c --- /dev/null +++ b/cmd/aigw/runopts.go @@ -0,0 +1,95 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package main + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/envoyproxy/ai-gateway/internal/xdg" +) + +// runOpts are the options for the run command. +type runOpts struct { + xdg.Directories + // runID is the unique identifier for this run + runID string + // extProcLauncher is the function used to launch the external processor. + extProcLauncher func(ctx context.Context, args []string, w io.Writer) error + + // Computed paths derived from Directories and runID + // configPath is the resolved aigw config file path. Either --path flag, {ConfigHome}/config.yaml if exists, or empty. 
+ // Empty means auto-generate from OPENAI_API_KEY/AZURE_OPENAI_API_KEY environment variables. + configPath string + // logPath is {StateHome}/runs/{runID}/aigw.log + // Contains: aigw debug/info/error logs + logPath string + // egConfigPath is {StateHome}/runs/{runID}/envoy-gateway-config.yaml + // Contains: generated Envoy Gateway config that references egResourcesPath directory + // Passed to: Envoy Gateway via `envoy-gateway server --config-path <path>` + egConfigPath string + // egResourcesPath is {StateHome}/runs/{runID}/envoy-ai-gateway-resources/config.yaml + // Contains: Gateway, HTTPRoute, HTTPRouteFilter, Backend, Secret, BackendTrafficPolicy, SecurityPolicy, EnvoyExtensionPolicy objects + // Derived from: translating configPath (aigw resources -> Envoy Gateway resources) + // Referenced by: egConfigPath (tells Envoy Gateway where to load resources from the parent directory) + // Note: Must be in a subdirectory (not a flat file) because Envoy Gateway config template requires a directory path + egResourcesPath string + // extprocUDSPath is {RuntimeDir}/{runID}/uds.sock + // Unix domain socket for Envoy <-> aigw extproc communication + extprocUDSPath string + // extprocConfigPath is {StateHome}/runs/{runID}/extproc-config.yaml + // Contains: filterapi.Config YAML for external processor + // Derived from: translating configPath (extracts filter config from aigw resources) + extprocConfigPath string +} + +// newRunOpts creates runOpts with all paths computed and creates directories +// that aigw writes to directly (e.g. not ones owned by func-e or Envoy +// Gateway). Note: configPath may be empty (will auto-generate from env vars).
+func newRunOpts(dirs *xdg.Directories, runID, configPath string, extProcLauncher func(context.Context, []string, io.Writer) error) (*runOpts, error) { + opts := &runOpts{ + Directories: *dirs, + runID: runID, + configPath: configPath, + extProcLauncher: extProcLauncher, + } + + // Compute all paths + runDir := filepath.Join(dirs.StateHome, "runs", runID) + opts.logPath = filepath.Join(runDir, "aigw.log") + opts.egConfigPath = filepath.Join(runDir, "envoy-gateway-config.yaml") + opts.egResourcesPath = filepath.Join(runDir, "envoy-ai-gateway-resources", "config.yaml") + opts.extprocConfigPath = filepath.Join(runDir, "extproc-config.yaml") + opts.extprocUDSPath = filepath.Join(dirs.RuntimeDir, runID, "uds.sock") + + // Create directories that aigw writes to + // runDir: for log, config, extproc-config (0o750 per XDG spec for StateHome) + if err := os.MkdirAll(runDir, 0o750); err != nil { + return nil, fmt.Errorf("failed to create run directory %s: %w", runDir, err) + } + + // Recreate runDir/envoy-ai-gateway-resources: for egResourcesPath (0o750) + // Remove if exists to ensure a clean state, then create + resourcesDir := filepath.Dir(opts.egResourcesPath) + if err := os.RemoveAll(resourcesDir); err != nil { + return nil, fmt.Errorf("failed to remove resources directory %s: %w", resourcesDir, err) + } + if err := os.MkdirAll(resourcesDir, 0o750); err != nil { + return nil, fmt.Errorf("failed to create resources directory %s: %w", resourcesDir, err) + } + + // RuntimeDir/{runID}: for UDS socket (0o700 per XDG spec for RuntimeDir) + // Remove UDS socket if exists to ensure a clean state + if err := os.MkdirAll(filepath.Dir(opts.extprocUDSPath), 0o700); err != nil { + return nil, fmt.Errorf("failed to create runtime directory %s: %w", filepath.Dir(opts.extprocUDSPath), err) + } + _ = os.Remove(opts.extprocUDSPath) + + return opts, nil +} diff --git a/cmd/aigw/runopts_test.go b/cmd/aigw/runopts_test.go new file mode 100644 index 0000000000..ec772f0fc3 --- /dev/null +++ 
b/cmd/aigw/runopts_test.go @@ -0,0 +1,204 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package main + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/envoyproxy/ai-gateway/internal/xdg" +) + +func newTempDirectories(t *testing.T) *xdg.Directories { + return &xdg.Directories{ + ConfigHome: t.TempDir(), + DataHome: t.TempDir(), + StateHome: t.TempDir(), + RuntimeDir: t.TempDir(), + } +} + +func TestNewRunOpts(t *testing.T) { + mockLauncher := func(_ context.Context, _ []string, _ io.Writer) error { return nil } + + t.Run("sets all fields correctly", func(t *testing.T) { + dirs := newTempDirectories(t) + runID := "test-run-123" + configPath := "/explicit/config.yaml" + + actual, err := newRunOpts(dirs, runID, configPath, mockLauncher) + require.NoError(t, err) + require.NotNil(t, actual) + + require.Equal(t, runID, actual.runID) + require.NotNil(t, actual.extProcLauncher) + require.Equal(t, configPath, actual.configPath) + + expectedRunDir := filepath.Join(dirs.StateHome, "runs", runID) + paths := []struct { + name string + expected string + actual string + }{ + {"logPath", filepath.Join(expectedRunDir, "aigw.log"), actual.logPath}, + {"egConfigPath", filepath.Join(expectedRunDir, "envoy-gateway-config.yaml"), actual.egConfigPath}, + {"egResourcesPath", filepath.Join(expectedRunDir, "envoy-ai-gateway-resources", "config.yaml"), actual.egResourcesPath}, + {"extprocConfigPath", filepath.Join(expectedRunDir, "extproc-config.yaml"), actual.extprocConfigPath}, + {"extprocUDSPath", filepath.Join(dirs.RuntimeDir, runID, "uds.sock"), actual.extprocUDSPath}, + } + + for _, p := range paths { + require.Equal(t, p.expected, p.actual, p.name) + require.True(t, filepath.IsAbs(p.actual), p.name) + } + + require.DirExists(t, expectedRunDir) + require.DirExists(t, 
filepath.Dir(actual.egResourcesPath)) + require.DirExists(t, filepath.Dir(actual.extprocUDSPath)) + }) + + t.Run("empty configPath remains empty", func(t *testing.T) { + dirs := newTempDirectories(t) + + actual, err := newRunOpts(dirs, "test-run", "", mockLauncher) + require.NoError(t, err) + require.Empty(t, actual.configPath) + }) +} + +func TestNewRunOpts_Permissions(t *testing.T) { + runID := "test-run-permissions" + + dirs := newTempDirectories(t) + + actual, err := newRunOpts(dirs, runID, "", nil) + require.NoError(t, err) + + // Verify runDir created with correct permissions + expectedRunDir := filepath.Join(dirs.StateHome, "runs", runID) + info, err := os.Stat(expectedRunDir) + require.NoError(t, err) + require.True(t, info.IsDir()) + require.Equal(t, os.FileMode(0o750), info.Mode().Perm()) + + // Verify egResourcesPath parent created with correct permissions + expectedResourcesDir := filepath.Dir(actual.egResourcesPath) + info, err = os.Stat(expectedResourcesDir) + require.NoError(t, err) + require.True(t, info.IsDir()) + require.Equal(t, os.FileMode(0o750), info.Mode().Perm()) + + // Verify RuntimeDir/{runID} created with correct permissions + expectedRuntimeRunDir := filepath.Join(dirs.RuntimeDir, runID) + info, err = os.Stat(expectedRuntimeRunDir) + require.NoError(t, err) + require.True(t, info.IsDir()) + require.Equal(t, os.FileMode(0o700), info.Mode().Perm()) +} + +func TestNewRunOpts_DirectoryContents(t *testing.T) { + runID := "test-run-empty" + + dirs := newTempDirectories(t) + + actual, err := newRunOpts(dirs, runID, "", nil) + require.NoError(t, err) + + // Verify runDir contains only expected entries + expectedRunDir := filepath.Join(dirs.StateHome, "runs", runID) + actualEntries, err := os.ReadDir(expectedRunDir) + require.NoError(t, err) + require.Len(t, actualEntries, 1) + require.Equal(t, "envoy-ai-gateway-resources", actualEntries[0].Name()) + + // Verify resourcesDir is empty + expectedResourcesDir := filepath.Dir(actual.egResourcesPath) 
+ actualEntries, err = os.ReadDir(expectedResourcesDir) + require.NoError(t, err) + require.Empty(t, actualEntries) + + // Verify runtimeRunDir is empty + expectedRuntimeRunDir := filepath.Join(dirs.RuntimeDir, runID) + actualEntries, err = os.ReadDir(expectedRuntimeRunDir) + require.NoError(t, err) + require.Empty(t, actualEntries) +} + +func TestNewRunOpts_Errors(t *testing.T) { + t.Run("error when runDir creation fails", func(t *testing.T) { + baseDir := t.TempDir() + stateHome := filepath.Join(baseDir, "nonexistent", "readonly") + + // Make the parent read-only + parent := filepath.Dir(stateHome) + err := os.MkdirAll(parent, 0o755) + require.NoError(t, err) + t.Cleanup(func() { + _ = os.Chmod(parent, 0o755) + }) + err = os.Chmod(parent, 0o555) + require.NoError(t, err) + + dirs := newTempDirectories(t) + dirs.StateHome = stateHome + + _, err = newRunOpts(dirs, "test-run", "", nil) + require.Error(t, err) + }) + + t.Run("error when resources directory creation fails", func(t *testing.T) { + stateHome := t.TempDir() + + // Pre-create runDir successfully + runDir := filepath.Join(stateHome, "runs", "test-run-fail-resources") + err := os.MkdirAll(runDir, 0o750) + require.NoError(t, err) + + // Create a file where resources directory should be + resourcesParent := filepath.Join(runDir, "envoy-ai-gateway-resources") + err = os.WriteFile(resourcesParent, []byte("block"), 0o600) + require.NoError(t, err) + + // Make runDir read-only so RemoveAll fails + err = os.Chmod(runDir, 0o555) + require.NoError(t, err) + t.Cleanup(func() { + _ = os.Chmod(runDir, 0o755) + }) + + dirs := newTempDirectories(t) + dirs.StateHome = stateHome + + _, err = newRunOpts(dirs, "test-run-fail-resources", "", nil) + require.Error(t, err) + }) + + t.Run("error when runtime directory creation fails", func(t *testing.T) { + baseDir := t.TempDir() + runtimeDir := filepath.Join(baseDir, "nonexistent", "readonly") + + // Make the parent read-only + parent := filepath.Dir(runtimeDir) + err := 
os.MkdirAll(parent, 0o755) + require.NoError(t, err) + t.Cleanup(func() { + _ = os.Chmod(parent, 0o755) + }) + err = os.Chmod(parent, 0o555) + require.NoError(t, err) + + dirs := newTempDirectories(t) + dirs.RuntimeDir = runtimeDir + + _, err = newRunOpts(dirs, "test-run", "", nil) + require.Error(t, err) + }) +} diff --git a/go.mod b/go.mod index feb7cbf287..ac7b04d2a1 100644 --- a/go.mod +++ b/go.mod @@ -172,7 +172,7 @@ require ( github.com/kelseyhightower/envconfig v1.4.0 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect - github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c // indirect + github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect github.com/lyft/gostats v0.4.1 // indirect github.com/magiconair/properties v1.8.10 // indirect github.com/mailru/easyjson v0.9.0 // indirect diff --git a/go.sum b/go.sum index 750fea738a..c2bcaa0099 100644 --- a/go.sum +++ b/go.sum @@ -241,7 +241,6 @@ github.com/google/cel-go v0.26.1 h1:iPbVVEdkhTX++hpe3lzSk7D3G3QSYqLGoHOcEio+UXQ= github.com/google/cel-go v0.26.1/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-containerregistry v0.20.6 h1:cvWX87UxxLgaH76b4hIvya6Dzz9qHB31qAwjAohdSTU= @@ -298,8 +297,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug 
v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c h1:VtwQ41oftZwlMnOEbMWQtSEUgU64U4s+GHk7hZK+jtY= -github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= +github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 h1:PwQumkgq4/acIiZhtifTV5OUqqiP82UAl0h87xj/l9k= +github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg= github.com/lyft/gostats v0.4.1 h1:oR6p4HRCGxt0nUntmZIWmYMgyothBi3eZH2A71vRjsc= github.com/lyft/gostats v0.4.1/go.mod h1:Tpx2xRzz4t+T2Tx0xdVgIoBdR2UMVz+dKnE3X01XSd8= github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= diff --git a/internal/xdg/directories.go b/internal/xdg/directories.go new file mode 100644 index 0000000000..6b5a7f8589 --- /dev/null +++ b/internal/xdg/directories.go @@ -0,0 +1,34 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package xdg + +// Directories holds XDG Base Directory paths for aigw. +// See https://specifications.freedesktop.org/basedir-spec/latest/ +type Directories struct { + // ConfigHome is the base directory for user-specific configuration files. + // XDG specification: $XDG_CONFIG_HOME + // Default: ~/.config/aigw (or $AIGW_CONFIG_HOME) + // Contents: config.yaml (default config), envoy-version (func-e version preference) + ConfigHome string + + // DataHome is the base directory for user-specific data files. + // XDG specification: $XDG_DATA_HOME + // Default: ~/.local/share/aigw (or $AIGW_DATA_HOME) + // Contents: envoy-versions/ (downloaded Envoy binaries via func-e) + DataHome string + + // StateHome is the base directory for user-specific state data. 
+ // XDG specification: $XDG_STATE_HOME + // Default: ~/.local/state/aigw (or $AIGW_STATE_HOME) + // Contents: runs/{runID}/ (per-run logs and configs), envoy-runs/{runID}/ (func-e logs) + StateHome string + + // RuntimeDir is the base directory for user-specific runtime files. + // XDG specification: $XDG_RUNTIME_DIR + // Default: /tmp/aigw-${UID} (or $AIGW_RUNTIME_DIR) + // Contents: {runID}/uds.sock (extproc socket), {runID}/admin-address.txt (func-e admin) + RuntimeDir string +} diff --git a/site/docs/cli/installation.md b/site/docs/cli/installation.md index 9b6c59d51f..d24dff1d86 100644 --- a/site/docs/cli/installation.md +++ b/site/docs/cli/installation.md @@ -22,10 +22,7 @@ you can mount it as a volume. The following example runs the AI Gateway with the default configuration for the [OpenAI provider](../getting-started/connect-providers/openai.md): ```shell -$ docker run --rm -p 1975:1975 -e OPENAI_API_KEY=OPENAI_API_KEY envoyproxy/ai-gateway-cli run -looking up the latest patch for Envoy version 1.35 -1.35.3 is already downloaded -starting: /tmp/envoy-gateway/versions/1.35.3/bin/envoy in run directory /tmp/envoy-gateway/runs/1758086300246501521 +docker run --rm -p 1975:1975 -e OPENAI_API_KEY=OPENAI_API_KEY envoyproxy/ai-gateway-cli run ``` ## Building the latest version @@ -76,6 +73,26 @@ Commands: Run "aigw <command> --help" for more information on a command. ``` +## Configuration + +The [XDG Base Directory Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) defines standard locations for user-specific files: + +- **Config files**: User-specific configuration (persistent, shared) +- **Data files**: Downloaded binaries (persistent, shared) +- **State files**: Logs and configs per run (persistent, debugging) +- **Runtime files**: Ephemeral files like sockets (deleted on reboot) + +`aigw` adopts these conventions to separate configuration, downloaded Envoy binaries, logs, and ephemeral runtime files.
+ +| Environment Variable | Default Path | CLI Flag | +| -------------------- | --------------------- | --------------- | +| `AIGW_CONFIG_HOME` | `~/.config/aigw` | `--config-home` | +| `AIGW_DATA_HOME` | `~/.local/share/aigw` | `--data-home` | +| `AIGW_STATE_HOME` | `~/.local/state/aigw` | `--state-home` | +| `AIGW_RUNTIME_DIR` | `/tmp/aigw-${UID}` | `--runtime-dir` | + +**Priority**: CLI flags > Environment variables > Defaults + ## What's next? The following sections provide more information about each of the CLI commands: diff --git a/site/docs/cli/run.md b/site/docs/cli/run.md index cb8f59900a..c255585ef1 100644 --- a/site/docs/cli/run.md +++ b/site/docs/cli/run.md @@ -300,6 +300,63 @@ focused on retrieval and semantic analysis. See [docker-compose-otel.yaml][docker-compose-otel.yaml] for a complete example configuration. +## Configuration + +### File Locations + +`aigw run` uses the [XDG Base Directory Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) to organize files: + +| Environment Variable | Default Path | Purpose | +| -------------------- | --------------------- | --------------------------- | +| `AIGW_CONFIG_HOME` | `~/.config/aigw` | User configuration files | +| `AIGW_DATA_HOME` | `~/.local/share/aigw` | Downloaded Envoy binaries | +| `AIGW_STATE_HOME` | `~/.local/state/aigw` | Persistent logs and configs | +| `AIGW_RUNTIME_DIR` | `/tmp/aigw-${UID}` | Ephemeral runtime files | + +See [Installation - Configuration](./installation.md#configuration) for more details about XDG directories. 
+ +### File Mappings + +Each invocation creates a unique run identifier (`runID`) in format `YYYYMMDD_HHMMSS_UUU` to isolate concurrent runs: + +| File Type | Purpose | Path | Type | +| ------------------------- | ---------------------------------------- | ---------------------------------------------------------------- | ------- | +| Default Config | Configuration file location | `${AIGW_CONFIG_HOME}/config.yaml` | CONFIG | +| Envoy Version Preference | Selected Envoy version (via func-e) | `${AIGW_CONFIG_HOME}/envoy-version` | CONFIG | +| Envoy Binaries | Downloaded executables (via func-e) | `${AIGW_DATA_HOME}/envoy-versions/{version}/bin/envoy` | DATA | +| AIGW Logs | Gateway logs and stderr output | `${AIGW_STATE_HOME}/runs/{runID}/aigw.log` | STATE | +| Envoy Gateway Config | Generated EG configuration | `${AIGW_STATE_HOME}/runs/{runID}/envoy-gateway-config.yaml` | STATE | +| Envoy Gateway Resources | Generated EG resources (Gateway, Routes) | `${AIGW_STATE_HOME}/runs/{runID}/envoy-ai-gateway-resources/...` | STATE | +| External Processor Config | Generated extproc configuration | `${AIGW_STATE_HOME}/runs/{runID}/extproc-config.yaml` | STATE | +| Envoy Run Logs (func-e) | Envoy stdout/stderr (via func-e) | `${AIGW_STATE_HOME}/envoy-runs/{runID}/stdout.log,stderr.log` | STATE | +| UDS Socket | Unix domain socket for extproc | `${AIGW_RUNTIME_DIR}/{runID}/uds.sock` | RUNTIME | +| Admin Address (func-e) | Envoy admin endpoint (via func-e) | `${AIGW_RUNTIME_DIR}/{runID}/admin-address.txt` | RUNTIME | + +**File Categories:** + +- **CONFIG**: User-specific configuration (persistent, shared across runs) +- **DATA**: Downloaded binaries (persistent, shared across runs) +- **STATE**: Per-run logs and configs (persistent for debugging) +- **RUNTIME**: Ephemeral files like sockets (cleaned on reboot) + +### `runID` + +By default, `aigw run` generates a timestamp-based `runID` for each invocation. 
You can customize this for predictable paths: + +```shell +# Use run ID "0" for Docker/Kubernetes deployments +aigw run --run-id=0 + +# Or via environment variable +AIGW_RUN_ID=production aigw run +``` + +Custom run IDs: + +- Enable predictable file paths in containers +- Allow correlation across multiple runs with the same ID +- Must not contain path separators (`/` or `\`) + --- [openinference]: https://github.com/Arize-ai/openinference/tree/main/spec From f4e5f70680ad9c5e45ea51b18d777c187fb1b78c Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Fri, 24 Oct 2025 14:51:40 -0700 Subject: [PATCH 2/2] revert bad merge Signed-off-by: Takeshi Yoneda --- .github/workflows/build_and_test.yaml | 29 +-------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 9b457c6f0a..5906a9e2a4 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -67,34 +67,7 @@ jobs: ~/go/pkg/mod ~/go/bin key: unittest-${{ hashFiles('**/go.mod', '**/go.sum', '**/Makefile') }}-${{ matrix.os }} - - # This runs ollama server to be used in `aigw run` end-to-end tests. - # The test case using it will be skipped if ollama is not available. - # Since installing it and pulling the model takes a while, we do it only for Linux runners. - - name: Start Ollama server - if: matrix.os == 'ubuntu-latest' - run: | - curl -fsSL https://ollama.com/install.sh | sh && sudo systemctl stop ollama - nohup ollama serve > ollama.log 2>&1 & - timeout 30 sh -c 'until nc -z localhost 11434; do sleep 1; done' - grep _MODEL .env.ollama | cut -d= -f2 | xargs -I{} ollama pull {} - env: - OLLAMA_CONTEXT_LENGTH: 131072 # Larger context for goose - OLLAMA_HOST: 0.0.0.0 - # Download Envoy via func-e using implicit default version `aigw` would - # otherwise need to download during test runs. 
- - name: Download Envoy via func-e - run: go tool -modfile=tools/go.mod func-e run --version - env: - FUNC_E_DATA_HOME: ~/.local/share/aigw - - env: - TEST_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_BEDROCK_USER_AWS_ACCESS_KEY_ID }} - TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BEDROCK_USER_AWS_SECRET_ACCESS_KEY }} - TEST_OPENAI_API_KEY: ${{ secrets.ENVOY_AI_GATEWAY_OPENAI_API_KEY }} - TEST_GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: make test-coverage - - if: failure() - run: cat ollama.log || true + - run: make test-coverage - name: Upload coverage to Codecov if: matrix.os == 'ubuntu-latest' uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3