diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 207ba26508..5906a9e2a4 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -68,8 +68,6 @@ jobs: ~/go/bin key: unittest-${{ hashFiles('**/go.mod', '**/go.sum', '**/Makefile') }}-${{ matrix.os }} - run: make test-coverage - - if: failure() - run: cat ollama.log || true - name: Upload coverage to Codecov if: matrix.os == 'ubuntu-latest' uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3 @@ -337,7 +335,7 @@ jobs: - name: Download Envoy via func-e run: go tool -modfile=tools/go.mod func-e run --version env: - FUNC_E_HOME: /tmp/envoy-gateway # hard-coded directory in EG + FUNC_E_DATA_HOME: ~/.local/share/aigw - name: Install Goose env: GOOSE_VERSION: v1.10.0 diff --git a/Dockerfile b/Dockerfile index 2041140add..6660699a7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,30 +15,44 @@ FROM golang:1.25 AS envoy-downloader ARG TARGETOS ARG TARGETARCH ARG COMMAND_NAME -# Hard-coded directory for envoy-gateway resources -# See https://github.com/envoyproxy/gateway/blob/d95ce4ce564cfff47ed1fd6c97e29c1058aa4a61/internal/infrastructure/host/proxy_infra.go#L16 -WORKDIR /tmp/envoy-gateway +# Download Envoy binary to AIGW_DATA_HOME for the nonroot user +WORKDIR /build RUN if [ "$COMMAND_NAME" = "aigw" ]; then \ go install github.com/tetratelabs/func-e/cmd/func-e@latest && \ - func-e --platform ${TARGETOS}/${TARGETARCH} --home-dir . run --version; \ + FUNC_E_DATA_HOME=/home/nonroot/.local/share/aigw func-e --platform ${TARGETOS}/${TARGETARCH} run --version; \ fi \ - && mkdir -p certs \ - && chown -R 65532:65532 . \ - && chmod -R 755 . 
+ # Create directories for the nonroot user + && mkdir -p /home/nonroot /tmp/envoy-gateway/certs \ + && chown -R 65532:65532 /home/nonroot /tmp/envoy-gateway \ + && chmod -R 755 /home/nonroot /tmp/envoy-gateway FROM gcr.io/distroless/${VARIANT}-debian12:nonroot ARG COMMAND_NAME ARG TARGETOS ARG TARGETARCH +# Copy pre-downloaded Envoy binary and EG certs directory +COPY --from=envoy-downloader /home/nonroot /home/nonroot COPY --from=envoy-downloader /tmp/envoy-gateway /tmp/envoy-gateway COPY ./out/${COMMAND_NAME}-${TARGETOS}-${TARGETARCH} /app USER nonroot:nonroot +# Set AIGW_RUN_ID=0 for predictable file paths in containers. +# This creates the following directory structure: +# ~/.config/aigw/ - XDG config (e.g., envoy-version preference) +# ~/.local/share/aigw/ - XDG data (downloaded Envoy binaries via func-e) +# ~/.local/state/aigw/runs/0/ - XDG state (aigw.log, envoy-gateway-config.yaml, extproc-config.yaml, resources/) +# ~/.local/state/aigw/envoy-runs/0/ - XDG state (func-e stdout.log, stderr.log) +# /tmp/aigw-0/ - XDG runtime (uds.sock, admin-address.txt) +ENV AIGW_RUN_ID=0 + # The healthcheck subcommand performs an HTTP GET to localhost:1064/healthlthy for "aigw run". # NOTE: This is only for aigw in practice since this is ignored by Kubernetes. HEALTHCHECK --interval=10s --timeout=5s --start-period=5s --retries=3 \ CMD ["/app", "healthcheck"] ENTRYPOINT ["/app"] + +# Default CMD for aigw - uses AIGW_RUN_ID from environment +CMD ["run"] diff --git a/cmd/aigw/config.go b/cmd/aigw/config.go index 7094b2b8ba..7d4f3d3da7 100644 --- a/cmd/aigw/config.go +++ b/cmd/aigw/config.go @@ -9,7 +9,9 @@ import ( "errors" "fmt" "os" + "path/filepath" "reflect" + "strings" "github.com/a8m/envsubst" @@ -64,3 +66,29 @@ func readConfig(path string, mcpServers *autoconfig.MCPServers, debug bool) (str } return envsubst.String(config) } + +// expandPath expands environment variables and tilde in paths, then converts to absolute path. +// Returns empty string if input is empty. 
+// Replaces ~/ with ${HOME}/ before expanding environment variables. +func expandPath(path string) string { + if path == "" { + return "" + } + + // Replace ~/ with ${HOME}/ + if strings.HasPrefix(path, "~/") { + path = "${HOME}/" + path[2:] + } + + // Expand environment variables + expanded := os.ExpandEnv(path) + + // Convert to absolute path + abs, err := filepath.Abs(expanded) + if err != nil { + // If we can't get absolute path, return expanded path + return expanded + } + + return abs +} diff --git a/cmd/aigw/config_test.go b/cmd/aigw/config_test.go index 5d35968733..348e70f1c4 100644 --- a/cmd/aigw/config_test.go +++ b/cmd/aigw/config_test.go @@ -135,6 +135,73 @@ func TestReadConfig(t *testing.T) { }) } +func TestExpandPath(t *testing.T) { + homeDir, err := os.UserHomeDir() + require.NoError(t, err) + + tests := []struct { + name string + path string + envVars map[string]string + expected string + }{ + { + name: "empty path returns empty", + path: "", + expected: "", + }, + { + name: "tilde path", + path: "~/test/file.txt", + expected: filepath.Join(homeDir, "test/file.txt"), + }, + { + name: "tilde slash returns HOME", + path: "~/", + expected: homeDir, + }, + { + name: "absolute path unchanged", + path: "/absolute/path/file.txt", + expected: "/absolute/path/file.txt", + }, + { + name: "env var expansion", + path: "${HOME}/test", + expected: filepath.Join(homeDir, "test"), + }, + { + name: "custom env var", + path: "${CUSTOM_DIR}/file.txt", + envVars: map[string]string{"CUSTOM_DIR": "/custom"}, + expected: "/custom/file.txt", + }, + { + name: "tilde with env var", + path: "~/test/${USER}", + envVars: map[string]string{"USER": "testuser"}, + expected: filepath.Join(homeDir, "test/testuser"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + for k, v := range tt.envVars { + t.Setenv(k, v) + } + + actual := expandPath(tt.path) + require.Equal(t, tt.expected, actual) + }) + } + t.Run("relative/path", func(t *testing.T) { + cwd, err 
:= os.Getwd() + require.NoError(t, err) + expected := filepath.Join(cwd, "relative/path") + actual := expandPath("relative/path") + require.Equal(t, expected, actual) + }) +} + func TestRecreateDir(t *testing.T) { tests := []struct { name string diff --git a/cmd/aigw/main.go b/cmd/aigw/main.go index c062e4ce3c..152ba1fbca 100644 --- a/cmd/aigw/main.go +++ b/cmd/aigw/main.go @@ -12,6 +12,7 @@ import ( "io" "log" "os" + "time" "github.com/alecthomas/kong" ctrl "sigs.k8s.io/controller-runtime" @@ -19,11 +20,18 @@ import ( "github.com/envoyproxy/ai-gateway/cmd/extproc/mainlib" "github.com/envoyproxy/ai-gateway/internal/autoconfig" "github.com/envoyproxy/ai-gateway/internal/version" + "github.com/envoyproxy/ai-gateway/internal/xdg" ) type ( // cmd corresponds to the top-level `aigw` command. cmd struct { + // Global XDG flags + ConfigHome string `name:"config-home" env:"AIGW_CONFIG_HOME" help:"Configuration files directory. Defaults to ~/.config/aigw" type:"path"` + DataHome string `name:"data-home" env:"AIGW_DATA_HOME" help:"Downloaded Envoy binaries directory. Defaults to ~/.local/share/aigw" type:"path"` + StateHome string `name:"state-home" env:"AIGW_STATE_HOME" help:"Persistent state and logs directory. Defaults to ~/.local/state/aigw" type:"path"` + RuntimeDir string `name:"runtime-dir" env:"AIGW_RUNTIME_DIR" help:"Ephemeral runtime files directory. Defaults to /tmp/aigw-$UID" type:"path"` + // Version is the sub-command to show the version. Version struct{} `cmd:"" help:"Show version."` // Run is the sub-command parsed by the `cmdRun` struct. @@ -34,16 +42,74 @@ type ( // cmdRun corresponds to `aigw run` command. cmdRun struct { Debug bool `help:"Enable debug logging emitted to stderr."` - Path string `arg:"" name:"path" optional:"" help:"Path to the AI Gateway configuration yaml file. Optional when at least OPENAI_API_KEY, AZURE_OPENAI_API_KEY, or ANTHROPIC_API_KEY is set." 
type:"path"` + Path string `arg:"" name:"path" optional:"" help:"Path to the AI Gateway configuration yaml file. Defaults to $AIGW_CONFIG_HOME/config.yaml if exists, otherwise optional when at least OPENAI_API_KEY, AZURE_OPENAI_API_KEY or ANTHROPIC_API_KEY is set." type:"path"` AdminPort int `help:"HTTP port for the admin server (serves /metrics and /health endpoints)." default:"1064"` McpConfig string `name:"mcp-config" help:"Path to MCP servers configuration file." type:"path"` McpJSON string `name:"mcp-json" help:"JSON string of MCP servers configuration."` + RunID string `name:"run-id" env:"AIGW_RUN_ID" help:"Run identifier for this invocation. Defaults to timestamp-based ID or $AIGW_RUN_ID. Use '0' for Docker/Kubernetes."` mcpConfig *autoconfig.MCPServers `kong:"-"` // Internal field: normalized MCP JSON data + dirs *xdg.Directories `kong:"-"` // Internal field: XDG directories, set by BeforeApply + runOpts *runOpts `kong:"-"` // Internal field: run options, set by Validate } // cmdHealthcheck corresponds to `aigw healthcheck` command. cmdHealthcheck struct{} ) +// BeforeApply is called by Kong before applying defaults to set XDG directory defaults. 
+func (c *cmd) BeforeApply(_ *kong.Context) error {
+	// Fill in the default for any directory that is still empty, then expand
+	// every value unconditionally (handles ~/, env vars, and converts to an
+	// absolute path).
+	if c.ConfigHome == "" {
+		c.ConfigHome = "~/.config/aigw"
+	}
+	c.ConfigHome = expandPath(c.ConfigHome)
+
+	if c.DataHome == "" {
+		c.DataHome = "~/.local/share/aigw"
+	}
+	c.DataHome = expandPath(c.DataHome)
+
+	if c.StateHome == "" {
+		c.StateHome = "~/.local/state/aigw"
+	}
+	c.StateHome = expandPath(c.StateHome)
+
+	if c.RuntimeDir == "" {
+		c.RuntimeDir = "/tmp/aigw-${UID}"
+		// UID is normally a shell variable that is not exported to child
+		// processes, so environment expansion would replace it with "" and
+		// every user would end up sharing "/tmp/aigw-". When it is absent from
+		// the environment, fall back to the real user ID of this process.
+		if os.Getenv("UID") == "" {
+			c.RuntimeDir = fmt.Sprintf("/tmp/aigw-%d", os.Getuid())
+		}
+	}
+	c.RuntimeDir = expandPath(c.RuntimeDir)
+
+	// Populate Run.dirs with expanded XDG directories for use in Run.BeforeApply
+	c.Run.dirs = &xdg.Directories{
+		ConfigHome: c.ConfigHome,
+		DataHome:   c.DataHome,
+		StateHome:  c.StateHome,
+		RuntimeDir: c.RuntimeDir,
+	}
+
+	return nil
+}
+
+// BeforeApply is called by Kong before applying defaults to set computed default values.
+//
+// It generates a timestamp-based RunID when none was provided, points Path at
+// config.yaml under the config home when Path is empty and that file exists,
+// and finally expands Path. It always returns nil.
+func (c *cmdRun) BeforeApply(_ *kong.Context) error {
+	// Set RunID default if not provided
+	if c.RunID == "" {
+		c.RunID = generateRunID(time.Now())
+	}
+
+	// Set Path to default config.yaml if it exists and Path not provided.
+	// dirs may be nil when the parent hook has not populated it, in which case
+	// no default config file is looked up.
+	if c.Path == "" && c.dirs != nil {
+		defaultPath := c.dirs.ConfigHome + "/config.yaml"
+		if _, err := os.Stat(defaultPath); err == nil {
+			c.Path = defaultPath
+		}
+	}
+	// Expand Path (handles ~/, env vars, and converts to absolute).
+	// An empty Path stays empty.
+	c.Path = expandPath(c.Path)
+
+	return nil
+}
+
 // Validate is called by Kong after parsing to validate the cmdRun arguments.
func (c *cmdRun) Validate() error { if c.McpConfig != "" && c.McpJSON != "" { @@ -53,6 +119,8 @@ func (c *cmdRun) Validate() error { return fmt.Errorf("you must supply at least OPENAI_API_KEY, AZURE_OPENAI_API_KEY, ANTHROPIC_API_KEY, or a config file path") } + c.McpConfig = expandPath(c.McpConfig) + var mcpJSON string if c.McpConfig != "" { raw, err := os.ReadFile(c.McpConfig) @@ -71,11 +139,18 @@ func (c *cmdRun) Validate() error { } c.mcpConfig = &mcpConfig } + + opts, err := newRunOpts(c.dirs, c.RunID, c.Path, mainlib.Main) + if err != nil { + return fmt.Errorf("failed to create run options: %w", err) + } + c.runOpts = opts + return nil } type ( - runFn func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error + runFn func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error healthcheckFn func(context.Context, io.Writer, io.Writer) error ) @@ -106,11 +181,12 @@ func doMain(ctx context.Context, stdout, stderr io.Writer, args []string, exitFn } parsed, err := parser.Parse(args) parser.FatalIfErrorf(err) + switch parsed.Command() { case "version": _, _ = fmt.Fprintf(stdout, "Envoy AI Gateway CLI: %s\n", version.Version) case "run", "run ": - err = rf(ctx, c.Run, runOpts{extProcLauncher: mainlib.Main}, stdout, stderr) + err = rf(ctx, c.Run, c.Run.runOpts, stdout, stderr) if err != nil { log.Fatalf("Error running: %v", err) } @@ -123,3 +199,11 @@ func doMain(ctx context.Context, stdout, stderr io.Writer, args []string, exitFn panic("unreachable") } } + +// generateRunID generates a unique run identifier based on the current time. +// Defaults to the same convention as func-e: "YYYYMMDD_HHMMSS_UUU" format. +// Last 3 digits of microseconds to allow concurrent runs. 
+func generateRunID(now time.Time) string { + micro := now.Nanosecond() / 1000 % 1000 + return fmt.Sprintf("%s_%03d", now.Format("20060102_150405"), micro) +} diff --git a/cmd/aigw/main_test.go b/cmd/aigw/main_test.go index 36c14e8607..80f92eb401 100644 --- a/cmd/aigw/main_test.go +++ b/cmd/aigw/main_test.go @@ -8,6 +8,7 @@ package main import ( "bytes" "context" + "fmt" "io" "os" "path/filepath" @@ -30,21 +31,29 @@ func Test_doMain(t *testing.T) { { name: "help", args: []string{"--help"}, - expOut: `Usage: aigw + expOut: `Usage: aigw [flags] Envoy AI Gateway CLI Flags: - -h, --help Show context-sensitive help. + -h, --help Show context-sensitive help. + --config-home=STRING Configuration files directory. Defaults to + ~/.config/aigw ($AIGW_CONFIG_HOME) + --data-home=STRING Downloaded Envoy binaries directory. Defaults to + ~/.local/share/aigw ($AIGW_DATA_HOME) + --state-home=STRING Persistent state and logs directory. Defaults to + ~/.local/state/aigw ($AIGW_STATE_HOME) + --runtime-dir=STRING Ephemeral runtime files directory. Defaults to + /tmp/aigw-$UID ($AIGW_RUNTIME_DIR) Commands: - version + version [flags] Show version. run [] [flags] Run the AI Gateway locally for given configuration. - healthcheck + healthcheck [flags] Docker HEALTHCHECK command. Run "aigw --help" for more information on a command. @@ -60,60 +69,80 @@ Run "aigw --help" for more information on a command. name: "version help", args: []string{"version", "--help"}, expPanicCode: ptr.To(0), - expOut: `Usage: aigw version + expOut: `Usage: aigw version [flags] Show version. Flags: - -h, --help Show context-sensitive help. + -h, --help Show context-sensitive help. + --config-home=STRING Configuration files directory. Defaults to + ~/.config/aigw ($AIGW_CONFIG_HOME) + --data-home=STRING Downloaded Envoy binaries directory. Defaults to + ~/.local/share/aigw ($AIGW_DATA_HOME) + --state-home=STRING Persistent state and logs directory. 
Defaults to + ~/.local/state/aigw ($AIGW_STATE_HOME) + --runtime-dir=STRING Ephemeral runtime files directory. Defaults to + /tmp/aigw-$UID ($AIGW_RUNTIME_DIR) `, }, { name: "run no arg", args: []string{"run"}, - rf: func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error { return nil }, + rf: func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error { return nil }, expPanicCode: ptr.To(80), }, { name: "run with OpenAI env", args: []string{"run"}, env: map[string]string{"OPENAI_API_KEY": "dummy-key"}, - rf: func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error { return nil }, + rf: func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error { return nil }, }, { name: "run with Anthropic env", args: []string{"run"}, env: map[string]string{"ANTHROPIC_API_KEY": "dummy-key"}, - rf: func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error { return nil }, + rf: func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error { return nil }, }, { name: "run help", args: []string{"run", "--help"}, - rf: func(context.Context, cmdRun, runOpts, io.Writer, io.Writer) error { return nil }, + rf: func(context.Context, cmdRun, *runOpts, io.Writer, io.Writer) error { return nil }, expOut: `Usage: aigw run [] [flags] Run the AI Gateway locally for given configuration. Arguments: - [] Path to the AI Gateway configuration yaml file. Optional when at - least OPENAI_API_KEY, AZURE_OPENAI_API_KEY, or ANTHROPIC_API_KEY + [] Path to the AI Gateway configuration yaml file. Defaults to + $AIGW_CONFIG_HOME/config.yaml if exists, otherwise optional when + at least OPENAI_API_KEY, AZURE_OPENAI_API_KEY or ANTHROPIC_API_KEY is set. Flags: - -h, --help Show context-sensitive help. + -h, --help Show context-sensitive help. + --config-home=STRING Configuration files directory. Defaults to + ~/.config/aigw ($AIGW_CONFIG_HOME) + --data-home=STRING Downloaded Envoy binaries directory. 
Defaults to + ~/.local/share/aigw ($AIGW_DATA_HOME) + --state-home=STRING Persistent state and logs directory. Defaults to + ~/.local/state/aigw ($AIGW_STATE_HOME) + --runtime-dir=STRING Ephemeral runtime files directory. Defaults to + /tmp/aigw-$UID ($AIGW_RUNTIME_DIR) - --debug Enable debug logging emitted to stderr. - --admin-port=1064 HTTP port for the admin server (serves /metrics and - /health endpoints). - --mcp-config=STRING Path to MCP servers configuration file. - --mcp-json=STRING JSON string of MCP servers configuration. + --debug Enable debug logging emitted to stderr. + --admin-port=1064 HTTP port for the admin server (serves /metrics + and /health endpoints). + --mcp-config=STRING Path to MCP servers configuration file. + --mcp-json=STRING JSON string of MCP servers configuration. + --run-id=STRING Run identifier for this invocation. Defaults to + timestamp-based ID or $AIGW_RUN_ID. Use '0' for + Docker/Kubernetes ($AIGW_RUN_ID). `, expPanicCode: ptr.To(0), }, { name: "run with path", args: []string{"run", "./path"}, - rf: func(_ context.Context, c cmdRun, _ runOpts, _, _ io.Writer) error { + rf: func(_ context.Context, c cmdRun, _ *runOpts, _, _ io.Writer) error { abs, err := filepath.Abs("./path") require.NoError(t, err) require.Equal(t, abs, c.Path) @@ -134,46 +163,231 @@ Flags: } else { doMain(t.Context(), out, os.Stderr, tt.args, nil, tt.rf, tt.hf) } + fmt.Println(out.String()) require.Equal(t, tt.expOut, out.String()) }) } } +func TestCmd_BeforeApply(t *testing.T) { + tests := []struct { + name string + configHome string + dataHome string + stateHome string + runtimeDir string + envVars map[string]string + expectedConfig string + expectedData string + expectedState string + expectedRuntime string + }{ + { + name: "sets defaults when all empty", + configHome: "", + dataHome: "", + stateHome: "", + runtimeDir: "", + envVars: map[string]string{"HOME": "/home/test", "UID": "1000"}, + expectedConfig: "/home/test/.config/aigw", + expectedData: 
"/home/test/.local/share/aigw", + expectedState: "/home/test/.local/state/aigw", + expectedRuntime: "/tmp/aigw-1000", + }, + { + name: "preserves explicit values", + configHome: "/custom/config", + dataHome: "/custom/data", + stateHome: "/custom/state", + runtimeDir: "/custom/runtime", + expectedConfig: "/custom/config", + expectedData: "/custom/data", + expectedState: "/custom/state", + expectedRuntime: "/custom/runtime", + }, + { + name: "mixes defaults and explicit values", + configHome: "/custom/config", + dataHome: "", + stateHome: "/custom/state", + runtimeDir: "", + envVars: map[string]string{"HOME": "/home/test", "UID": "1000"}, + expectedConfig: "/custom/config", + expectedData: "/home/test/.local/share/aigw", + expectedState: "/custom/state", + expectedRuntime: "/tmp/aigw-1000", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + for k, v := range tt.envVars { + t.Setenv(k, v) + } + + c := cmd{ + ConfigHome: tt.configHome, + DataHome: tt.dataHome, + StateHome: tt.stateHome, + RuntimeDir: tt.runtimeDir, + } + + err := c.BeforeApply(nil) + require.NoError(t, err) + + require.Equal(t, tt.expectedConfig, c.ConfigHome) + require.Equal(t, tt.expectedData, c.DataHome) + require.Equal(t, tt.expectedState, c.StateHome) + require.Equal(t, tt.expectedRuntime, c.RuntimeDir) + + // Verify Run.dirs is populated + require.NotNil(t, c.Run.dirs) + require.Equal(t, tt.expectedConfig, c.Run.dirs.ConfigHome) + require.Equal(t, tt.expectedData, c.Run.dirs.DataHome) + require.Equal(t, tt.expectedState, c.Run.dirs.StateHome) + require.Equal(t, tt.expectedRuntime, c.Run.dirs.RuntimeDir) + }) + } +} + +func TestCmdRun_BeforeApply(t *testing.T) { + tests := []struct { + name string + path string + runID string + setupDirs func(t *testing.T, configHome string) + expectedPath string + expectedID string // empty means check it's generated + }{ + { + name: "generates runID when empty", + path: "", + runID: "", + expectedPath: "", + expectedID: "", // will 
verify it's non-empty + }, + { + name: "preserves explicit runID", + path: "", + runID: "my-custom-id", + expectedPath: "", + expectedID: "my-custom-id", + }, + { + name: "preserves explicit path", + path: "/explicit/config.yaml", + runID: "", + expectedPath: "/explicit/config.yaml", + expectedID: "", + }, + { + name: "sets path to default when config.yaml exists", + path: "", + runID: "", + setupDirs: func(t *testing.T, configHome string) { + err := os.WriteFile(filepath.Join(configHome, "config.yaml"), []byte("test"), 0o600) + require.NoError(t, err) + }, + expectedPath: "", // will be {configHome}/config.yaml + expectedID: "", + }, + { + name: "leaves path empty when config.yaml does not exist", + path: "", + runID: "", + expectedPath: "", + expectedID: "", + }, + { + name: "preserves explicit path even when config.yaml exists", + path: "/explicit/config.yaml", + runID: "", + setupDirs: func(t *testing.T, configHome string) { + err := os.WriteFile(filepath.Join(configHome, "config.yaml"), []byte("test"), 0o600) + require.NoError(t, err) + }, + expectedPath: "/explicit/config.yaml", + expectedID: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + configHome := t.TempDir() + + if tt.setupDirs != nil { + tt.setupDirs(t, configHome) + } + + dirs := newTempDirectories(t) + dirs.ConfigHome = configHome + + c := cmdRun{ + Path: tt.path, + RunID: tt.runID, + dirs: dirs, + } + + err := c.BeforeApply(nil) + require.NoError(t, err) + + // Check Path + if tt.expectedPath == "" && tt.path == "" && tt.setupDirs != nil { + // Special case: should be set to default + expected := filepath.Join(configHome, "config.yaml") + require.Equal(t, expected, c.Path) + } else { + require.Equal(t, tt.expectedPath, c.Path) + } + + // Check RunID + if tt.expectedID == "" && tt.runID == "" { + // Should be generated + require.NotEmpty(t, c.RunID) + // Verify format: YYYYMMDD_HHMMSS_UUU + require.Regexp(t, `^\d{8}_\d{6}_\d{3}$`, c.RunID) + } else { + 
require.Equal(t, tt.expectedID, c.RunID) + } + }) + } +} + func TestCmdRun_Validate(t *testing.T) { tests := []struct { name string - cmd cmdRun + path string envVars map[string]string expectedError string }{ { name: "no config and no env vars", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{}, expectedError: "you must supply at least OPENAI_API_KEY, AZURE_OPENAI_API_KEY, ANTHROPIC_API_KEY, or a config file path", }, { name: "config path provided", - cmd: cmdRun{Path: "/path/to/config.yaml"}, + path: "/path/to/config.yaml", envVars: map[string]string{}, }, { name: "OPENAI_API_KEY set", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{ "OPENAI_API_KEY": "sk-test", }, }, { name: "AZURE_OPENAI_API_KEY set", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{ "AZURE_OPENAI_API_KEY": "azure-key", }, }, { name: "both API keys set", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{ "OPENAI_API_KEY": "sk-test", "AZURE_OPENAI_API_KEY": "azure-key", @@ -181,21 +395,21 @@ func TestCmdRun_Validate(t *testing.T) { }, { name: "ANTHROPIC_API_KEY set", - cmd: cmdRun{Path: ""}, + path: "", envVars: map[string]string{ "ANTHROPIC_API_KEY": "sk-ant-test", }, }, { name: "config path and OPENAI_API_KEY both set", - cmd: cmdRun{Path: "/path/to/config.yaml"}, + path: "/path/to/config.yaml", envVars: map[string]string{ "OPENAI_API_KEY": "sk-test", }, }, { name: "config path and AZURE_OPENAI_API_KEY both set", - cmd: cmdRun{Path: "/path/to/config.yaml"}, + path: "/path/to/config.yaml", envVars: map[string]string{ "AZURE_OPENAI_API_KEY": "azure-key", }, @@ -208,12 +422,22 @@ func TestCmdRun_Validate(t *testing.T) { t.Setenv(k, v) } - err := tt.cmd.Validate() + cmd := cmdRun{ + Path: tt.path, + RunID: "test-run-id", + dirs: newTempDirectories(t), + } + + err := cmd.Validate() if tt.expectedError != "" { require.EqualError(t, err, tt.expectedError) + require.Nil(t, cmd.runOpts) } else { require.NoError(t, err) + require.NotNil(t, 
cmd.runOpts) + require.Equal(t, tt.path, cmd.runOpts.configPath) + require.Equal(t, "test-run-id", cmd.runOpts.runID) } }) } diff --git a/cmd/aigw/run.go b/cmd/aigw/run.go index dc2c9d4a38..6de96fe8db 100644 --- a/cmd/aigw/run.go +++ b/cmd/aigw/run.go @@ -39,6 +39,7 @@ import ( "github.com/envoyproxy/ai-gateway/internal/extensionserver" "github.com/envoyproxy/ai-gateway/internal/filterapi" "github.com/envoyproxy/ai-gateway/internal/internalapi" + "github.com/envoyproxy/ai-gateway/internal/xdg" ) // This is the template for the Envoy Gateway configuration where PLACEHOLDER_TMPDIR will be replaced with the temporary @@ -79,34 +80,20 @@ type runCmdContext struct { fakeClientSet *fake.Clientset } -// runOpts are the options for the run command. -type runOpts struct { - // extProcLauncher is the function used to launch the external processor. - extProcLauncher func(ctx context.Context, args []string, w io.Writer) error -} - // run starts the AI Gateway locally for a given configuration. // -// This will create a temporary directory and a file: -// 1. ${os.TempDir}/envoy-gateway-config.yaml: This contains the configuration for the Envoy Gateway agent to run, derived from envoyGatewayConfig. -// 2. ${os.TempDir}/envoy-ai-gateway-resources: This will contain the EG resource generated by the translation and deployed by EG. -func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) error { +// All files are written to XDG Base Directory locations: +// 1. State: $AIGW_STATE_HOME/runs/{runID}/ - configs, resources, logs +// 2. 
Runtime: $AIGW_RUNTIME_DIR/{runID}/ - ephemeral sockets +func run(ctx context.Context, c cmdRun, o *runOpts, stdout, stderr io.Writer) error { start := time.Now() - var debugLogger *slog.Logger - if c.Debug { - logHandler := slog.NewTextHandler(stderr, &slog.HandlerOptions{}) - ctrl.SetLogger(logr.FromSlogHandler(logHandler)) - debugLogger = slog.New(logHandler) - } else { - stdout = io.Discard // until GW logs are reasonable - debugLogger = slog.New(slog.NewTextHandler(io.Discard, nil)) - } // First, we need to create the self-signed certificates used for communication between the EG and Envoy. // Certificates will be placed at /tmp/envoy-gateway/certs, which is currently is not configurable: // https://github.com/envoyproxy/gateway/blob/779c0a6bbdf7dacbf25a730140a112f99c239f0e/internal/infrastructure/host/infra.go#L22-L23 // - // TODO: maybe make it skip if the certs are already there, but not sure if it's worth the complexity. + // TODO: Override Envoy Gateway cert directory to use $AIGW_RUNTIME_DIR once possible via + // https://github.com/envoyproxy/gateway/pull/7225 certGenOut := &bytes.Buffer{} certGen := root.GetRootCommand() certGen.SetOut(certGenOut) @@ -116,30 +103,39 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err return fmt.Errorf("failed to execute certgen: %w: %s", err, certGenOut.String()) } - tmpdir := filepath.Join(os.TempDir(), "aigw-run") - if err := recreateDir(tmpdir); err != nil { - return fmt.Errorf("failed to create temporary directory %s: %w", tmpdir, err) + // Create aigw log file in run directory + aigwLogFile, err := os.Create(o.logPath) + if err != nil { + return fmt.Errorf("failed to create aigw log file %s: %w", o.logPath, err) } - egConfigPath := filepath.Join(tmpdir, "envoy-gateway-config.yaml") // 1. The path to the Envoy Gateway config. - resourcesTmpdir := filepath.Join(tmpdir, "/envoy-ai-gateway-resources") // 2. The path to the resources. 
- if err := recreateDir(resourcesTmpdir); err != nil { - return err + defer aigwLogFile.Close() + + // Tee debug logger to both stderr and log file + var logWriter io.Writer + if c.Debug { + logWriter = io.MultiWriter(stderr, aigwLogFile) + // keep stdout visible in debug mode + } else { + logWriter = aigwLogFile + stdout = io.Discard // until GW logs are reasonable } - // Write the Envoy Gateway config which points to the resourcesTmpdir to tell Envoy Gateway where to find the resources. - debugLogger.Info("Writing Envoy Gateway config", "path", egConfigPath) - err := os.WriteFile(egConfigPath, []byte(strings.ReplaceAll( - envoyGatewayConfigTemplate, "PLACEHOLDER_TMPDIR", resourcesTmpdir), + logHandler := slog.NewTextHandler(logWriter, &slog.HandlerOptions{}) + ctrl.SetLogger(logr.FromSlogHandler(logHandler)) + debugLogger := slog.New(logHandler) + + // Write the Envoy Gateway config which points to the resourcesDir to tell Envoy Gateway where to find the resources. + resourcesDir := filepath.Dir(o.egResourcesPath) + debugLogger.Info("Writing Envoy Gateway config", "path", o.egConfigPath) + err = os.WriteFile(o.egConfigPath, []byte(strings.ReplaceAll( + envoyGatewayConfigTemplate, "PLACEHOLDER_TMPDIR", resourcesDir), ), 0o600) if err != nil { - return fmt.Errorf("failed to write file %s: %w", egConfigPath, err) + return fmt.Errorf("failed to write file %s: %w", o.egConfigPath, err) } - // Write the Envoy Gateway resources into a file under resourcesTmpdir. - resourceYamlPath := filepath.Join(resourcesTmpdir, "config.yaml") - debugLogger.Info("Creating Envoy Gateway resource file", "path", resourceYamlPath) - udsPath := filepath.Join(tmpdir, "uds.sock") - _ = os.Remove(udsPath) + // Write the Envoy Gateway resources into a file under resourcesDir. + debugLogger.Info("Creating Envoy Gateway resource file", "path", o.egResourcesPath) // Do the translation of the given AI Gateway resources Yaml into Envoy Gateway resources and write them to the file. 
resourcesBuf := &bytes.Buffer{} @@ -148,12 +144,12 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err envoyGatewayResourcesOut: resourcesBuf, stderrLogger: debugLogger, stderr: stderr, - tmpdir: tmpdir, - udsPath: udsPath, + tmpdir: filepath.Dir(o.logPath), // runDir + udsPath: o.extprocUDSPath, adminPort: c.AdminPort, extProcLauncher: o.extProcLauncher, } - aiGatewayResourcesYaml, err := readConfig(c.Path, c.mcpConfig, c.Debug) + aiGatewayResourcesYaml, err := readConfig(o.configPath, c.mcpConfig, c.Debug) if err != nil { return err } @@ -161,13 +157,13 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err if err != nil { return fmt.Errorf("failed to write envoy resources and run extproc: %w", err) } - err = os.WriteFile(resourceYamlPath, resourcesBuf.Bytes(), 0o600) + err = os.WriteFile(o.egResourcesPath, resourcesBuf.Bytes(), 0o600) if err != nil { - return fmt.Errorf("failed to write file %s: %w", resourceYamlPath, err) + return fmt.Errorf("failed to write file %s: %w", o.egResourcesPath, err) } - // Set up middleware with startup hook now that we know listenerPort - redirectEnvoyStdio := newEnvoyRunMiddleware(start, listenerPort, stdout, stderr) + // Set up middleware with XDG directories and startup hook now that we know listenerPort + redirectEnvoyStdio := newEnvoyRunMiddleware(&o.Directories, o.runID, start, listenerPort, stdout, stderr) ctx = middleware.WithRunMiddleware(ctx, redirectEnvoyStdio) lis, err := net.Listen("tcp", "localhost:1061") @@ -175,7 +171,7 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err return fmt.Errorf("failed to listen: %w", err) } s := grpc.NewServer() - extSrv := extensionserver.New(fakeClient, ctrl.Log, udsPath, true) + extSrv := extensionserver.New(fakeClient, ctrl.Log, o.extprocUDSPath, true) egextension.RegisterEnvoyGatewayExtensionServer(s, extSrv) grpc_health_v1.RegisterHealthServer(s, extSrv) @@ -212,7 +208,7 @@ func run(ctx 
context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err server.SetOut(io.Discard) server.SetErr(io.Discard) } - server.SetArgs([]string{"server", "--config-path", egConfigPath}) + server.SetArgs([]string{"server", "--config-path", o.egConfigPath}) // Start the gateway server. This will block until the server is stopped. // The startup hook (configured via middleware) will print the status message when Envoy is ready. @@ -223,8 +219,8 @@ func run(ctx context.Context, c cmdRun, o runOpts, stdout, stderr io.Writer) err } // newEnvoyRunMiddleware sets options for running Envoy and returns a middleware -// that configures Envoy I/O and sets up a startup hook to print the ready message. -func newEnvoyRunMiddleware(start time.Time, listenerPort int, stdout, stderr io.Writer) func(next api.RunFunc) api.RunFunc { +// that configures Envoy I/O, XDG directories, and sets up a startup hook to print the ready message. +func newEnvoyRunMiddleware(dirs *xdg.Directories, runID string, start time.Time, listenerPort int, stdout, stderr io.Writer) func(next api.RunFunc) api.RunFunc { // Define startup hook that will be called when Envoy admin is ready startupHook := func(_ context.Context, adminClient admin.AdminClient, _ string) error { // Print a status message without any timestamp formatting @@ -234,13 +230,18 @@ func newEnvoyRunMiddleware(start time.Time, listenerPort int, stdout, stderr io. 
return nil } - // aigw is primarily an Envoy controller, so ensure its output is visible + // Override func-e XDG paths to use aigw's directories and add startup hook overrides := []api.RunOption{ api.EnvoyOut(stdout), api.EnvoyErr(stderr), + // func-e will use these paths instead of its defaults + api.ConfigHome(dirs.ConfigHome), + api.DataHome(dirs.DataHome), + api.StateHome(dirs.StateHome), + api.RuntimeDir(dirs.RuntimeDir), + api.RunID(runID), admin.WithStartupHook(startupHook), } - return func(next api.RunFunc) api.RunFunc { return func(ctx context.Context, args []string, options ...api.RunOption) error { return next(ctx, args, append(options, overrides...)...) diff --git a/cmd/aigw/run_test.go b/cmd/aigw/run_test.go index 23dfdc02f6..35430e40be 100644 --- a/cmd/aigw/run_test.go +++ b/cmd/aigw/run_test.go @@ -44,7 +44,7 @@ func TestRun(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) defer cleanupRun(t, cancel) - opts := runOpts{extProcLauncher: func(context.Context, []string, io.Writer) error { return nil }} + opts := testRunOpts(t, func(context.Context, []string, io.Writer) error { return nil }) require.NoError(t, run(ctx, cmdRun{Debug: true, AdminPort: adminPort}, opts, stdout, stderr)) } @@ -53,7 +53,9 @@ func cleanupRun(t testing.TB, cancel context.CancelFunc) { if err := internaltesting.AwaitPortClosed(1975, 10*time.Second); err != nil { t.Logf("Failed to close port 1975: %v", err) } - // Delete the hard-coded path to certs defined in Envoy AI Gateway + // Delete the hard-coded path to certs defined in Envoy Gateway + // TODO: Remove once EG supports configurable cert directory + // https://github.com/envoyproxy/gateway/pull/7225 if err := os.RemoveAll("/tmp/envoy-gateway/certs"); err != nil { t.Logf("Failed to delete envoy gateway certs: %v", err) } @@ -67,9 +69,8 @@ func TestRunExtprocStartFailure(t *testing.T) { errChan := make(chan error) mockErr := errors.New("mock extproc error") go func() { - errChan <- run(ctx, cmdRun{}, runOpts{ - 
extProcLauncher: func(context.Context, []string, io.Writer) error { return mockErr }, - }, os.Stdout, io.Discard) + opts := testRunOpts(t, func(context.Context, []string, io.Writer) error { return mockErr }) + errChan <- run(ctx, cmdRun{}, opts, os.Stdout, io.Discard) }() select { @@ -241,15 +242,16 @@ func Test_newEnvoyMiddleware(t *testing.T) { start := time.Now() listenerPort := 1975 - middleware := newEnvoyRunMiddleware(start, listenerPort, &stdout, &stderr) + dirs := newTempDirectories(t) + middleware := newEnvoyRunMiddleware(dirs, "test-run", start, listenerPort, &stdout, &stderr) require.NotNil(t, middleware) err := middleware(func(ctx context.Context, args []string, options ...api.RunOption) error { require.Equal(t, t.Context(), ctx) require.Equal(t, []string{"test"}, args) - // 3 = EnvoyOut, EnvoyErr, StartupHook - require.Len(t, options, 3+len(tt.inputOptions)) + // 8 = EnvoyOut, EnvoyErr, ConfigHome, DataHome, StateHome, RuntimeDir, RunID, StartupHook + require.Len(t, options, 8+len(tt.inputOptions)) return nil })(t.Context(), []string{"test"}, tt.inputOptions...) require.NoError(t, err) @@ -265,3 +267,13 @@ func readFileFromProjectRoot(t *testing.T, file string) string { require.NoError(t, err) return string(b) } + +// testRunOpts creates runOpts for testing. +// This ensures test isolation by using t.TempDir() for all XDG directories. +func testRunOpts(t *testing.T, extProcLauncher func(context.Context, []string, io.Writer) error) *runOpts { + t.Helper() + dirs := newTempDirectories(t) + opts, err := newRunOpts(dirs, "test-run", "", extProcLauncher) + require.NoError(t, err) + return opts +} diff --git a/cmd/aigw/runopts.go b/cmd/aigw/runopts.go new file mode 100644 index 0000000000..baa0fe925c --- /dev/null +++ b/cmd/aigw/runopts.go @@ -0,0 +1,95 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package main + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/envoyproxy/ai-gateway/internal/xdg" +) + +// runOpts are the options for the run command. +type runOpts struct { + xdg.Directories + // runID is the unique identifier for this run + runID string + // extProcLauncher is the function used to launch the external processor. + extProcLauncher func(ctx context.Context, args []string, w io.Writer) error + + // Computed paths derived from Directories and runID + // configPath is the resolved aigw config file path. Either --path flag, {ConfigHome}/config.yaml if exists, or empty. + // Empty means auto-generate from OPENAI_API_KEY/AZURE_OPENAI_API_KEY environment variables. + configPath string + // logPath is {StateHome}/runs/{runID}/aigw.log + // Contains: aigw debug/info/error logs + logPath string + // egConfigPath is {StateHome}/runs/{runID}/envoy-gateway-config.yaml + // Contains: generated Envoy Gateway config that references egResourcesPath directory + // Passed to: Envoy Gateway via `envoy-gateway server --config-path ` + egConfigPath string + // egResourcesPath is {StateHome}/runs/{runID}/envoy-ai-gateway-resources/config.yaml + // Contains: Gateway, HTTPRoute, HTTPRouteFilter, Backend, Secret, BackendTrafficPolicy, SecurityPolicy, EnvoyExtensionPolicy objects + // Derived from: translating configPath (aigw resources -> Envoy Gateway resources) + // Referenced by: egConfigPath (tells Envoy Gateway where to load resources from the parent directory) + // Note: Must be in a subdirectory (not a flat file) because Envoy Gateway config template requires a directory path + egResourcesPath string + // extprocUDSPath is {RuntimeDir}/{runID}/uds.sock + // Unix domain socket for Envoy <-> aigw extproc communication + extprocUDSPath string + // extprocConfigPath is {StateHome}/runs/{runID}/extproc-config.yaml + // Contains: filterapi.Config YAML for external processor + // Derived from: translating configPath (extracts filter config 
from aigw resources) + extprocConfigPath string +} + +// newRunOpts creates runOpts with all paths computed and creates directories +// that aigw writes to directly (e.g. not ones owned by func-e or Envoy +// Gateway). Note: configPath may be empty (will auto-generate from env vars). +func newRunOpts(dirs *xdg.Directories, runID, configPath string, extProcLauncher func(context.Context, []string, io.Writer) error) (*runOpts, error) { + opts := &runOpts{ + Directories: *dirs, + runID: runID, + configPath: configPath, + extProcLauncher: extProcLauncher, + } + + // Compute all paths + runDir := filepath.Join(dirs.StateHome, "runs", runID) + opts.logPath = filepath.Join(runDir, "aigw.log") + opts.egConfigPath = filepath.Join(runDir, "envoy-gateway-config.yaml") + opts.egResourcesPath = filepath.Join(runDir, "envoy-ai-gateway-resources", "config.yaml") + opts.extprocConfigPath = filepath.Join(runDir, "extproc-config.yaml") + opts.extprocUDSPath = filepath.Join(dirs.RuntimeDir, runID, "uds.sock") + + // Create directories that aigw writes to + // runDir: for log, config, extproc-config (0o750 per XDG spec for StateHome) + if err := os.MkdirAll(runDir, 0o750); err != nil { + return nil, fmt.Errorf("failed to create run directory %s: %w", runDir, err) + } + + // Recreate runDir/envoy-ai-gateway-resources: for egResourcesPath (0o750) + // Remove if exists to ensure a clean state, then create + resourcesDir := filepath.Dir(opts.egResourcesPath) + if err := os.RemoveAll(resourcesDir); err != nil { + return nil, fmt.Errorf("failed to remove resources directory %s: %w", resourcesDir, err) + } + if err := os.MkdirAll(resourcesDir, 0o750); err != nil { + return nil, fmt.Errorf("failed to create resources directory %s: %w", resourcesDir, err) + } + + // RuntimeDir/{runID}: for UDS socket (0o700 per XDG spec for RuntimeDir) + // Remove UDS socket if exists to ensure a clean state + if err := os.MkdirAll(filepath.Dir(opts.extprocUDSPath), 0o700); err != nil { + return nil, 
fmt.Errorf("failed to create runtime directory %s: %w", filepath.Dir(opts.extprocUDSPath), err) + } + _ = os.Remove(opts.extprocUDSPath) + + return opts, nil +} diff --git a/cmd/aigw/runopts_test.go b/cmd/aigw/runopts_test.go new file mode 100644 index 0000000000..ec772f0fc3 --- /dev/null +++ b/cmd/aigw/runopts_test.go @@ -0,0 +1,204 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package main + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/envoyproxy/ai-gateway/internal/xdg" +) + +func newTempDirectories(t *testing.T) *xdg.Directories { + return &xdg.Directories{ + ConfigHome: t.TempDir(), + DataHome: t.TempDir(), + StateHome: t.TempDir(), + RuntimeDir: t.TempDir(), + } +} + +func TestNewRunOpts(t *testing.T) { + mockLauncher := func(_ context.Context, _ []string, _ io.Writer) error { return nil } + + t.Run("sets all fields correctly", func(t *testing.T) { + dirs := newTempDirectories(t) + runID := "test-run-123" + configPath := "/explicit/config.yaml" + + actual, err := newRunOpts(dirs, runID, configPath, mockLauncher) + require.NoError(t, err) + require.NotNil(t, actual) + + require.Equal(t, runID, actual.runID) + require.NotNil(t, actual.extProcLauncher) + require.Equal(t, configPath, actual.configPath) + + expectedRunDir := filepath.Join(dirs.StateHome, "runs", runID) + paths := []struct { + name string + expected string + actual string + }{ + {"logPath", filepath.Join(expectedRunDir, "aigw.log"), actual.logPath}, + {"egConfigPath", filepath.Join(expectedRunDir, "envoy-gateway-config.yaml"), actual.egConfigPath}, + {"egResourcesPath", filepath.Join(expectedRunDir, "envoy-ai-gateway-resources", "config.yaml"), actual.egResourcesPath}, + {"extprocConfigPath", filepath.Join(expectedRunDir, "extproc-config.yaml"), actual.extprocConfigPath}, + 
{"extprocUDSPath", filepath.Join(dirs.RuntimeDir, runID, "uds.sock"), actual.extprocUDSPath}, + } + + for _, p := range paths { + require.Equal(t, p.expected, p.actual, p.name) + require.True(t, filepath.IsAbs(p.actual), p.name) + } + + require.DirExists(t, expectedRunDir) + require.DirExists(t, filepath.Dir(actual.egResourcesPath)) + require.DirExists(t, filepath.Dir(actual.extprocUDSPath)) + }) + + t.Run("empty configPath remains empty", func(t *testing.T) { + dirs := newTempDirectories(t) + + actual, err := newRunOpts(dirs, "test-run", "", mockLauncher) + require.NoError(t, err) + require.Empty(t, actual.configPath) + }) +} + +func TestNewRunOpts_Permissions(t *testing.T) { + runID := "test-run-permissions" + + dirs := newTempDirectories(t) + + actual, err := newRunOpts(dirs, runID, "", nil) + require.NoError(t, err) + + // Verify runDir created with correct permissions + expectedRunDir := filepath.Join(dirs.StateHome, "runs", runID) + info, err := os.Stat(expectedRunDir) + require.NoError(t, err) + require.True(t, info.IsDir()) + require.Equal(t, os.FileMode(0o750), info.Mode().Perm()) + + // Verify egResourcesPath parent created with correct permissions + expectedResourcesDir := filepath.Dir(actual.egResourcesPath) + info, err = os.Stat(expectedResourcesDir) + require.NoError(t, err) + require.True(t, info.IsDir()) + require.Equal(t, os.FileMode(0o750), info.Mode().Perm()) + + // Verify RuntimeDir/{runID} created with correct permissions + expectedRuntimeRunDir := filepath.Join(dirs.RuntimeDir, runID) + info, err = os.Stat(expectedRuntimeRunDir) + require.NoError(t, err) + require.True(t, info.IsDir()) + require.Equal(t, os.FileMode(0o700), info.Mode().Perm()) +} + +func TestNewRunOpts_DirectoryContents(t *testing.T) { + runID := "test-run-empty" + + dirs := newTempDirectories(t) + + actual, err := newRunOpts(dirs, runID, "", nil) + require.NoError(t, err) + + // Verify runDir contains only expected entries + expectedRunDir := filepath.Join(dirs.StateHome, 
"runs", runID) + actualEntries, err := os.ReadDir(expectedRunDir) + require.NoError(t, err) + require.Len(t, actualEntries, 1) + require.Equal(t, "envoy-ai-gateway-resources", actualEntries[0].Name()) + + // Verify resourcesDir is empty + expectedResourcesDir := filepath.Dir(actual.egResourcesPath) + actualEntries, err = os.ReadDir(expectedResourcesDir) + require.NoError(t, err) + require.Empty(t, actualEntries) + + // Verify runtimeRunDir is empty + expectedRuntimeRunDir := filepath.Join(dirs.RuntimeDir, runID) + actualEntries, err = os.ReadDir(expectedRuntimeRunDir) + require.NoError(t, err) + require.Empty(t, actualEntries) +} + +func TestNewRunOpts_Errors(t *testing.T) { + t.Run("error when runDir creation fails", func(t *testing.T) { + baseDir := t.TempDir() + stateHome := filepath.Join(baseDir, "nonexistent", "readonly") + + // Make the parent read-only + parent := filepath.Dir(stateHome) + err := os.MkdirAll(parent, 0o755) + require.NoError(t, err) + t.Cleanup(func() { + _ = os.Chmod(parent, 0o755) + }) + err = os.Chmod(parent, 0o555) + require.NoError(t, err) + + dirs := newTempDirectories(t) + dirs.StateHome = stateHome + + _, err = newRunOpts(dirs, "test-run", "", nil) + require.Error(t, err) + }) + + t.Run("error when resources directory creation fails", func(t *testing.T) { + stateHome := t.TempDir() + + // Pre-create runDir successfully + runDir := filepath.Join(stateHome, "runs", "test-run-fail-resources") + err := os.MkdirAll(runDir, 0o750) + require.NoError(t, err) + + // Create a file where resources directory should be + resourcesParent := filepath.Join(runDir, "envoy-ai-gateway-resources") + err = os.WriteFile(resourcesParent, []byte("block"), 0o600) + require.NoError(t, err) + + // Make runDir read-only so RemoveAll fails + err = os.Chmod(runDir, 0o555) + require.NoError(t, err) + t.Cleanup(func() { + _ = os.Chmod(runDir, 0o755) + }) + + dirs := newTempDirectories(t) + dirs.StateHome = stateHome + + _, err = newRunOpts(dirs, 
"test-run-fail-resources", "", nil) + require.Error(t, err) + }) + + t.Run("error when runtime directory creation fails", func(t *testing.T) { + baseDir := t.TempDir() + runtimeDir := filepath.Join(baseDir, "nonexistent", "readonly") + + // Make the parent read-only + parent := filepath.Dir(runtimeDir) + err := os.MkdirAll(parent, 0o755) + require.NoError(t, err) + t.Cleanup(func() { + _ = os.Chmod(parent, 0o755) + }) + err = os.Chmod(parent, 0o555) + require.NoError(t, err) + + dirs := newTempDirectories(t) + dirs.RuntimeDir = runtimeDir + + _, err = newRunOpts(dirs, "test-run", "", nil) + require.Error(t, err) + }) +} diff --git a/go.mod b/go.mod index bcb50aaf2c..2e9ea17ff3 100644 --- a/go.mod +++ b/go.mod @@ -173,7 +173,7 @@ require ( github.com/kelseyhightower/envconfig v1.4.0 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect - github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c // indirect + github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect github.com/lyft/gostats v0.4.1 // indirect github.com/magiconair/properties v1.8.10 // indirect github.com/mailru/easyjson v0.9.0 // indirect diff --git a/go.sum b/go.sum index 8d459411b9..cd81dd8f61 100644 --- a/go.sum +++ b/go.sum @@ -243,7 +243,6 @@ github.com/google/cel-go v0.26.1 h1:iPbVVEdkhTX++hpe3lzSk7D3G3QSYqLGoHOcEio+UXQ= github.com/google/cel-go v0.26.1/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-containerregistry v0.20.6 
h1:cvWX87UxxLgaH76b4hIvya6Dzz9qHB31qAwjAohdSTU= @@ -300,8 +299,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c h1:VtwQ41oftZwlMnOEbMWQtSEUgU64U4s+GHk7hZK+jtY= -github.com/lufia/plan9stats v0.0.0-20220913051719-115f729f3c8c/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= +github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 h1:PwQumkgq4/acIiZhtifTV5OUqqiP82UAl0h87xj/l9k= +github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg= github.com/lyft/gostats v0.4.1 h1:oR6p4HRCGxt0nUntmZIWmYMgyothBi3eZH2A71vRjsc= github.com/lyft/gostats v0.4.1/go.mod h1:Tpx2xRzz4t+T2Tx0xdVgIoBdR2UMVz+dKnE3X01XSd8= github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= diff --git a/internal/xdg/directories.go b/internal/xdg/directories.go new file mode 100644 index 0000000000..6b5a7f8589 --- /dev/null +++ b/internal/xdg/directories.go @@ -0,0 +1,34 @@ +// Copyright Envoy AI Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package xdg + +// Directories holds XDG Base Directory paths for aigw. +// See https://specifications.freedesktop.org/basedir-spec/latest/ +type Directories struct { + // ConfigHome is the base directory for user-specific configuration files. + // XDG specification: $XDG_CONFIG_HOME + // Default: ~/.config/aigw (or $AIGW_CONFIG_HOME) + // Contents: config.yaml (default config), envoy-version (func-e version preference) + ConfigHome string + + // DataHome is the base directory for user-specific data files. 
+ // XDG specification: $XDG_DATA_HOME + // Default: ~/.local/share/aigw (or $AIGW_DATA_HOME) + // Contents: envoy-versions/ (downloaded Envoy binaries via func-e) + DataHome string + + // StateHome is the base directory for user-specific state data. + // XDG specification: $XDG_STATE_HOME + // Default: ~/.local/state/aigw (or $AIGW_STATE_HOME) + // Contents: runs/{runID}/ (per-run logs and configs), envoy-runs/{runID}/ (func-e logs) + StateHome string + + // RuntimeDir is the base directory for user-specific runtime files. + // XDG specification: $XDG_RUNTIME_DIR + // Default: /tmp/aigw-${UID} (or $AIGW_RUNTIME_DIR) + // Contents: {runID}/uds.sock (extproc socket), {runID}/admin-address.txt (func-e admin) + RuntimeDir string +} diff --git a/site/docs/cli/installation.md b/site/docs/cli/installation.md index 9b6c59d51f..d24dff1d86 100644 --- a/site/docs/cli/installation.md +++ b/site/docs/cli/installation.md @@ -22,10 +22,7 @@ you can mount it as a volume. The following example runs the AI Gateway with the default configuration for the [OpenAI provider](../getting-started/connect-providers/openai.md): ```shell -$ docker run --rm -p 1975:1975 -e OPENAI_API_KEY=OPENAI_API_KEY envoyproxy/ai-gateway-cli run -looking up the latest patch for Envoy version 1.35 -1.35.3 is already downloaded -starting: /tmp/envoy-gateway/versions/1.35.3/bin/envoy in run directory /tmp/envoy-gateway/runs/1758086300246501521 +docker run --rm -p 1975:1975 -e OPENAI_API_KEY=OPENAI_API_KEY envoyproxy/ai-gateway-cli run ``` ## Building the latest version @@ -76,6 +73,26 @@ Commands: Run "aigw --help" for more information on a command. 
``` +## Configuration + +The [XDG Base Directory Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) defines standard locations for user-specific files: + +- **Config files**: User-specific configuration (persistent, shared) +- **Data files**: Downloaded binaries (persistent, shared) +- **State files**: Logs and configs per run (persistent, debugging) +- **Runtime files**: Ephemeral files like sockets (deleted on reboot) + +`aigw` adopts these conventions to separate configuration, downloaded Envoy binaries, logs, and ephemeral runtime files. + +| Environment Variable | Default Path | CLI Flag | +| -------------------- | --------------------- | --------------- | +| `AIGW_CONFIG_HOME` | `~/.config/aigw` | `--config-home` | +| `AIGW_DATA_HOME` | `~/.local/share/aigw` | `--data-home` | +| `AIGW_STATE_HOME` | `~/.local/state/aigw` | `--state-home` | +| `AIGW_RUNTIME_DIR` | `/tmp/aigw-${UID}` | `--runtime-dir` | + +**Priority**: CLI flags > Environment variables > Defaults + ## What's next? The following sections provide more information about each of the CLI commands: diff --git a/site/docs/cli/run.md b/site/docs/cli/run.md index cb8f59900a..c255585ef1 100644 --- a/site/docs/cli/run.md +++ b/site/docs/cli/run.md @@ -300,6 +300,63 @@ focused on retrieval and semantic analysis. See [docker-compose-otel.yaml][docker-compose-otel.yaml] for a complete example configuration. 
+## Configuration + +### File Locations + +`aigw run` uses the [XDG Base Directory Specification](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) to organize files: + +| Environment Variable | Default Path | Purpose | +| -------------------- | --------------------- | --------------------------- | +| `AIGW_CONFIG_HOME` | `~/.config/aigw` | User configuration files | +| `AIGW_DATA_HOME` | `~/.local/share/aigw` | Downloaded Envoy binaries | +| `AIGW_STATE_HOME` | `~/.local/state/aigw` | Persistent logs and configs | +| `AIGW_RUNTIME_DIR` | `/tmp/aigw-${UID}` | Ephemeral runtime files | + +See [Installation - Configuration](./installation.md#configuration) for more details about XDG directories. + +### File Mappings + +Each invocation creates a unique run identifier (`runID`) in format `YYYYMMDD_HHMMSS_UUU` to isolate concurrent runs: + +| File Type | Purpose | Path | Type | +| ------------------------- | ---------------------------------------- | ---------------------------------------------------------------- | ------- | +| Default Config | Configuration file location | `${AIGW_CONFIG_HOME}/config.yaml` | CONFIG | +| Envoy Version Preference | Selected Envoy version (via func-e) | `${AIGW_CONFIG_HOME}/envoy-version` | CONFIG | +| Envoy Binaries | Downloaded executables (via func-e) | `${AIGW_DATA_HOME}/envoy-versions/{version}/bin/envoy` | DATA | +| AIGW Logs | Gateway logs and stderr output | `${AIGW_STATE_HOME}/runs/{runID}/aigw.log` | STATE | +| Envoy Gateway Config | Generated EG configuration | `${AIGW_STATE_HOME}/runs/{runID}/envoy-gateway-config.yaml` | STATE | +| Envoy Gateway Resources | Generated EG resources (Gateway, Routes) | `${AIGW_STATE_HOME}/runs/{runID}/envoy-ai-gateway-resources/...` | STATE | +| External Processor Config | Generated extproc configuration | `${AIGW_STATE_HOME}/runs/{runID}/extproc-config.yaml` | STATE | +| Envoy Run Logs (func-e) | Envoy stdout/stderr (via func-e) | 
`${AIGW_STATE_HOME}/envoy-runs/{runID}/stdout.log,stderr.log` | STATE | +| UDS Socket | Unix domain socket for extproc | `${AIGW_RUNTIME_DIR}/{runID}/uds.sock` | RUNTIME | +| Admin Address (func-e) | Envoy admin endpoint (via func-e) | `${AIGW_RUNTIME_DIR}/{runID}/admin-address.txt` | RUNTIME | + +**File Categories:** + +- **CONFIG**: User-specific configuration (persistent, shared across runs) +- **DATA**: Downloaded binaries (persistent, shared across runs) +- **STATE**: Per-run logs and configs (persistent for debugging) +- **RUNTIME**: Ephemeral files like sockets (cleaned on reboot) + +### `runID` + +By default, `aigw run` generates a timestamp-based `runID` for each invocation. You can customize this for predictable paths: + +```shell +# Use run ID "0" for Docker/Kubernetes deployments +aigw run --run-id=0 + +# Or via environment variable +AIGW_RUN_ID=production aigw run +``` + +Custom run IDs: + +- Enable predictable file paths in containers +- Allow correlation across multiple runs with the same ID +- Must not contain path separators (`/` or `\`) + --- [openinference]: https://github.com/Arize-ai/openinference/tree/main/spec