Skip to content

Commit 4d68a76

Browse files
authored
Refactor Flight ingress into server layer and harden lifecycle (#214)
* controlplane: fix flight ingress lifecycle and config * more agents instructions * Fix flight session reaping for active handles Keep auth sessions alive while query/prepared handles are still active so handle TTL remains effective. Add tests for reaper behavior and file/env config precedence coverage for flight ingress duration settings. * Finish flight ingress refactor plan
1 parent 04f9268 commit 4d68a76

File tree

16 files changed

+2310
-1196
lines changed

16 files changed

+2310
-1196
lines changed

AGENTS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
# AGENTS
22

3+
## Work Style
4+
- Prefer small, incremental PRs aligned to deliverables.
5+
- TDD with red-green cycle is required: write/adjust tests first and run them to confirm they fail (red), then implement the minimum code to make them pass (green).
6+
- Keep configs and flags explicit; document defaults in README.
7+
- Provide runbooks for local dev and failure recovery.
8+
- When following a plan file, mark tasks upon completion
9+
- When creating new branch from origin/main, do not track origin/main.
310
- Always run lint before committing.
11+
- Parallelize using subagents when possible.

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ Duckgres exposes Prometheus metrics on `:9090/metrics`. The metrics port is curr
6363
| `duckgres_auth_failures_total` | Counter | Total number of authentication failures |
6464
| `duckgres_rate_limit_rejects_total` | Counter | Total number of connections rejected due to rate limiting |
6565
| `duckgres_rate_limited_ips` | Gauge | Number of currently rate-limited IP addresses |
66+
| `duckgres_flight_auth_sessions_active` | Gauge | Number of active Flight auth sessions on the control plane |
67+
| `duckgres_control_plane_workers_active` | Gauge | Number of active control-plane worker processes |
68+
| `duckgres_flight_sessions_reaped_total{trigger}` | Counter | Number of Flight auth sessions reaped (`trigger=periodic|forced`) |
69+
| `duckgres_flight_max_workers_retry_total{outcome}` | Counter | Max-worker retry outcomes for Flight session creation (`outcome=attempted|succeeded|failed`) |
6670

6771
### Testing Metrics
6872

@@ -128,6 +132,9 @@ Create a `duckgres.yaml` file (see `duckgres.example.yaml` for a complete exampl
128132
host: "0.0.0.0"
129133
port: 5432
130134
flight_port: 8815
135+
flight_session_idle_ttl: "10m"
136+
flight_session_reap_interval: "1m"
137+
flight_handle_idle_ttl: "15m"
131138
data_dir: "./data"
132139

133140
tls:
@@ -166,6 +173,9 @@ Run with config file:
166173
| `DUCKGRES_HOST` | Host to bind to | `0.0.0.0` |
167174
| `DUCKGRES_PORT` | Port to listen on | `5432` |
168175
| `DUCKGRES_FLIGHT_PORT` | Control-plane Flight SQL ingress port (`0` disables) | `0` |
176+
| `DUCKGRES_FLIGHT_SESSION_IDLE_TTL` | Flight auth session idle TTL | `10m` |
177+
| `DUCKGRES_FLIGHT_SESSION_REAP_INTERVAL` | Flight auth session reap interval | `1m` |
178+
| `DUCKGRES_FLIGHT_HANDLE_IDLE_TTL` | Flight prepared/query handle idle TTL | `15m` |
169179
| `DUCKGRES_DATA_DIR` | Directory for DuckDB files | `./data` |
170180
| `DUCKGRES_CERT` | TLS certificate file | `./certs/server.crt` |
171181
| `DUCKGRES_KEY` | TLS private key file | `./certs/server.key` |
@@ -208,6 +218,9 @@ Options:
208218
-host string Host to bind to
209219
-port int Port to listen on
210220
-flight-port int Control-plane Arrow Flight SQL ingress port, 0=disabled
221+
-flight-session-idle-ttl string Flight auth session idle TTL (e.g., '10m')
222+
-flight-session-reap-interval string Flight auth session reap interval (e.g., '1m')
223+
-flight-handle-idle-ttl string Flight prepared/query handle idle TTL (e.g., '15m')
211224
-data-dir string Directory for DuckDB files
212225
-cert string TLS certificate file
213226
-key string TLS private key file

config_resolution.go

Lines changed: 92 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,26 @@ import (
1010
type configCLIInputs struct {
1111
Set map[string]bool
1212

13-
Host string
14-
Port int
15-
FlightPort int
16-
DataDir string
17-
CertFile string
18-
KeyFile string
19-
ProcessIsolation bool
20-
IdleTimeout string
21-
MemoryLimit string
22-
Threads int
23-
MemoryBudget string
24-
MemoryRebalance bool
25-
MaxWorkers int
26-
MinWorkers int
27-
ACMEDomain string
28-
ACMEEmail string
29-
ACMECacheDir string
13+
Host string
14+
Port int
15+
FlightPort int
16+
FlightSessionIdleTTL string
17+
FlightSessionReapInterval string
18+
FlightHandleIdleTTL string
19+
DataDir string
20+
CertFile string
21+
KeyFile string
22+
ProcessIsolation bool
23+
IdleTimeout string
24+
MemoryLimit string
25+
Threads int
26+
MemoryBudget string
27+
MemoryRebalance bool
28+
MaxWorkers int
29+
MinWorkers int
30+
ACMEDomain string
31+
ACMEEmail string
32+
ACMECacheDir string
3033
}
3134

3235
type resolvedConfig struct {
@@ -35,12 +38,15 @@ type resolvedConfig struct {
3538

3639
func defaultServerConfig() server.Config {
3740
return server.Config{
38-
Host: "0.0.0.0",
39-
Port: 5432,
40-
FlightPort: 0,
41-
DataDir: "./data",
42-
TLSCertFile: "./certs/server.crt",
43-
TLSKeyFile: "./certs/server.key",
41+
Host: "0.0.0.0",
42+
Port: 5432,
43+
FlightPort: 0,
44+
FlightSessionIdleTTL: 10 * time.Minute,
45+
FlightSessionReapInterval: 1 * time.Minute,
46+
FlightHandleIdleTTL: 15 * time.Minute,
47+
DataDir: "./data",
48+
TLSCertFile: "./certs/server.crt",
49+
TLSKeyFile: "./certs/server.key",
4450
Users: map[string]string{
4551
"postgres": "postgres",
4652
},
@@ -71,6 +77,27 @@ func resolveEffectiveConfig(fileCfg *FileConfig, cli configCLIInputs, getenv fun
7177
if fileCfg.FlightPort != 0 {
7278
cfg.FlightPort = fileCfg.FlightPort
7379
}
80+
if fileCfg.FlightSessionIdleTTL != "" {
81+
if d, err := time.ParseDuration(fileCfg.FlightSessionIdleTTL); err == nil {
82+
cfg.FlightSessionIdleTTL = d
83+
} else {
84+
warn("Invalid flight_session_idle_ttl duration: " + err.Error())
85+
}
86+
}
87+
if fileCfg.FlightSessionReapInterval != "" {
88+
if d, err := time.ParseDuration(fileCfg.FlightSessionReapInterval); err == nil {
89+
cfg.FlightSessionReapInterval = d
90+
} else {
91+
warn("Invalid flight_session_reap_interval duration: " + err.Error())
92+
}
93+
}
94+
if fileCfg.FlightHandleIdleTTL != "" {
95+
if d, err := time.ParseDuration(fileCfg.FlightHandleIdleTTL); err == nil {
96+
cfg.FlightHandleIdleTTL = d
97+
} else {
98+
warn("Invalid flight_handle_idle_ttl duration: " + err.Error())
99+
}
100+
}
74101
if fileCfg.DataDir != "" {
75102
cfg.DataDir = fileCfg.DataDir
76103
}
@@ -203,6 +230,27 @@ func resolveEffectiveConfig(fileCfg *FileConfig, cli configCLIInputs, getenv fun
203230
warn("Invalid DUCKGRES_FLIGHT_PORT: " + err.Error())
204231
}
205232
}
233+
if v := getenv("DUCKGRES_FLIGHT_SESSION_IDLE_TTL"); v != "" {
234+
if d, err := time.ParseDuration(v); err == nil {
235+
cfg.FlightSessionIdleTTL = d
236+
} else {
237+
warn("Invalid DUCKGRES_FLIGHT_SESSION_IDLE_TTL duration: " + err.Error())
238+
}
239+
}
240+
if v := getenv("DUCKGRES_FLIGHT_SESSION_REAP_INTERVAL"); v != "" {
241+
if d, err := time.ParseDuration(v); err == nil {
242+
cfg.FlightSessionReapInterval = d
243+
} else {
244+
warn("Invalid DUCKGRES_FLIGHT_SESSION_REAP_INTERVAL duration: " + err.Error())
245+
}
246+
}
247+
if v := getenv("DUCKGRES_FLIGHT_HANDLE_IDLE_TTL"); v != "" {
248+
if d, err := time.ParseDuration(v); err == nil {
249+
cfg.FlightHandleIdleTTL = d
250+
} else {
251+
warn("Invalid DUCKGRES_FLIGHT_HANDLE_IDLE_TTL duration: " + err.Error())
252+
}
253+
}
206254
if v := getenv("DUCKGRES_DATA_DIR"); v != "" {
207255
cfg.DataDir = v
208256
}
@@ -316,6 +364,27 @@ func resolveEffectiveConfig(fileCfg *FileConfig, cli configCLIInputs, getenv fun
316364
if cli.Set["flight-port"] {
317365
cfg.FlightPort = cli.FlightPort
318366
}
367+
if cli.Set["flight-session-idle-ttl"] {
368+
if d, err := time.ParseDuration(cli.FlightSessionIdleTTL); err == nil {
369+
cfg.FlightSessionIdleTTL = d
370+
} else {
371+
warn("Invalid --flight-session-idle-ttl duration: " + err.Error())
372+
}
373+
}
374+
if cli.Set["flight-session-reap-interval"] {
375+
if d, err := time.ParseDuration(cli.FlightSessionReapInterval); err == nil {
376+
cfg.FlightSessionReapInterval = d
377+
} else {
378+
warn("Invalid --flight-session-reap-interval duration: " + err.Error())
379+
}
380+
}
381+
if cli.Set["flight-handle-idle-ttl"] {
382+
if d, err := time.ParseDuration(cli.FlightHandleIdleTTL); err == nil {
383+
cfg.FlightHandleIdleTTL = d
384+
} else {
385+
warn("Invalid --flight-handle-idle-ttl duration: " + err.Error())
386+
}
387+
}
319388
if cli.Set["data-dir"] {
320389
cfg.DataDir = cli.DataDir
321390
}

controlplane/control.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,11 @@ func RunControlPlane(cfg ControlPlaneConfig) {
170170
// It is intentionally started after pre-warm to avoid concurrent worker
171171
// creation races between pre-warm and first external Flight requests.
172172
if cfg.FlightPort > 0 {
173-
flightIngress, err := NewFlightIngress(cfg.Host, cfg.FlightPort, tlsCfg, cfg.Users, sessions)
173+
flightIngress, err := NewFlightIngress(cfg.Host, cfg.FlightPort, tlsCfg, cfg.Users, sessions, FlightIngressConfig{
174+
SessionIdleTTL: cfg.FlightSessionIdleTTL,
175+
SessionReapTick: cfg.FlightSessionReapInterval,
176+
HandleIdleTTL: cfg.FlightHandleIdleTTL,
177+
})
174178
if err != nil {
175179
slog.Error("Failed to initialize Flight ingress.", "error", err)
176180
os.Exit(1)
@@ -731,7 +735,11 @@ func (cp *ControlPlane) recoverFlightIngressAfterFailedReload() {
731735
return
732736
}
733737

734-
flightIngress, err := NewFlightIngress(cp.cfg.Host, cp.cfg.FlightPort, cp.tlsConfig, cp.cfg.Users, cp.sessions)
738+
flightIngress, err := NewFlightIngress(cp.cfg.Host, cp.cfg.FlightPort, cp.tlsConfig, cp.cfg.Users, cp.sessions, FlightIngressConfig{
739+
SessionIdleTTL: cp.cfg.FlightSessionIdleTTL,
740+
SessionReapTick: cp.cfg.FlightSessionReapInterval,
741+
HandleIdleTTL: cp.cfg.FlightHandleIdleTTL,
742+
})
735743
if err != nil {
736744
slog.Error("Failed to recover Flight ingress after reload failure.", "error", err)
737745
return

0 commit comments

Comments
 (0)