Skip to content

Commit 5771b5d

Browse files
committed
Replace poor man's Config with something proper
1 parent 6a38f0a commit 5771b5d

File tree

5 files changed

+137
-204
lines changed

5 files changed

+137
-204
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ tokio = { version = "1", features = [
6767
] }
6868
tracing = "0.1"
6969
mock_instant = { version = "0.6", optional = true }
70-
clap = { version = "4", features = ["derive"], optional = true }
70+
clap = { version = "4", features = ["derive", "env"] }
7171
rand = { version = "0.9", optional = true }
7272
rand_distr = { version = "0.5", optional = true }
7373
http = { version = "1", optional = true }
@@ -86,5 +86,5 @@ testcontainers-modules = { version = "0.11", features = ["minio"] }
8686

8787
[features]
8888
mock-clock = ["dep:mock_instant"]
89-
sim = ["dep:clap", "dep:rand", "dep:rand_distr", "dep:http"]
89+
sim = ["dep:rand", "dep:rand_distr", "dep:http"]
9090
docker-tests = []

src/bin/s3_cache.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use std::collections::HashMap;
2-
1+
use clap::Parser;
32
use s3_cache::{Config, start_app};
43

54
fn main() {
@@ -8,11 +7,8 @@ fn main() {
87
// other thread makes sure that this invariant is upheld.
98
unsafe { std::env::set_var("AWS_EC2_METADATA_DISABLED", "true") };
109

11-
let vars: HashMap<String, String> = std::env::vars().collect();
12-
let config = Config::from_env(&vars);
13-
14-
// Drop ENV hashmap to free memory before we start the server.
15-
drop(vars);
10+
let config = Config::parse();
11+
config.validate();
1612

1713
let runtime = tokio::runtime::Builder::new_multi_thread()
1814
.enable_all()

src/config.rs

Lines changed: 108 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -1,134 +1,97 @@
1-
use std::{
2-
collections::HashMap,
3-
fmt::{Display, Formatter},
4-
net::SocketAddr,
5-
};
1+
use std::fmt::{Display, Formatter};
2+
use std::net::SocketAddr;
63

7-
/// Configuration for the S3 caching proxy server.
4+
use clap::Parser;
5+
6+
/// S3-compatible caching proxy.
87
///
9-
/// All settings can be loaded from environment variables via [`from_env`](Self::from_env).
8+
/// All options can be set as CLI flags (`--upstream-endpoint`) or environment variables
9+
/// (`UPSTREAM_ENDPOINT`). CLI flags take precedence over environment variables.
10+
#[derive(Parser)]
11+
#[command(version, about = "S3-compatible caching proxy")]
1012
pub struct Config {
13+
/// Proxy listen address
14+
#[arg(long, env = "LISTEN_ADDR", default_value = "0.0.0.0:8080")]
1115
pub listen_addr: SocketAddr,
16+
17+
/// S3-compatible upstream endpoint URL
18+
#[arg(long, env = "UPSTREAM_ENDPOINT")]
1219
pub upstream_endpoint: String,
20+
21+
/// Access key for upstream S3
22+
#[arg(long, env = "UPSTREAM_ACCESS_KEY_ID")]
1323
pub upstream_access_key_id: String,
24+
25+
/// Secret key for upstream S3
26+
#[arg(long, env = "UPSTREAM_SECRET_ACCESS_KEY")]
1427
pub upstream_secret_access_key: String,
28+
29+
/// AWS region for signing upstream requests.
30+
/// Must match the region your MinIO/S3 backend is configured with, or `us-east-1`
31+
/// (MinIO accepts `us-east-1` as a backward-compatibility alias for any region).
32+
#[arg(long, env = "UPSTREAM_REGION", default_value = "us-east-1")]
1533
pub upstream_region: String,
34+
35+
/// Access key accepted from proxy clients
36+
#[arg(long, env = "CLIENT_ACCESS_KEY_ID")]
1637
pub client_access_key_id: String,
38+
39+
/// Secret key accepted from proxy clients
40+
#[arg(long, env = "CLIENT_SECRET_ACCESS_KEY")]
1741
pub client_secret_access_key: String,
42+
43+
/// Enable caching
44+
#[arg(long, env = "CACHE_ENABLED", default_value_t = true, action = clap::ArgAction::Set)]
1845
pub cache_enabled: bool,
19-
pub cache_dryrun: bool,
46+
47+
/// Dry-run mode: serve from cache but do not write new entries
48+
#[arg(long, env = "CACHE_DRY_RUN", default_value_t = false, action = clap::ArgAction::Set)]
49+
pub cache_dry_run: bool,
50+
51+
/// Number of cache shards
52+
#[arg(long, env = "CACHE_SHARDS", default_value_t = 16)]
2053
pub cache_shards: usize,
54+
55+
/// Maximum number of cache entries
56+
#[arg(long, env = "CACHE_MAX_ENTRIES", default_value_t = 10_000)]
2157
pub cache_max_entries: usize,
58+
59+
/// Maximum cache size in bytes (default: 1 GB)
60+
#[arg(long, env = "CACHE_MAX_SIZE_BYTES", default_value_t = 1_073_741_824)]
2261
pub cache_max_size_bytes: usize,
62+
63+
/// Maximum cacheable object size in bytes (default: 10 MB)
64+
#[arg(
65+
long,
66+
env = "CACHE_MAX_OBJECT_SIZE_BYTES",
67+
default_value_t = 10_485_760
68+
)]
2369
pub cache_max_object_size_bytes: usize,
70+
71+
/// Cache time-to-live in seconds (default: 24 hours)
72+
#[arg(long, env = "CACHE_TTL_SECONDS", default_value_t = 86_400)]
2473
pub cache_ttl_seconds: usize,
74+
75+
/// Tokio worker thread count
76+
#[arg(long, env = "WORKER_THREADS", default_value_t = 4)]
2577
pub worker_threads: usize,
78+
79+
/// OpenTelemetry OTLP gRPC endpoint
80+
#[arg(long, env = "OTEL_GRPC_ENDPOINT_URL")]
2681
pub otel_grpc_endpoint_url: Option<String>,
82+
83+
/// Prometheus textfile collector directory
84+
#[arg(long, env = "PROMETHEUS_TEXTFILE_DIR")]
2785
pub prometheus_textfile_dir: Option<String>,
2886
}
2987

3088
impl Config {
31-
/// Loads configuration from environment variables.
32-
///
33-
/// # Required environment variables
34-
///
35-
/// - `UPSTREAM_ENDPOINT`: S3-compatible endpoint URL
36-
/// - `UPSTREAM_ACCESS_KEY_ID`: Access key for upstream S3
37-
/// - `UPSTREAM_SECRET_ACCESS_KEY`: Secret key for upstream S3
38-
/// - `CLIENT_ACCESS_KEY_ID`: Access key for proxy clients
39-
/// - `CLIENT_SECRET_ACCESS_KEY`: Secret key for proxy clients
40-
///
41-
/// # Optional environment variables with defaults
42-
///
43-
/// - `LISTEN_ADDR`: Proxy listen address (default: `0.0.0.0:8080`)
44-
/// - `UPSTREAM_REGION`: AWS region used to sign upstream requests (default: `us-east-1`).
45-
/// Must match the region your MinIO/S3 backend is configured with, **or** `us-east-1`
46-
/// (MinIO accepts `us-east-1` as a backward-compatibility alias for any configured region).
47-
/// - `CACHE_ENABLED`: Enable caching (default: `true`)
48-
/// - `CACHE_DRYRUN`: Dry-run mode for cache validation (default: `false`)
49-
/// - `CACHE_SHARDS`: Number of cache shards (default: `16`)
50-
/// - `CACHE_MAX_ENTRIES`: Maximum cache entries (default: `10000`)
51-
/// - `CACHE_MAX_SIZE_BYTES`: Maximum cache size in bytes (default: `1073741824` = 1 GB)
52-
/// - `CACHE_MAX_OBJECT_SIZE_BYTES`: Maximum cacheable object size (default: `10485760` = 10 MB)
53-
/// - `CACHE_TTL_SECONDS`: Cache time-to-live in seconds (default: `86400` = 24 hours)
54-
/// - `WORKER_THREADS`: Tokio worker threads (default: `4`)
55-
/// - `OTEL_GRPC_ENDPOINT_URL`: OpenTelemetry OTLP endpoint (optional)
56-
/// - `PROMETHEUS_TEXTFILE_DIR`: Prometheus textfile collector directory (optional)
89+
/// Validates cross-field constraints. Call this after parsing.
5790
///
5891
/// # Panics
5992
///
60-
/// Panics if required variables are missing or if validation fails.
61-
pub fn from_env(vars: &HashMap<String, String>) -> Self {
62-
let config = Self {
63-
listen_addr: vars
64-
.get("LISTEN_ADDR")
65-
.map(|s| s.parse().expect("invalid LISTEN_ADDR"))
66-
.unwrap_or_else(|| "0.0.0.0:8080".parse().unwrap()),
67-
upstream_endpoint: vars
68-
.get("UPSTREAM_ENDPOINT")
69-
.cloned()
70-
.expect("UPSTREAM_ENDPOINT is required"),
71-
upstream_access_key_id: vars
72-
.get("UPSTREAM_ACCESS_KEY_ID")
73-
.cloned()
74-
.expect("UPSTREAM_ACCESS_KEY_ID is required"),
75-
upstream_secret_access_key: vars
76-
.get("UPSTREAM_SECRET_ACCESS_KEY")
77-
.cloned()
78-
.expect("UPSTREAM_SECRET_ACCESS_KEY is required"),
79-
upstream_region: vars
80-
.get("UPSTREAM_REGION")
81-
.cloned()
82-
.unwrap_or_else(|| "us-east-1".to_string()),
83-
client_access_key_id: vars
84-
.get("CLIENT_ACCESS_KEY_ID")
85-
.cloned()
86-
.expect("CLIENT_ACCESS_KEY_ID is required"),
87-
client_secret_access_key: vars
88-
.get("CLIENT_SECRET_ACCESS_KEY")
89-
.cloned()
90-
.expect("CLIENT_SECRET_ACCESS_KEY is required"),
91-
cache_enabled: vars
92-
.get("CACHE_ENABLED")
93-
.map(|s| s.parse().expect("invalid CACHE_ENABLED"))
94-
.unwrap_or(true),
95-
cache_dryrun: vars
96-
.get("CACHE_DRYRUN")
97-
.map(|s| s.parse().expect("invalid CACHE_DRYRUN"))
98-
.unwrap_or(false),
99-
cache_shards: vars
100-
.get("CACHE_SHARDS")
101-
.map(|s| s.parse().expect("invalid CACHE_SHARDS"))
102-
.unwrap_or(16),
103-
cache_max_entries: vars
104-
.get("CACHE_MAX_ENTRIES")
105-
.map(|s| s.parse().expect("invalid CACHE_MAX_ENTRIES"))
106-
.unwrap_or(10_000),
107-
cache_max_size_bytes: vars
108-
.get("CACHE_MAX_SIZE_BYTES")
109-
.map(|s| s.parse().expect("invalid CACHE_MAX_SIZE_BYTES"))
110-
.unwrap_or(1_073_741_824),
111-
cache_max_object_size_bytes: vars
112-
.get("CACHE_MAX_OBJECT_SIZE_BYTES")
113-
.map(|s: &String| s.parse().expect("invalid CACHE_MAX_OBJECT_SIZE_BYTES"))
114-
.unwrap_or(10_485_760),
115-
cache_ttl_seconds: vars
116-
.get("CACHE_TTL_SECONDS")
117-
.map(|s| s.parse().expect("invalid CACHE_TTL_SECONDS"))
118-
.unwrap_or(86_400),
119-
worker_threads: vars
120-
.get("WORKER_THREADS")
121-
.map(|s| s.parse().expect("invalid WORKER_THREADS"))
122-
.unwrap_or(4),
123-
otel_grpc_endpoint_url: vars.get("OTEL_GRPC_ENDPOINT_URL").cloned(),
124-
prometheus_textfile_dir: vars.get("PROMETHEUS_TEXTFILE_DIR").cloned(),
125-
};
126-
127-
config.validate();
128-
config
129-
}
130-
131-
fn validate(&self) {
93+
/// Panics if any constraint is violated.
94+
pub fn validate(&self) {
13295
if self.cache_max_size_bytes < self.cache_max_object_size_bytes {
13396
panic!(
13497
"Invalid configuration: cache_max_size_bytes ({}) must be >= max_cacheable_object_size ({})",
@@ -161,7 +124,7 @@ impl Display for Config {
161124
"Config{{ listen_addr: {}, upstream_endpoint: {}, upstream_region: {}, \
162125
cache_max_entries: {}, cache_max_size_bytes: {}, cache_ttl_seconds: {}, \
163126
max_cacheable_object_size: {}, otel_grpc_endpoint_url: {:?}, cache_shards: {}, \
164-
cache_dryrun: {}, worker_threads: {}, prometheus_textfile_dir: {:?} }}",
127+
cache_dry_run: {}, worker_threads: {}, prometheus_textfile_dir: {:?} }}",
165128
self.listen_addr,
166129
self.upstream_endpoint,
167130
self.upstream_region,
@@ -171,7 +134,7 @@ impl Display for Config {
171134
self.cache_max_object_size_bytes,
172135
self.otel_grpc_endpoint_url,
173136
self.cache_shards,
174-
self.cache_dryrun,
137+
self.cache_dry_run,
175138
self.worker_threads,
176139
self.prometheus_textfile_dir,
177140
)
@@ -180,34 +143,36 @@ impl Display for Config {
180143

181144
#[cfg(test)]
182145
mod tests {
146+
use std::net::SocketAddr;
147+
183148
use super::*;
184149

185-
fn minimal_env() -> HashMap<String, String> {
186-
let mut env = HashMap::new();
187-
env.insert(
188-
"UPSTREAM_ENDPOINT".to_string(),
189-
"http://minio:9000".to_string(),
190-
);
191-
env.insert(
192-
"UPSTREAM_ACCESS_KEY_ID".to_string(),
193-
"minioadmin".to_string(),
194-
);
195-
env.insert(
196-
"UPSTREAM_SECRET_ACCESS_KEY".to_string(),
197-
"minioadmin".to_string(),
198-
);
199-
env.insert("CLIENT_ACCESS_KEY_ID".to_string(), "testclient".to_string());
200-
env.insert(
201-
"CLIENT_SECRET_ACCESS_KEY".to_string(),
202-
"testclient".to_string(),
203-
);
204-
env
150+
fn minimal_config() -> Config {
151+
Config {
152+
listen_addr: "0.0.0.0:8080".parse::<SocketAddr>().unwrap(),
153+
upstream_endpoint: "http://minio:9000".to_string(),
154+
upstream_access_key_id: "minioadmin".to_string(),
155+
upstream_secret_access_key: "minioadmin".to_string(),
156+
upstream_region: "us-east-1".to_string(),
157+
client_access_key_id: "testclient".to_string(),
158+
client_secret_access_key: "testclient".to_string(),
159+
cache_enabled: true,
160+
cache_dry_run: false,
161+
cache_shards: 16,
162+
cache_max_entries: 10_000,
163+
cache_max_size_bytes: 1_073_741_824,
164+
cache_max_object_size_bytes: 10_485_760,
165+
cache_ttl_seconds: 86_400,
166+
worker_threads: 4,
167+
otel_grpc_endpoint_url: None,
168+
prometheus_textfile_dir: None,
169+
}
205170
}
206171

207172
#[test]
208173
fn config_valid() {
209-
let env = minimal_env();
210-
let config = Config::from_env(&env);
174+
let config = minimal_config();
175+
config.validate();
211176
assert_eq!(config.cache_max_entries, 10_000);
212177
assert_eq!(config.cache_max_size_bytes, 1_073_741_824);
213178
assert_eq!(config.cache_max_object_size_bytes, 10_485_760);
@@ -216,36 +181,33 @@ mod tests {
216181
#[test]
217182
#[should_panic(expected = "cache_max_size_bytes")]
218183
fn config_max_size_too_small() {
219-
let mut env = minimal_env();
220-
env.insert("CACHE_MAX_SIZE_BYTES".to_string(), "1000".to_string());
221-
env.insert(
222-
"CACHE_MAX_OBJECT_SIZE_BYTES".to_string(),
223-
"2000".to_string(),
224-
);
225-
Config::from_env(&env);
184+
let mut config = minimal_config();
185+
config.cache_max_size_bytes = 1000;
186+
config.cache_max_object_size_bytes = 2000;
187+
config.validate();
226188
}
227189

228190
#[test]
229191
#[should_panic(expected = "cache_ttl_seconds")]
230192
fn config_zero_ttl() {
231-
let mut env = minimal_env();
232-
env.insert("CACHE_TTL_SECONDS".to_string(), "0".to_string());
233-
Config::from_env(&env);
193+
let mut config = minimal_config();
194+
config.cache_ttl_seconds = 0;
195+
config.validate();
234196
}
235197

236198
#[test]
237199
#[should_panic(expected = "cache_max_entries")]
238200
fn config_zero_max_entries() {
239-
let mut env = minimal_env();
240-
env.insert("CACHE_MAX_ENTRIES".to_string(), "0".to_string());
241-
Config::from_env(&env);
201+
let mut config = minimal_config();
202+
config.cache_max_entries = 0;
203+
config.validate();
242204
}
243205

244206
#[test]
245207
#[should_panic(expected = "worker_threads")]
246208
fn config_zero_worker_threads() {
247-
let mut env = minimal_env();
248-
env.insert("WORKER_THREADS".to_string(), "0".to_string());
249-
Config::from_env(&env);
209+
let mut config = minimal_config();
210+
config.worker_threads = 0;
211+
config.validate();
250212
}
251213
}

0 commit comments

Comments
 (0)