Skip to content

Commit 3d85cb1

Browse files
committed
docs: add ROADMAP.md with health check hardening items
fix: address PR #71 review comments

- Add 'validate' to available help topics hint message
- Use EXIT_USAGE (1) instead of EXIT_RUNTIME (5) for config parse failures in execute_validate
- Add transformCapacity check to /health/ready endpoint
- Skip memoryUsage check in /health/ready when RSS is unavailable (non-Linux)
- Add 'validate' command to README Commands table
- Add platform-specific note to OpenAPI /health/ready description

fix: resolve fmt failure and cross-platform test failure

- Run cargo fmt to fix formatting in cli.rs and mod.rs
- Fix health_ready_memory_check_includes_details test to expect memoryUsage check absent on non-Linux (RSS unavailable)
1 parent 6cdc27d commit 3d85cb1

File tree

5 files changed

+55
-28
lines changed

5 files changed

+55
-28
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ See the [Docker](#docker) section for running with Docker instead.
203203
| `convert` | Convert and transform an image file (can be omitted; see above) |
204204
| `inspect` | Show metadata (format, dimensions, alpha) of an image |
205205
| `serve` | Start the HTTP image-transform server (implied when server flags are used at the top level) |
206+
| `validate` | Validate server configuration without starting the server (useful in CI/CD) |
206207
| `sign` | Generate a signed public URL for the server |
207208
| `completions` | Generate shell completion scripts |
208209
| `version` | Print version information |

ROADMAP.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Roadmap
2+
3+
Items planned for future releases, roughly in priority order.
4+
5+
## Health Check Hardening
6+
7+
- [#72 — Add hysteresis to readiness probe resource checks to prevent flapping](https://github.com/nao1215/truss/issues/72)
8+
- [#73 — Consider authentication for /health diagnostic endpoint](https://github.com/nao1215/truss/issues/73)
9+
- [#74 — Cache syscall results in health check endpoints](https://github.com/nao1215/truss/issues/74)

doc/openapi.yaml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,15 @@ paths:
394394
verifying the storage root is accessible, the cache root (if configured)
395395
is accessible, the object-storage backend (if configured) is
396396
network-reachable, cache disk free space is above the configured
397-
threshold (`TRUSS_HEALTH_CACHE_MIN_FREE_BYTES`), and process memory
398-
usage is below the configured limit (`TRUSS_HEALTH_MAX_MEMORY_BYTES`).
397+
threshold (`TRUSS_HEALTH_CACHE_MIN_FREE_BYTES`), process memory
398+
usage is below the configured limit (`TRUSS_HEALTH_MAX_MEMORY_BYTES`),
399+
and transform capacity is not exhausted.
400+
401+
**Platform note:** RSS memory and cache disk free space are read on
402+
Linux only. On other platforms `TRUSS_HEALTH_MAX_MEMORY_BYTES` and
403+
`TRUSS_HEALTH_CACHE_MIN_FREE_BYTES` are effectively no-ops — those
404+
checks are omitted from the response and the remaining checks
405+
(object-storage reachability, transform capacity, etc.) still apply.
399406
400407
The object-storage reachability check (S3, GCS, or Azure) confirms
401408
that the configured bucket/container exists and the service responds to

src/adapters/cli.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,7 +1059,8 @@ fn parse_help_topic(topic: Option<String>) -> Result<Command, CliError> {
10591059
message: format!("unknown help topic '{other}'"),
10601060
usage: None,
10611061
hint: Some(
1062-
"available topics: convert, inspect, serve, sign, completions, version".to_string(),
1062+
"available topics: convert, inspect, serve, validate, sign, completions, version"
1063+
.to_string(),
10631064
),
10641065
}),
10651066
}
@@ -1499,7 +1500,7 @@ fn execute_validate<W: Write>(stdout: &mut W) -> Result<(), CliError> {
14991500
Ok(())
15001501
}
15011502
Err(error) => Err(runtime_error(
1502-
EXIT_RUNTIME,
1503+
EXIT_USAGE,
15031504
&format!("invalid configuration: {error}"),
15041505
)),
15051506
}

src/adapters/server/mod.rs

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,8 +1069,7 @@ fn handle_stream(mut stream: TcpStream, config: &ServerConfig) -> io::Result<()>
10691069
}
10701070

10711071
requests_served += 1;
1072-
let close_after =
1073-
client_wants_close || requests_served >= config.keep_alive_max_requests;
1072+
let close_after = client_wants_close || requests_served >= config.keep_alive_max_requests;
10741073

10751074
write_response(&mut stream, response, close_after)?;
10761075
record_http_request_duration(route, start);
@@ -1431,19 +1430,33 @@ fn handle_health_ready(config: &ServerConfig) -> HttpResponse {
14311430
}
14321431
}
14331432

1434-
if let Some(max_mem) = config.health_max_memory_bytes {
1435-
let rss = process_rss_bytes();
1436-
let ok = rss.is_none_or(|r| r <= max_mem);
1433+
// Concurrency utilization
1434+
let in_flight = config.transforms_in_flight.load(Ordering::Relaxed);
1435+
let overloaded = in_flight >= config.max_concurrent_transforms;
1436+
checks.push(json!({
1437+
"name": "transformCapacity",
1438+
"status": if overloaded { "fail" } else { "ok" },
1439+
"current": in_flight,
1440+
"max": config.max_concurrent_transforms,
1441+
}));
1442+
if overloaded {
1443+
all_ok = false;
1444+
}
1445+
1446+
// Memory usage (Linux only) — skip entirely when RSS is unavailable
1447+
if let Some(rss_bytes) = process_rss_bytes() {
1448+
let threshold = config.health_max_memory_bytes;
1449+
let mem_ok = threshold.is_none_or(|max| rss_bytes <= max);
14371450
let mut check = json!({
14381451
"name": "memoryUsage",
1439-
"status": if ok { "ok" } else { "fail" },
1452+
"status": if mem_ok { "ok" } else { "fail" },
1453+
"rssBytes": rss_bytes,
14401454
});
1441-
if let Some(r) = rss {
1442-
check["rssBytes"] = json!(r);
1455+
if let Some(max) = threshold {
1456+
check["thresholdBytes"] = json!(max);
14431457
}
1444-
check["thresholdBytes"] = json!(max_mem);
14451458
checks.push(check);
1446-
if !ok {
1459+
if !mem_ok {
14471460
all_ok = false;
14481461
}
14491462
}
@@ -5530,8 +5543,7 @@ mod tests {
55305543
&config,
55315544
);
55325545
assert_eq!(response.status, "503 Service Unavailable");
5533-
let body: serde_json::Value =
5534-
serde_json::from_slice(&response.body).expect("parse body");
5546+
let body: serde_json::Value = serde_json::from_slice(&response.body).expect("parse body");
55355547
assert_eq!(body["status"], "fail");
55365548
let checks = body["checks"].as_array().expect("checks array");
55375549
let storage_check = checks
@@ -5557,8 +5569,7 @@ mod tests {
55575569
},
55585570
&config,
55595571
);
5560-
let body: serde_json::Value =
5561-
serde_json::from_slice(&response.body).expect("parse body");
5572+
let body: serde_json::Value = serde_json::from_slice(&response.body).expect("parse body");
55625573
let checks = body["checks"].as_array().expect("checks array");
55635574
let disk_check = checks
55645575
.iter()
@@ -5585,8 +5596,7 @@ mod tests {
55855596
},
55865597
&config,
55875598
);
5588-
let body: serde_json::Value =
5589-
serde_json::from_slice(&response.body).expect("parse body");
5599+
let body: serde_json::Value = serde_json::from_slice(&response.body).expect("parse body");
55905600
let checks = body["checks"].as_array().expect("checks array");
55915601
assert!(
55925602
checks.iter().all(|c| c["name"] != "cacheDiskFree"),
@@ -5609,17 +5619,16 @@ mod tests {
56095619
},
56105620
&config,
56115621
);
5612-
let body: serde_json::Value =
5613-
serde_json::from_slice(&response.body).expect("parse body");
5622+
let body: serde_json::Value = serde_json::from_slice(&response.body).expect("parse body");
56145623
let checks = body["checks"].as_array().expect("checks array");
5615-
let mem = checks
5616-
.iter()
5617-
.find(|c| c["name"] == "memoryUsage")
5618-
.expect("memoryUsage check");
5619-
assert_eq!(mem["status"], "ok");
5620-
assert_eq!(mem["thresholdBytes"], u64::MAX);
5624+
let mem = checks.iter().find(|c| c["name"] == "memoryUsage");
56215625
if cfg!(target_os = "linux") {
5626+
let mem = mem.expect("memoryUsage check present on Linux");
5627+
assert_eq!(mem["status"], "ok");
5628+
assert_eq!(mem["thresholdBytes"], u64::MAX);
56225629
assert!(mem["rssBytes"].as_u64().is_some());
5630+
} else {
5631+
assert!(mem.is_none(), "memoryUsage should be absent on non-Linux");
56235632
}
56245633
}
56255634
}

0 commit comments

Comments
 (0)