Skip to content

Commit c0342fd

Browse files
committed
fix(auth): validate sandbox pod owner references
Require Kubernetes ServiceAccount bootstrap to validate the live pod's controlling Sandbox ownerReference before minting a gateway sandbox JWT. The K8s resolver now verifies the pod sandbox-id annotation, the controlling Sandbox CR UID, and the Sandbox CR sandbox-id label in addition to TokenReview and live pod UID checks. Update gateway architecture and user-facing auth docs to describe the additional Kubernetes bootstrap binding checks. Tested with focused k8s_sa tests, full pre-commit, and a fresh Helm dev cluster sandbox create.
1 parent 94853bb commit c0342fd

5 files changed

Lines changed: 277 additions & 26 deletions

File tree

architecture/gateway.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,11 @@ Podman, and VM drivers deliver the initial token through supervisor-only
5656
runtime material; Kubernetes supervisors exchange a projected ServiceAccount
5757
token through `IssueSandboxToken`. The gateway validates that projected token
5858
with Kubernetes `TokenReview`, requires the configured sandbox service account,
59-
checks the returned pod binding against the live pod UID, and reads the pod's
60-
sandbox annotation before minting the gateway JWT. Supervisors renew gateway
61-
JWTs in memory before expiry only while the sandbox record still exists. Older
62-
tokens are not server-revoked; deployments bound replay exposure with short
59+
checks the returned pod binding against the live pod UID, and verifies the pod's
60+
controlling `Sandbox` ownerReference against the live Sandbox CR UID and
61+
sandbox-id label before minting the gateway JWT. Supervisors renew gateway JWTs
62+
in memory before expiry only while the sandbox record still exists. Older tokens
63+
are not server-revoked; deployments bound replay exposure with short
6364
`gateway_jwt.ttl_secs` lifetimes.
6465

6566
Gateway JWT signing-key rotation is currently an offline operator action. The

crates/openshell-driver-kubernetes/src/driver.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,9 +1177,10 @@ fn sandbox_template_to_k8s(
11771177
}
11781178
// Carry the sandbox UUID as a pod annotation so the gateway can resolve
11791179
// a projected SA token claim (pod name + uid) back to a sandbox identity
1180-
// when the supervisor calls `IssueSandboxToken` at startup. The gateway's
1181-
// K8s Role does NOT grant `patch pods`, so this annotation is
1182-
// effectively immutable post-create (see plan §11.8).
1180+
// when the supervisor calls `IssueSandboxToken` at startup. The gateway
1181+
// also verifies the pod's controlling Sandbox ownerReference against the
1182+
// live CR before accepting this annotation. Its K8s Role does NOT grant
1183+
// `patch pods`, so this annotation is effectively immutable post-create.
11831184
let mut pod_annotations = platform_config_struct(template, "annotations")
11841185
.and_then(|v| match v {
11851186
serde_json::Value::Object(map) => Some(map),

crates/openshell-server/src/auth/k8s_sa.rs

Lines changed: 265 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
//!
66
//! Path-scoped to `IssueSandboxToken`. Validates a projected SA token
77
//! presented by a sandbox pod, reads the pod's `openshell.io/sandbox-id`
8-
//! annotation, and returns a [`Principal::Sandbox`] with
9-
//! [`SandboxIdentitySource::K8sServiceAccount`]. The `IssueSandboxToken`
10-
//! handler then mints a gateway-signed JWT for that sandbox id; subsequent
11-
//! gRPC calls from the supervisor use the gateway-minted JWT validated by
8+
//! annotation, verifies the pod is controlled by the corresponding Sandbox CR,
9+
//! and returns a [`Principal::Sandbox`] with
10+
//! [`SandboxIdentitySource::K8sServiceAccount`]. The `IssueSandboxToken` handler
11+
//! then mints a gateway-signed JWT for that sandbox id; subsequent gRPC calls
12+
//! from the supervisor use the gateway-minted JWT validated by
1213
//! [`super::sandbox_jwt::SandboxJwtAuthenticator`].
1314
//!
1415
//! This is the only authenticator that talks to the K8s apiserver. It is
@@ -22,7 +23,8 @@ use k8s_openapi::api::{
2223
core::v1::Pod,
2324
};
2425
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
25-
use kube::api::{Api, PostParams};
26+
use kube::api::{Api, ApiResource, PostParams};
27+
use kube::core::{DynamicObject, gvk::GroupVersionKind};
2628
use std::sync::Arc;
2729
use tonic::Status;
2830
use tracing::{debug, info, warn};
@@ -32,10 +34,16 @@ use tracing::{debug, info, warn};
3234
pub const ISSUE_SANDBOX_TOKEN_PATH: &str = "/openshell.v1.OpenShell/IssueSandboxToken";
3335

3436
/// Pod annotation that binds a sandbox pod to its UUID. Set by the
35-
/// Kubernetes compute driver at pod-create time. The gateway treats this
36-
/// annotation as authoritative; the K8s `Role` granted to the gateway must
37-
/// not include `patch pods` (see plan §11.8).
37+
/// Kubernetes compute driver at pod-create time. The gateway accepts this
38+
/// annotation only after validating the pod's `TokenReview` binding, live UID,
39+
/// and owning Sandbox CR. The K8s `Role` granted to the gateway must not
40+
/// include `patch pods` (see plan §11.8).
3841
pub const SANDBOX_ID_ANNOTATION: &str = "openshell.io/sandbox-id";
42+
const SANDBOX_API_GROUP: &str = "agents.x-k8s.io";
43+
const SANDBOX_API_VERSION: &str = "v1alpha1";
44+
const SANDBOX_API_VERSION_FULL: &str = "agents.x-k8s.io/v1alpha1";
45+
const SANDBOX_KIND: &str = "Sandbox";
46+
const SANDBOX_ID_LABEL: &str = "openshell.ai/sandbox-id";
3947
const POD_NAME_EXTRA: &str = "authentication.kubernetes.io/pod-name";
4048
const POD_UID_EXTRA: &str = "authentication.kubernetes.io/pod-uid";
4149

@@ -130,11 +138,18 @@ struct TokenReviewIdentity {
130138
pod_uid: String,
131139
}
132140

141+
#[derive(Debug, Clone, PartialEq, Eq)]
142+
struct SandboxOwnerReference {
143+
name: String,
144+
uid: String,
145+
}
146+
133147
/// Resolver backed by the apiserver's `TokenReview` API and `kube::Client`
134148
/// for the per-pod annotation lookup.
135149
pub struct LiveK8sResolver {
136150
token_reviews_api: Api<TokenReview>,
137151
pods_api: Api<Pod>,
152+
sandboxes_api: Api<DynamicObject>,
138153
expected_audience: String,
139154
sandbox_namespace: String,
140155
expected_service_account: String,
@@ -148,10 +163,16 @@ impl LiveK8sResolver {
148163
expected_service_account: String,
149164
) -> Self {
150165
let token_reviews_api: Api<TokenReview> = Api::all(client.clone());
151-
let pods_api: Api<Pod> = Api::namespaced(client, namespace);
166+
let pods_api: Api<Pod> = Api::namespaced(client.clone(), namespace);
167+
let sandbox_gvk =
168+
GroupVersionKind::gvk(SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_KIND);
169+
let sandbox_resource = ApiResource::from_gvk(&sandbox_gvk);
170+
let sandboxes_api: Api<DynamicObject> =
171+
Api::namespaced_with(client, namespace, &sandbox_resource);
152172
Self {
153173
token_reviews_api,
154174
pods_api,
175+
sandboxes_api,
155176
expected_audience,
156177
sandbox_namespace: namespace.to_string(),
157178
expected_service_account,
@@ -234,13 +255,27 @@ impl K8sIdentityResolver for LiveK8sResolver {
234255
return Err(Status::permission_denied("SA token pod UID mismatch"));
235256
}
236257

237-
let sandbox_id = pod
238-
.metadata
239-
.annotations
240-
.as_ref()
241-
.and_then(|a| a.get(SANDBOX_ID_ANNOTATION))
242-
.cloned()
243-
.unwrap_or_default();
258+
let sandbox_id = pod_sandbox_id(&pod)?;
259+
260+
let owner = sandbox_owner_reference(&pod)?;
261+
let sandbox_cr = self.sandboxes_api.get_opt(&owner.name).await.map_err(|e| {
262+
warn!(
263+
pod = %identity.pod_name,
264+
sandbox_owner = %owner.name,
265+
error = %e,
266+
"failed to fetch owning Sandbox CR for pod identity validation"
267+
);
268+
Status::internal(format!("sandbox GET failed: {e}"))
269+
})?;
270+
let Some(sandbox_cr) = sandbox_cr else {
271+
warn!(
272+
pod = %identity.pod_name,
273+
sandbox_owner = %owner.name,
274+
"pod ownerReference points to a Sandbox CR that does not exist"
275+
);
276+
return Err(Status::permission_denied("sandbox owner not found"));
277+
};
278+
validate_sandbox_owner_reference(&owner, &sandbox_id, &sandbox_cr)?;
244279

245280
Ok(Some(ResolvedK8sIdentity {
246281
sandbox_id,
@@ -315,6 +350,93 @@ fn user_extra_one(user: &UserInfo, key: &str) -> Result<String, Status> {
315350
Ok(values[0].clone())
316351
}
317352

353+
#[allow(clippy::result_large_err)]
354+
fn pod_sandbox_id(pod: &Pod) -> Result<String, Status> {
355+
let sandbox_id = pod
356+
.metadata
357+
.annotations
358+
.as_ref()
359+
.and_then(|a| a.get(SANDBOX_ID_ANNOTATION))
360+
.cloned()
361+
.unwrap_or_default();
362+
if sandbox_id.is_empty() {
363+
return Err(Status::permission_denied(
364+
"pod is not bound to a sandbox identity",
365+
));
366+
}
367+
Ok(sandbox_id)
368+
}
369+
370+
#[allow(clippy::result_large_err)]
371+
fn sandbox_owner_reference(pod: &Pod) -> Result<SandboxOwnerReference, Status> {
372+
let owner_refs = pod.metadata.owner_references.as_deref().unwrap_or_default();
373+
let mut sandbox_refs = owner_refs.iter().filter(|owner| {
374+
owner.api_version == SANDBOX_API_VERSION_FULL && owner.kind == SANDBOX_KIND
375+
});
376+
let Some(owner) = sandbox_refs.next() else {
377+
return Err(Status::permission_denied(
378+
"pod is not controlled by an OpenShell Sandbox",
379+
));
380+
};
381+
if sandbox_refs.next().is_some() {
382+
return Err(Status::permission_denied(
383+
"pod has multiple OpenShell Sandbox owners",
384+
));
385+
}
386+
if owner.controller != Some(true) {
387+
return Err(Status::permission_denied(
388+
"pod Sandbox ownerReference is not controlling",
389+
));
390+
}
391+
if owner.name.is_empty() || owner.uid.is_empty() {
392+
return Err(Status::permission_denied(
393+
"pod Sandbox ownerReference is incomplete",
394+
));
395+
}
396+
Ok(SandboxOwnerReference {
397+
name: owner.name.clone(),
398+
uid: owner.uid.clone(),
399+
})
400+
}
401+
402+
#[allow(clippy::result_large_err)]
403+
fn validate_sandbox_owner_reference(
404+
owner: &SandboxOwnerReference,
405+
sandbox_id: &str,
406+
sandbox_cr: &DynamicObject,
407+
) -> Result<(), Status> {
408+
let actual_uid = sandbox_cr.metadata.uid.as_deref().unwrap_or_default();
409+
if actual_uid != owner.uid {
410+
warn!(
411+
sandbox_owner = %owner.name,
412+
owner_uid = %owner.uid,
413+
actual_uid = %actual_uid,
414+
"pod Sandbox ownerReference UID does not match live Sandbox CR"
415+
);
416+
return Err(Status::permission_denied("sandbox owner UID mismatch"));
417+
}
418+
419+
let actual_sandbox_id = sandbox_cr
420+
.metadata
421+
.labels
422+
.as_ref()
423+
.and_then(|labels| labels.get(SANDBOX_ID_LABEL))
424+
.map(String::as_str)
425+
.unwrap_or_default();
426+
if actual_sandbox_id != sandbox_id {
427+
warn!(
428+
sandbox_owner = %owner.name,
429+
owner_uid = %owner.uid,
430+
pod_sandbox_id = %sandbox_id,
431+
cr_sandbox_id = %actual_sandbox_id,
432+
"pod sandbox annotation does not match owning Sandbox CR label"
433+
);
434+
return Err(Status::permission_denied("sandbox owner ID mismatch"));
435+
}
436+
437+
Ok(())
438+
}
439+
318440
#[cfg(test)]
319441
pub mod test_support {
320442
use super::*;
@@ -352,6 +474,7 @@ pub mod test_support {
352474
mod tests {
353475
use super::test_support::FakeResolver;
354476
use super::*;
477+
use k8s_openapi::apimachinery::pkg::apis::meta::v1::OwnerReference;
355478
use std::collections::BTreeMap;
356479

357480
fn bearer_headers(token: &str) -> http::HeaderMap {
@@ -391,6 +514,52 @@ mod tests {
391514
}
392515
}
393516

517+
fn sandbox_owner(name: &str, uid: &str) -> OwnerReference {
518+
OwnerReference {
519+
api_version: SANDBOX_API_VERSION_FULL.to_string(),
520+
block_owner_deletion: None,
521+
controller: Some(true),
522+
kind: SANDBOX_KIND.to_string(),
523+
name: name.to_string(),
524+
uid: uid.to_string(),
525+
}
526+
}
527+
528+
fn pod_with_owner_refs(owner_references: Vec<OwnerReference>) -> Pod {
529+
Pod {
530+
metadata: ObjectMeta {
531+
owner_references: Some(owner_references),
532+
..Default::default()
533+
},
534+
..Default::default()
535+
}
536+
}
537+
538+
fn pod_with_sandbox_id(sandbox_id: Option<&str>) -> Pod {
539+
Pod {
540+
metadata: ObjectMeta {
541+
annotations: sandbox_id.map(|id| {
542+
BTreeMap::from([(SANDBOX_ID_ANNOTATION.to_string(), id.to_string())])
543+
}),
544+
..Default::default()
545+
},
546+
..Default::default()
547+
}
548+
}
549+
550+
fn sandbox_cr(name: &str, uid: &str, sandbox_id: &str) -> DynamicObject {
551+
let sandbox_gvk =
552+
GroupVersionKind::gvk(SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_KIND);
553+
let sandbox_resource = ApiResource::from_gvk(&sandbox_gvk);
554+
let mut cr = DynamicObject::new(name, &sandbox_resource);
555+
cr.metadata.uid = Some(uid.to_string());
556+
cr.metadata.labels = Some(BTreeMap::from([(
557+
SANDBOX_ID_LABEL.to_string(),
558+
sandbox_id.to_string(),
559+
)]));
560+
cr
561+
}
562+
394563
#[test]
395564
fn token_review_identity_extracts_pod_binding() {
396565
let status = token_review_status(
@@ -491,6 +660,86 @@ mod tests {
491660
assert_eq!(err.code(), tonic::Code::PermissionDenied);
492661
}
493662

663+
#[test]
664+
fn pod_sandbox_id_requires_annotation() {
665+
assert_eq!(
666+
pod_sandbox_id(&pod_with_sandbox_id(Some("sandbox-id-a"))).unwrap(),
667+
"sandbox-id-a"
668+
);
669+
670+
let err = pod_sandbox_id(&pod_with_sandbox_id(None))
671+
.expect_err("missing sandbox-id annotation must fail");
672+
assert_eq!(err.code(), tonic::Code::PermissionDenied);
673+
}
674+
675+
#[test]
676+
fn sandbox_owner_reference_extracts_controlling_sandbox_owner() {
677+
let pod = pod_with_owner_refs(vec![sandbox_owner("sandbox-a", "cr-uid-a")]);
678+
679+
let owner = sandbox_owner_reference(&pod).expect("expected Sandbox owner");
680+
681+
assert_eq!(
682+
owner,
683+
SandboxOwnerReference {
684+
name: "sandbox-a".to_string(),
685+
uid: "cr-uid-a".to_string(),
686+
}
687+
);
688+
}
689+
690+
#[test]
691+
fn sandbox_owner_reference_rejects_missing_owner() {
692+
let pod = pod_with_owner_refs(vec![]);
693+
694+
let err = sandbox_owner_reference(&pod).expect_err("missing owner must fail");
695+
696+
assert_eq!(err.code(), tonic::Code::PermissionDenied);
697+
}
698+
699+
#[test]
700+
fn sandbox_owner_reference_requires_controlling_owner() {
701+
let mut owner = sandbox_owner("sandbox-a", "cr-uid-a");
702+
owner.controller = Some(false);
703+
let pod = pod_with_owner_refs(vec![owner]);
704+
705+
let err = sandbox_owner_reference(&pod).expect_err("non-controller owner must fail");
706+
707+
assert_eq!(err.code(), tonic::Code::PermissionDenied);
708+
}
709+
710+
#[test]
711+
fn sandbox_owner_reference_rejects_ambiguous_sandbox_owners() {
712+
let pod = pod_with_owner_refs(vec![
713+
sandbox_owner("sandbox-a", "cr-uid-a"),
714+
sandbox_owner("sandbox-b", "cr-uid-b"),
715+
]);
716+
717+
let err = sandbox_owner_reference(&pod).expect_err("multiple owners must fail");
718+
719+
assert_eq!(err.code(), tonic::Code::PermissionDenied);
720+
}
721+
722+
#[test]
723+
fn validate_sandbox_owner_reference_requires_matching_cr_uid_and_label() {
724+
let owner = SandboxOwnerReference {
725+
name: "sandbox-a".to_string(),
726+
uid: "cr-uid-a".to_string(),
727+
};
728+
let cr = sandbox_cr("sandbox-a", "cr-uid-a", "sandbox-id-a");
729+
validate_sandbox_owner_reference(&owner, "sandbox-id-a", &cr)
730+
.expect("matching CR should be accepted");
731+
732+
let wrong_uid = sandbox_cr("sandbox-a", "cr-uid-b", "sandbox-id-a");
733+
let err = validate_sandbox_owner_reference(&owner, "sandbox-id-a", &wrong_uid)
734+
.expect_err("wrong CR UID must fail");
735+
assert_eq!(err.code(), tonic::Code::PermissionDenied);
736+
737+
let wrong_label = sandbox_cr("sandbox-a", "cr-uid-a", "sandbox-id-b");
738+
let err = validate_sandbox_owner_reference(&owner, "sandbox-id-a", &wrong_label)
739+
.expect_err("wrong sandbox-id label must fail");
740+
assert_eq!(err.code(), tonic::Code::PermissionDenied);
741+
}
742+
494743
#[tokio::test]
495744
async fn authenticates_on_issue_path_only() {
496745
let resolved = ResolvedK8sIdentity {

0 commit comments

Comments
 (0)