diff --git a/Cargo.lock b/Cargo.lock index 87adc5e2b..750339ec1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3710,6 +3710,7 @@ dependencies = [ "openshell-driver-podman", "openshell-ocsf", "openshell-policy", + "openshell-prover", "openshell-providers", "openshell-router", "petname", diff --git a/architecture/security-policy.md b/architecture/security-policy.md index b0d56e3a2..72ce1e4b1 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -68,21 +68,66 @@ because it changes the effective access model for every sandbox on the gateway. ## Policy Advisor The policy advisor pipeline turns observed denials into draft policy -recommendations: +recommendations. There are two proposers (sandbox-side mechanistic mapper, +agent-authored via `policy.local`); the gateway is the single referee. + +1. **Submit.** Both proposers POST through the same `SubmitPolicyAnalysis` + path. Each chunk is persisted with its `analysis_mode` for audit provenance. +2. **Validate.** The gateway runs the prover (`openshell-prover`) on every + chunk regardless of mode. The prover builds a Z3 model from the merged + policy plus the sandbox's attached-provider credential set, then computes + the delta of findings between the current baseline and the merged policy. +3. **Auto-approval gate (proposer-agnostic, opt-in).** Auto-approval fires + when *both* (a) the prover delta is empty (`prover: no new findings`) AND + (b) the `proposal_approval_mode` setting resolves to `"auto"` — gateway + scope wins, sandbox scope applies only when no gateway value is set, default is + `"manual"`. When both hold, the gateway internally invokes the approve + path with actor identity `system:auto`. The audit event uses + `CONFIG:APPROVED` and carries `auto=true`, `source=<analysis_mode>`, + `prover_delta=empty`, and `resolved_from=<scope>` as unmapped + fields, with message text `"auto-approved: no new prover findings"` — + never `safe`. 
The opt-in gate preserves OpenShell's default-deny + posture: with no setting at either scope, every proposal lands in + `pending` for human review, even when the prover sees no findings. +4. **Implicit supersede.** On any successful submission, the gateway scans + the sandbox's pending chunks for matches on `(host, port, binary)` and + auto-rejects the older ones with reason `"superseded by chunk X"`. This + gives the agent a refinement path (broad mechanistic L4 → narrow agent + L7) without an explicit `supersedes_chunk_id` field. +5. **Escalation.** Anything else lands in `pending` for human review. + +## What the prover decides + +The prover answers four formal questions about each proposed policy +change. Each "yes" answer becomes its own categorical finding — there is +no severity grade. Any finding (of any category) blocks auto-approval. +The categories are intended to be (mostly) mutually exclusive per +underlying change: the gateway suppresses `capability_expansion` paths +whose `(binary, host, port)` is also in the `credential_reach_expansion` +delta, so a brand-new credentialed reach surfaces as one finding rather +than one reach + N method findings. + +| Category | The prover detects… | +|---|---| +| `link_local_reach` | The proposal grants reach to a host in `169.254.0.0/16` or `fe80::/10`. Unconditional — cloud-metadata endpoints serve credentials regardless of sandbox state. | +| `l7_bypass_credentialed` | The proposal lets a binary using a non-HTTP wire protocol (`git-remote-https`, `ssh`, `nc`) reach a host where a sandbox credential is in scope. The L7 proxy cannot inspect the wire protocol; the reviewer decides whether to trust the binary with the credential. | +| `credential_reach_expansion` | A binary gained credentialed reach to a (host, port) it could not reach before. New authenticated reach is a stated intent change; the reviewer confirms the binary should authenticate to the host at all. 
| +| `capability_expansion` | On a (binary, host, port) that already had credentialed reach, the policy adds a new HTTP method. The reviewer sees exactly which method was added (e.g., PUT) and decides if it's part of the agent's task. | -1. The sandbox aggregates denied network events. -2. A mechanistic mapper proposes minimal endpoint, binary, or rule additions. -3. The gateway validates and stores draft recommendations. -4. A human or admin workflow approves or rejects drafts. -5. Approved drafts merge into the target sandbox policy. +"Credential in scope" is sandbox-coarse, not binary-fine: a credential is +considered in scope if the sandbox has a provider attached whose +`target_hosts` include the proposed endpoint's host. v1 does not model +credential scopes (read-only vs write); presence is enough. Proposals intentionally omit `allowed_ips`. If a proposed rule targets a host that resolves to a private IP, the proxy's runtime SSRF classification blocks the connection. The operator must then add an explicit `allowed_ips` entry to permit it — a two-step flow that keeps SSRF protection on by default. -The advisor should propose narrow additions and preserve explicit-deny behavior. -It is a workflow aid, not an automatic permission grant. +The advisor proposes narrow additions and preserves explicit-deny behavior. +Auto-approval is gated on prover determinism, not human judgment; an LLM-based +contextual reviewer is a deliberate future addition layered on top of the +deterministic prover gate. 
## Security Logging diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index ca242be32..fccf069ac 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -1030,6 +1030,11 @@ enum DoctorCommands { } #[derive(Subcommand, Debug)] +// `Create` carries enough optional fields to be ~3x larger than the next +// variant; boxing it would obscure the clap derive ergonomics for one +// (rare) enum allocation per parse, which isn't worth the readability +// cost. +#[allow(clippy::large_enum_variant)] enum SandboxCommands { /// Create a sandbox. #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] @@ -1138,6 +1143,18 @@ enum SandboxCommands { #[arg(long = "label")] labels: Vec, + /// Approval mode for agent-authored policy proposals. + /// + /// `manual` (default): every proposal lands in the draft inbox for + /// human review, regardless of the prover verdict. + /// + /// `auto`: proposals whose prover delta is empty are approved + /// automatically; proposals with findings still require human + /// approval. Auto mode is an explicit opt-in — `OpenShell`'s + /// default-deny posture is preserved unless you choose otherwise. + #[arg(long, value_parser = ["manual", "auto"], default_value = "manual")] + approval_mode: String, + /// Command to run after "--" (defaults to an interactive shell). #[arg(last = true, allow_hyphen_values = true)] command: Vec, @@ -2383,6 +2400,7 @@ async fn main() -> Result<()> { auto_providers, no_auto_providers, labels, + approval_mode, command, } => { // Resolve --tty / --no-tty into an Option override. @@ -2451,6 +2469,7 @@ async fn main() -> Result<()> { tty_override, auto_providers_override, &labels_map, + &approval_mode, &tls, )) .await?; @@ -3653,6 +3672,60 @@ mod tests { } } + /// `sandbox create` defaults `--approval-mode` to `"manual"`. 
The CLI + /// always resolves an explicit value via clap's default, but `sandbox_create` + /// only writes the setting when the mode is not `"manual"`; an unset + /// setting already means manual on the gateway. + #[test] + fn sandbox_create_approval_mode_defaults_to_manual() { + let cli = Cli::try_parse_from(["openshell", "sandbox", "create"]) + .expect("sandbox create with no flags should parse"); + match cli.command { + Some(Commands::Sandbox { + command: Some(SandboxCommands::Create { approval_mode, .. }), + .. + }) => { + assert_eq!(approval_mode, "manual"); + } + other => panic!("expected SandboxCommands::Create, got: {other:?}"), + } + } + + /// `--approval-mode auto` parses through. + #[test] + fn sandbox_create_approval_mode_accepts_auto() { + let cli = + Cli::try_parse_from(["openshell", "sandbox", "create", "--approval-mode", "auto"]) + .expect("--approval-mode auto should parse"); + match cli.command { + Some(Commands::Sandbox { + command: Some(SandboxCommands::Create { approval_mode, .. }), + .. + }) => { + assert_eq!(approval_mode, "auto"); + } + other => panic!("expected SandboxCommands::Create, got: {other:?}"), + } + } + + /// `--approval-mode <unknown>` is rejected by clap's value parser, so the + /// CLI can't smuggle through a future-mode value that the gateway + /// doesn't yet know about. 
+ #[test] + fn sandbox_create_approval_mode_rejects_unknown_value() { + let result = Cli::try_parse_from([ + "openshell", + "sandbox", + "create", + "--approval-mode", + "auto_on_low_risk", + ]); + assert!( + result.is_err(), + "--approval-mode auto_on_low_risk should be rejected until added to the value parser" + ); + } + #[test] fn sandbox_create_resource_flags_parse() { let cli = Cli::try_parse_from([ diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 198cb4b0a..a5421970c 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -1622,6 +1622,7 @@ pub async fn sandbox_create( tty_override: Option, auto_providers_override: Option, labels: &HashMap, + approval_mode: &str, tls: &TlsOptions, ) -> Result<()> { if editor.is_some() && !command.is_empty() { @@ -1730,6 +1731,37 @@ pub async fn sandbox_create( let _ = save_last_sandbox(gateway, &sandbox_name); } + // Persist `--approval-mode` as a sandbox-scoped setting now that the + // sandbox exists. `manual` is the implicit default (no setting needed); + // any other value is written so it survives sandbox restarts and can be + // flipped later via `openshell settings set proposal_approval_mode`. + // If the write fails the sandbox still runs in default `manual` — surface + // the recovery command so the user can retry. 
+ if approval_mode != "manual" { + let setting = parse_cli_setting_value(settings::PROPOSAL_APPROVAL_MODE_KEY, approval_mode)?; + match client + .update_config(UpdateConfigRequest { + name: sandbox_name.clone(), + policy: None, + setting_key: settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(), + setting_value: Some(setting), + delete_setting: false, + global: false, + merge_operations: vec![], + }) + .await + { + Ok(_) => {} + Err(status) => { + eprintln!( + "{} failed to set approval mode '{approval_mode}' on sandbox '{sandbox_name}': {}\n retry with: openshell settings set {sandbox_name} proposal_approval_mode {approval_mode}", + "warning:".yellow().bold(), + status.message(), + ); + } + } + } + // Set up display — interactive terminals get a step-based checklist with // spinners; non-interactive (pipes / CI) get timestamped lines. let mut display = if interactive { @@ -6030,6 +6062,13 @@ pub async fn sandbox_draft_get( chunk.security_notes.yellow() ); } + if !chunk.validation_result.is_empty() { + println!( + " {} {}", + "Validation:".dimmed(), + chunk.validation_result.cyan() + ); + } if let Some(ref rule) = chunk.proposed_rule { println!(" {} {}", "Endpoints:".dimmed(), format_endpoints(rule)); diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 1ad00dd6e..52f58fe13 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -739,6 +739,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -780,6 +781,7 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -865,6 +867,7 @@ async fn sandbox_create_returns_vm_error_without_waiting_for_timeout() { Some(false), Some(false), &HashMap::new(), + "manual", 
&tls, ) .await @@ -917,6 +920,7 @@ async fn sandbox_create_keeps_waiting_while_vm_progress_arrives() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -961,6 +965,7 @@ async fn sandbox_create_times_out_when_only_logs_arrive() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1001,6 +1006,7 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1045,6 +1051,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() { Some(true), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1089,6 +1096,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await @@ -1133,6 +1141,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { Some(false), Some(false), &HashMap::new(), + "manual", &tls, ) .await diff --git a/crates/openshell-core/src/settings.rs b/crates/openshell-core/src/settings.rs index 897317a5a..733bb1f03 100644 --- a/crates/openshell-core/src/settings.rs +++ b/crates/openshell-core/src/settings.rs @@ -59,6 +59,21 @@ pub const PROVIDERS_V2_ENABLED_KEY: &str = "providers_v2_enabled"; /// still applies when this flag is on. pub const AGENT_POLICY_PROPOSALS_ENABLED_KEY: &str = "agent_policy_proposals_enabled"; +/// Approval mode for agent-authored policy proposals. +/// +/// `"manual"` (the default when unset): every proposal lands in the draft +/// inbox for human review, regardless of the prover verdict. `"auto"`: +/// proposals whose prover delta is empty are approved automatically; +/// proposals with findings still require human approval. Any other value +/// (typos, future-reserved modes like `"auto_on_low_risk"`) falls back to +/// manual — auto mode is an explicit, exact opt-in. +/// +/// Resolution precedence (matches the rest of the settings model): gateway +/// scope wins over sandbox scope. 
A reviewer can pin manual mode for a +/// fleet by setting it globally; per-sandbox overrides only apply when no +/// global is set. +pub const PROPOSAL_APPROVAL_MODE_KEY: &str = "proposal_approval_mode"; + pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ // Gateway-level opt-in for provider profile policy composition. Defaults // to false when unset. @@ -79,6 +94,12 @@ pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ key: AGENT_POLICY_PROPOSALS_ENABLED_KEY, kind: SettingValueKind::Bool, }, + // Approval mode for agent-authored proposals. See + // PROPOSAL_APPROVAL_MODE_KEY for details. Defaults to manual. + RegisteredSetting { + key: PROPOSAL_APPROVAL_MODE_KEY, + kind: SettingValueKind::String, + }, // Test-only keys live behind the `dev-settings` feature flag so they // don't appear in production builds. #[cfg(feature = "dev-settings")] diff --git a/crates/openshell-ocsf/src/format/shorthand.rs b/crates/openshell-ocsf/src/format/shorthand.rs index 08b413429..96f3632dc 100644 --- a/crates/openshell-ocsf/src/format/shorthand.rs +++ b/crates/openshell-ocsf/src/format/shorthand.rs @@ -300,22 +300,41 @@ impl OcsfEvent { }, ); let what = e.base.message.as_deref().unwrap_or("config"); - let version_ctx = e + // Bracketed suffix carries the structured provenance fields a + // reviewer needs to scan a CONFIG audit line. Auto-approval + // emits `auto`/`source`/`prover_delta`; every config change + // also carries `policy_version` and `policy_hash`. Order is + // stable so logs are greppable. 
+ let suffix = e .base .unmapped .as_ref() - .and_then(|u| { - let ver = u.get("policy_version").and_then(|v| v.as_str()); - let hash = u.get("policy_hash").and_then(|v| v.as_str()); - match (ver, hash) { - (Some(v), Some(h)) => Some(format!(" [version:{v} hash:{h}]")), - (Some(v), None) => Some(format!(" [version:{v}]")), - _ => None, + .map(|u| { + let mut parts: Vec = Vec::new(); + let mut push = |key: &str| { + if let Some(value) = u.get(key).and_then(|v| v.as_str()) { + parts.push(format!("{key}:{value}")); + } + }; + push("auto"); + push("source"); + push("prover_delta"); + push("resolved_from"); + if let Some(ver) = u.get("policy_version").and_then(|v| v.as_str()) { + parts.push(format!("version:{ver}")); + } + if let Some(hash) = u.get("policy_hash").and_then(|v| v.as_str()) { + parts.push(format!("hash:{hash}")); + } + if parts.is_empty() { + String::new() + } else { + format!(" [{}]", parts.join(" ")) } }) .unwrap_or_default(); - format!("CONFIG:{state} {sev} {what}{version_ctx}") + format!("CONFIG:{state} {sev} {what}{suffix}") } Self::Base(e) => { @@ -829,6 +848,37 @@ mod tests { ); } + /// Auto-approval audit events carry `auto`, `source`, `prover_delta`, and + /// `resolved_from` as unmapped fields. Lock the suffix order so operators + /// (and the demo's grep) can rely on it. 
+ #[test] + fn test_config_state_change_shorthand_includes_auto_approve_fields() { + let mut b = base(5019, "Device Config State Change", 5, "Discovery", 1, "Log"); + b.set_message("auto-approved: no new prover findings (source=agent_authored)"); + b.add_unmapped("auto", serde_json::json!("true")); + b.add_unmapped("source", serde_json::json!("agent_authored")); + b.add_unmapped("prover_delta", serde_json::json!("empty")); + b.add_unmapped("resolved_from", serde_json::json!("sandbox")); + b.add_unmapped("policy_version", serde_json::json!("v4")); + b.add_unmapped("policy_hash", serde_json::json!("sha256:cafe")); + + let event = OcsfEvent::DeviceConfigStateChange(DeviceConfigStateChangeEvent { + base: b, + state: Some(StateId::Other), + state_custom_label: Some("APPROVED".to_string()), + security_level: None, + prev_security_level: None, + }); + + let shorthand = event.format_shorthand(); + assert_eq!( + shorthand, + "CONFIG:APPROVED [INFO] auto-approved: no new prover findings (source=agent_authored) \ + [auto:true source:agent_authored prover_delta:empty resolved_from:sandbox \ + version:v4 hash:sha256:cafe]" + ); + } + #[test] fn test_base_event_shorthand() { let mut b = base(0, "Base Event", 0, "Uncategorized", 99, "Other"); diff --git a/crates/openshell-policy/src/merge.rs b/crates/openshell-policy/src/merge.rs index c01445b11..60da5e4f1 100644 --- a/crates/openshell-policy/src/merge.rs +++ b/crates/openshell-policy/src/merge.rs @@ -392,17 +392,36 @@ fn add_rule( incoming_rule.name = rule_name.to_string(); } + // Endpoint-overlap fallback: when a chunk arrives with a new rule_name + // that doesn't already exist, fold it into a same-host/port rule if one + // is present. This is intentional for user-authored policies (incremental + // refinements live under one rule name). + // + // Provider-injected rules (`_provider_*` — see `compose.rs::provider_rule_name`) + // are deliberately EXCLUDED from this fallback. 
Provider profiles supply a + // baseline layer that should stay separate from agent/user contributions; + // merging an agent's narrow proposal into a provider's broad rule would + // (a) expand the provider rule's `access` shorthand into wildcard + // `path: "**"` rules at the prover's input, masking the agent's narrow + // scope behind the existing broad coverage, and (b) silently widen the + // provider rule's binary list. The agent's contribution is kept on its + // own rule key, the prover sees the actual narrow proposal, and the + // reviewer gets honest signal about what's being added. let target_key = if policy.network_policies.contains_key(rule_name) { Some(rule_name.to_string()) } else { let mut keys: Vec<_> = policy.network_policies.keys().cloned().collect(); keys.sort(); - keys.into_iter().find(|key| { - policy - .network_policies - .get(key) - .is_some_and(|existing_rule| rules_share_endpoint(existing_rule, &incoming_rule)) - }) + keys.into_iter() + .filter(|k| !k.starts_with("_provider_")) + .find(|key| { + policy + .network_policies + .get(key) + .is_some_and(|existing_rule| { + rules_share_endpoint(existing_rule, &incoming_rule) + }) + }) }; if let Some(key) = target_key { @@ -619,15 +638,28 @@ fn find_endpoint_mut<'a>( host: &str, port: u32, ) -> Option<&'a mut NetworkEndpoint> { + // `_provider_*` rules are excluded from this lookup for the same reason + // they're excluded from `add_rule`'s endpoint-overlap fallback: callers + // (`AddAllowRules`, `AddDenyRules`) must not mutate provider-injected + // rules in place. If the operation should target a provider rule, the + // caller should reference it by its exact name through the merge ops + // that take a `rule_name`. 
Defense-in-depth: even if a future caller + // accidentally passes a composed policy here, `AddAllowRules` would no + // longer be able to expand a provider rule's `access` shorthand into + // wildcard `path: "**"` rules (which would mask the prover's narrowness + // verdict on agent contributions). let mut keys: Vec<_> = policy.network_policies.keys().cloned().collect(); keys.sort(); - let target_key = keys.into_iter().find(|key| { - policy.network_policies.get(key).is_some_and(|rule| { - rule.endpoints - .iter() - .any(|endpoint| endpoint_matches_host_port(endpoint, host, port)) - }) - })?; + let target_key = keys + .into_iter() + .filter(|k| !k.starts_with("_provider_")) + .find(|key| { + policy.network_policies.get(key).is_some_and(|rule| { + rule.endpoints + .iter() + .any(|endpoint| endpoint_matches_host_port(endpoint, host, port)) + }) + })?; policy .network_policies @@ -1571,4 +1603,159 @@ mod tests { .contains_key("allow_api_example_com_443") ); } + + /// Provider-injected rules (`_provider_*`) are excluded from the + /// endpoint-overlap fallback: an agent chunk for the same `(host, port)` + /// as a provider rule lands as its own key instead of being merged into + /// the provider's rule. This keeps agent contributions honestly narrow + /// (no silent expansion via the provider rule's `access` shorthand) and + /// preserves binary-list separation. + #[test] + fn add_rule_does_not_merge_agent_chunk_into_provider_rule() { + use crate::compose::{ProviderPolicyLayer, compose_effective_policy}; + use openshell_core::proto::SandboxPolicy; + + // Compose a policy where the github provider profile contributes a + // `_provider_*` rule for api.github.com with `access: read-write` + // and gh/git binaries. 
+ let provider_rule = NetworkPolicyRule { + name: "_provider_work_github".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + access: "read-write".to_string(), + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/gh".to_string(), + ..Default::default() + }], + }; + let composed = compose_effective_policy( + &SandboxPolicy::default(), + &[ProviderPolicyLayer { + rule_name: "_provider_work_github".to_string(), + rule: provider_rule, + }], + ); + assert!( + composed + .network_policies + .contains_key("_provider_work_github"), + "precondition: provider rule must be present in baseline" + ); + + // Agent submits a narrow PUT rule targeting the same host/port via + // curl. Without the filter, this would merge into the provider rule. + let agent_rule = NetworkPolicyRule { + name: "github_contents_put".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![rest_rule("PUT", "/repos/owner/repo/contents/file.md")], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let result = merge_policy( + composed, + &[PolicyMergeOp::AddRule { + rule_name: "github_contents_put".to_string(), + rule: agent_rule, + }], + ) + .expect("merge should succeed"); + + // The agent's chunk lands as its own rule key. 
+ assert!( + result + .policy + .network_policies + .contains_key("github_contents_put"), + "agent chunk must land as a separate rule (not merged into the provider rule); \ + got keys: {:?}", + result.policy.network_policies.keys().collect::>() + ); + + // The provider rule is unchanged: still has only gh as a binary + // (no silent broadening), still has the read-write shorthand + // intact (no preset expansion into wildcard paths). + let provider_rule_after = result + .policy + .network_policies + .get("_provider_work_github") + .expect("provider rule must still be present"); + assert_eq!( + provider_rule_after.binaries.len(), + 1, + "provider rule's binary list must NOT have been merged with the agent's binaries" + ); + assert_eq!(provider_rule_after.binaries[0].path, "/usr/bin/gh"); + assert_eq!( + provider_rule_after.endpoints[0].access, "read-write", + "provider rule's `access` shorthand must remain intact" + ); + assert!( + provider_rule_after.endpoints[0].rules.is_empty(), + "provider rule must NOT have had its access expanded into explicit wildcard rules" + ); + + // The agent's rule retains its narrow scope. + let agent_rule_after = &result.policy.network_policies["github_contents_put"]; + assert_eq!(agent_rule_after.binaries[0].path, "/usr/bin/curl"); + assert_eq!(agent_rule_after.endpoints[0].rules.len(), 1); + } + + /// Non-provider rules still merge by endpoint overlap when the incoming + /// `rule_name` doesn't match an existing key. This preserves the + /// long-standing behavior for user-authored and mechanistic chunks. 
+ #[test] + fn add_rule_still_merges_user_chunk_into_user_rule_by_endpoint_overlap() { + let mut policy = restrictive_default_policy(); + policy.network_policies.insert( + "custom_github".to_string(), + rule_with_endpoint("custom_github", "api.github.com", 443), + ); + + let incoming = NetworkPolicyRule { + name: "ignored_when_merging".to_string(), + endpoints: vec![endpoint("api.github.com", 443)], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let result = merge_policy( + policy, + &[PolicyMergeOp::AddRule { + rule_name: "different_name".to_string(), + rule: incoming, + }], + ) + .expect("merge should succeed"); + + // No new rule entry was created — the chunk merged into the + // existing user rule via endpoint overlap. + assert!( + !result + .policy + .network_policies + .contains_key("different_name"), + "user-authored rule overlap should still merge (no new key); \ + got keys: {:?}", + result.policy.network_policies.keys().collect::>() + ); + let merged = &result.policy.network_policies["custom_github"]; + assert!( + merged.binaries.iter().any(|b| b.path == "/usr/bin/curl"), + "user rule should have absorbed the incoming curl binary" + ); + } } diff --git a/crates/openshell-prover/README.md b/crates/openshell-prover/README.md new file mode 100644 index 000000000..f8b45eca6 --- /dev/null +++ b/crates/openshell-prover/README.md @@ -0,0 +1,136 @@ + + + +# openshell-prover + +Formal verifier for OpenShell sandbox policies. Encodes a policy + its +attached credential set + a binary capability registry as a Z3 SMT +model, then runs reachability queries to detect credentialed-reach and +capability changes a reviewer should be aware of. + +Used by the gateway to gate auto-approval of agent-authored policy +proposals: any finding blocks auto-approval, an empty delta lets the +chunk pass through (when the reviewer opts in via the +`proposal_approval_mode` setting at either gateway or sandbox scope). 
+ +## What it decides + +The prover answers four formal questions. Each "yes" answer is its own +categorical finding — there is no severity grade. The categories live +in [`finding::category`](src/finding.rs). + +| Category | Question the prover decides | +|---|---| +| `link_local_reach` | Does this policy grant reach to a host in `169.254.0.0/16` or `fe80::/10`? | +| `l7_bypass_credentialed` | Does it let a binary using a non-HTTP wire protocol (per the binary registry's `bypasses_l7` flag) reach a host where a credential is in scope? | +| `credential_reach_expansion` | Does it let a binary reach a (host, port) with a credential in scope, where the binary couldn't reach that endpoint before? | +| `capability_expansion` | On a (binary, host, port) the binary already reaches with credentials, does it add a new HTTP method? | + +The first two are unconditional risks. The latter two are *delta* +properties — the gateway runs the prover on both the baseline policy +and the merged policy and surfaces only the new paths. + +## Evidence shape + +Each finding carries one or more [`FindingPath::Exfil`](src/finding.rs) +entries: + +```rust +pub struct ExfilPath { + pub binary: String, + pub endpoint_host: String, + pub endpoint_port: u16, + pub mechanism: String, // human-readable description + pub policy_name: String, // rule the path traverses + pub category: String, // one of the category constants + pub method: String, // populated for capability_expansion; empty otherwise +} +``` + +The gateway's `finding_delta` keys paths by `(category, binary, +host:port, method)` so that adding a new method on an +already-reached host surfaces as exactly one new path (not the whole +re-emission of the existing method set). + +### Category suppression at the delta layer + +`capability_expansion` paths whose `(binary, host, port)` tuple is also +in the `credential_reach_expansion` delta are suppressed by the +gateway. 
A brand-new credentialed reach is described by the +reach-expansion finding alone, not also by N per-method findings. + +## Adding a new category + +1. Add a constant to `src/finding.rs::category`. +2. In `src/queries.rs::check_credential_safety`, add the branch that + detects the new category and emits one `ExfilPath` per evidence + row. Set `path.category` to the new constant. +3. In `src/report.rs::format_path_line`, add a `match` arm rendering + the per-path display string the reviewer sees. +4. (Gateway) If the new category should be suppressed by another, add + the suppression rule to `crates/openshell-server/src/grpc/policy.rs::finding_delta`. +5. Add a unit test in `src/queries.rs` and an integration test in + `crates/openshell-server/src/grpc/policy.rs::tests`. + +The four v1 categories cover the formal properties the OpenShell +auto-approval gate cares about today. Additional categories (e.g., +"destructive method introduced," "new outbound TLS without SNI") would +be additive — they don't displace existing categories. + +## What the prover does *not* decide + +- **Semantic risk of an action.** The prover models *can the binary do + this?*, not *is this destructive?*. `PUT /repos/.../contents/file.md` + and `GET /repos/.../contents/file.md` are both authenticated actions; + the reviewer (or a downstream layer like an LLM contextual reviewer + or an intent file) decides if the action is desired. +- **Cross-sandbox or cross-binary intent.** The model is per-sandbox. + If two sandboxes share a credential through external policy, the + prover reasons about each independently. +- **Runtime behavior.** The prover analyzes the policy as written; it + doesn't observe the proxy's actual decisions. The proxy is the + enforcement layer; the prover is the change-review layer. + +## Inputs + +- **Policy** — a `SandboxPolicy` proto, parsed via + `openshell-policy::parse_sandbox_policy`. 
+- **Credential set** — built from the sandbox's attached providers in + `crates/openshell-server/src/grpc/policy.rs::build_credential_set_for_sandbox`. + v1 captures presence only (host-coarse); no scope modeling. +- **Binary registry** — YAML descriptors at + `crates/openshell-prover/registry/binaries/*.yaml`. Each describes + the binary's protocols, `bypasses_l7` flag, and `can_exfiltrate` + capability. + +## Outputs + +- A list of `Finding` values, one per fired category. Each finding's + `query` field holds the category name. +- The CLI renderer (`report::render_compact` / `render_report`) prints + human-readable output for the `openshell-prover` binary. +- The gateway calls `report::finding_shorthand` to build the + `validation_result` string persisted on each draft chunk. + +## Z3 model layout + +See `src/model.rs`. Briefly: + +- Bool sorts per `(binary, endpoint)` pair encode policy reachability, + filtered by binary capability flags (`can_exfiltrate`, + `bypasses_l7`). +- Bool sorts per `(binary, host)` encode credential-in-scope (one + credential set per sandbox). +- The reachability formula composes these into the SAT query the + `queries::check_credential_safety` loop iterates over. + +## Tests + +- Unit tests in each module (`src/queries.rs`, `src/report.rs`, + `src/policy.rs`) cover individual primitives and category emission. +- Integration tests in `src/lib.rs::tests` exercise the full + parse → build_model → run_all_queries pipeline against testdata + policies in `testdata/`. +- Gateway-level acceptance tests in + `crates/openshell-server/src/grpc/policy.rs::tests` lock in the + end-to-end `validation_result` shape and the auto-approval gate. 
diff --git a/crates/openshell-prover/src/accepted_risks.rs b/crates/openshell-prover/src/accepted_risks.rs index 61aa025be..8c28a4418 100644 --- a/crates/openshell-prover/src/accepted_risks.rs +++ b/crates/openshell-prover/src/accepted_risks.rs @@ -80,23 +80,12 @@ pub fn load_accepted_risks(path: &Path) -> Result> { /// Check if a single finding path matches an accepted risk. fn path_matches_risk(path: &FindingPath, risk: &AcceptedRisk) -> bool { - if !risk.binary.is_empty() { - let path_binary = match path { - FindingPath::Exfil(p) => &p.binary, - FindingPath::WriteBypass(p) => &p.binary, - }; - if path_binary != &risk.binary { - return false; - } + let FindingPath::Exfil(p) = path; + if !risk.binary.is_empty() && p.binary != risk.binary { + return false; } - if !risk.endpoint.is_empty() { - let endpoint_host = match path { - FindingPath::Exfil(p) => &p.endpoint_host, - FindingPath::WriteBypass(p) => &p.endpoint_host, - }; - if endpoint_host != &risk.endpoint { - return false; - } + if !risk.endpoint.is_empty() && p.endpoint_host != risk.endpoint { + return false; } true } diff --git a/crates/openshell-prover/src/credentials.rs b/crates/openshell-prover/src/credentials.rs index dffbc2e8b..c23387be1 100644 --- a/crates/openshell-prover/src/credentials.rs +++ b/crates/openshell-prover/src/credentials.rs @@ -135,17 +135,26 @@ pub struct CredentialSet { } impl CredentialSet { - /// Credentials that target a given host. + /// Credentials that target a given host. Comparison is case-insensitive + /// so a policy author writing `API.github.com` matches credentials + /// registered for `api.github.com`. pub fn credentials_for_host(&self, host: &str) -> Vec<&Credential> { + let needle = host.to_ascii_lowercase(); self.credentials .iter() - .filter(|c| c.target_hosts.iter().any(|h| h == host)) + .filter(|c| { + c.target_hosts + .iter() + .any(|h| h.eq_ignore_ascii_case(&needle)) + }) .collect() } - /// API capability registry for a given host. 
+ /// API capability registry for a given host. Case-insensitive match. pub fn api_for_host(&self, host: &str) -> Option<&ApiCapability> { - self.api_registries.values().find(|api| api.host == host) + self.api_registries + .values() + .find(|api| api.host.eq_ignore_ascii_case(host)) } } diff --git a/crates/openshell-prover/src/finding.rs b/crates/openshell-prover/src/finding.rs index ab4d4f47f..4e06d1b4e 100644 --- a/crates/openshell-prover/src/finding.rs +++ b/crates/openshell-prover/src/finding.rs @@ -2,26 +2,41 @@ // SPDX-License-Identifier: Apache-2.0 //! Finding types emitted by verification queries. +//! +//! The prover answers four formal questions about a proposed policy and +//! emits one finding category per "yes" answer. Findings are categorical +//! (not severity-graded): the reviewer reads the category name and the +//! structured evidence to decide. The auto-approval gate is binary — +//! delta empty = candidate for auto-approval; any finding = human review. +//! +//! Categories: +//! +//! - `credential_reach_expansion` — a binary gained credentialed reach to +//! a (host, port) it could not reach before. +//! - `capability_expansion` — on a (binary, host, port) that already had +//! credentialed reach, a new HTTP method was added. +//! - `l7_bypass_credentialed` — a binary using a wire protocol the L7 +//! proxy cannot inspect (`git-remote-https`, `ssh`, `nc`) gained reach +//! to a host where a credential is in scope. +//! - `link_local_reach` — any reach to a link-local IP range +//! (`169.254.0.0/16`, `fe80::/10`), unconditional. Cloud metadata +//! endpoints serve credentials regardless of the sandbox's own +//! credential state. -use std::fmt; - -/// Severity level for a finding. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum RiskLevel { - High, - Critical, -} - -impl fmt::Display for RiskLevel { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::High => write!(f, "HIGH"), - Self::Critical => write!(f, "CRITICAL"), - } - } +/// Stable category names. Used as the `query` field on [`Finding`] and +/// in the per-path key used by `finding_delta`. +pub mod category { + pub const CREDENTIAL_REACH_EXPANSION: &str = "credential_reach_expansion"; + pub const CAPABILITY_EXPANSION: &str = "capability_expansion"; + pub const L7_BYPASS_CREDENTIALED: &str = "l7_bypass_credentialed"; + pub const LINK_LOCAL_REACH: &str = "link_local_reach"; } -/// A concrete path through which data can be exfiltrated. +/// A concrete path through which the prover observed a tracked property. +/// +/// One `ExfilPath` per (binary, host, port, category) tuple — plus +/// `method` for `capability_expansion` so the gateway's per-path delta +/// surfaces the specific method that was added. #[derive(Debug, Clone)] pub struct ExfilPath { pub binary: String, @@ -29,37 +44,30 @@ pub struct ExfilPath { pub endpoint_port: u16, pub mechanism: String, pub policy_name: String, - /// One of `"l4_only"`, `"l7_allows_write"`, `"l7_bypassed"`. - pub l7_status: String, -} - -/// A path that allows writing despite read-only intent. -#[derive(Debug, Clone)] -pub struct WriteBypassPath { - pub binary: String, - pub endpoint_host: String, - pub endpoint_port: u16, - pub policy_name: String, - pub policy_intent: String, - /// One of `"l4_only"`, `"l7_bypass_protocol"`, `"credential_write_scope"`. - pub bypass_reason: String, - pub credential_actions: Vec, + /// Category name (see `category::*` constants). + pub category: String, + /// HTTP method, populated only for `capability_expansion` paths. + /// Empty string for the other categories. + pub method: String, } /// Concrete evidence attached to a [`Finding`]. 
#[derive(Debug, Clone)] pub enum FindingPath { Exfil(ExfilPath), - WriteBypass(WriteBypassPath), } /// A single verification finding. +/// +/// `query` is the category name (one of the `category::*` constants). +/// Each finding carries one or more `paths` with the structured evidence +/// the reviewer needs to decide. There is no severity field — the +/// category itself is the signal. #[derive(Debug, Clone)] pub struct Finding { pub query: String, pub title: String, pub description: String, - pub risk: RiskLevel, pub paths: Vec, pub remediation: Vec, pub accepted: bool, diff --git a/crates/openshell-prover/src/lib.rs b/crates/openshell-prover/src/lib.rs index 82922253d..892e79cba 100644 --- a/crates/openshell-prover/src/lib.rs +++ b/crates/openshell-prover/src/lib.rs @@ -157,9 +157,13 @@ filesystem_policy: assert_eq!(sandbox_count, 1); } - // 6. End-to-end: git push bypass findings detected (uses embedded registry). + // 6. End-to-end: testdata policy with a github credential in scope and a + // bypass-L7 binary (git) emits an `l7_bypass_credentialed` finding. + // The prover output is categorical, not severity-graded. 
#[test] - fn test_git_push_bypass_findings() { + fn test_findings_for_github_policy() { + use finding::category; + let policy_path = testdata_dir().join("policy.yaml"); let creds_path = testdata_dir().join("credentials.yaml"); @@ -170,23 +174,28 @@ filesystem_policy: let z3_model = build_model(pol, cred_set, bin_reg); let findings = run_all_queries(&z3_model); - let query_types: std::collections::HashSet<&str> = + let categories: std::collections::HashSet<&str> = findings.iter().map(|f| f.query.as_str()).collect(); assert!( - query_types.contains("data_exfiltration"), - "expected data_exfiltration finding" - ); - assert!( - query_types.contains("write_bypass"), - "expected write_bypass finding" - ); - assert!( - findings.iter().any(|f| matches!( - f.risk, - finding::RiskLevel::Critical | finding::RiskLevel::High - )), - "expected at least one critical/high finding" + categories.contains(category::L7_BYPASS_CREDENTIALED), + "expected l7_bypass_credentialed finding for bypass-L7 binary with credential in scope; \ + got categories: {categories:?}" ); + // Every emitted category must be one of the four v1 categories. + let allowed: std::collections::HashSet<&str> = [ + category::LINK_LOCAL_REACH, + category::L7_BYPASS_CREDENTIALED, + category::CREDENTIAL_REACH_EXPANSION, + category::CAPABILITY_EXPANSION, + ] + .into_iter() + .collect(); + for c in &categories { + assert!( + allowed.contains(*c), + "unexpected category {c} emitted by the prover" + ); + } } // 7. Empty policy produces no findings. diff --git a/crates/openshell-prover/src/queries.rs b/crates/openshell-prover/src/queries.rs index 6a0c7f6a6..6aae4b184 100644 --- a/crates/openshell-prover/src/queries.rs +++ b/crates/openshell-prover/src/queries.rs @@ -1,22 +1,72 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! Verification queries: `check_data_exfiltration` and `check_write_bypass`. +//! 
Verification queries. +//! +//! The prover answers four formal questions about a policy and emits one +//! finding category per "yes" answer (see +//! [`crate::finding::category`] for the canonical names). The output is +//! categorical — there is no severity grade. The gateway's +//! `finding_delta` decides which findings are *new* relative to a +//! baseline, and the auto-approval gate triggers when no new findings +//! exist. +//! +//! Categories: +//! +//! 1. **Link-local reach** — any reachable path to a host in +//! `169.254.0.0/16` or `fe80::/10`. Emitted unconditionally: +//! cloud-metadata endpoints serve credentials, so reachability alone +//! is the risk. +//! 2. **L7-bypass + credential** — a binary whose wire protocol the L7 +//! proxy cannot inspect (`git-remote-https`, `ssh`, `nc`) gains reach +//! to a host where a sandbox credential is in scope. +//! 3. **Credential reach expansion** — a binary gains credentialed reach +//! to a host:port it could not reach before. The gateway's delta +//! surfaces only newly-reachable tuples. +//! 4. **Capability expansion** — on a (binary, host, port) that already +//! had credentialed reach, the policy adds a new HTTP method. The +//! gateway's delta surfaces only newly-allowed methods. +//! +//! These categories are intended to be (mostly) mutually exclusive per +//! underlying change: at the gateway, `capability_expansion` paths whose +//! `(binary, host, port)` is also in the `credential_reach_expansion` +//! delta are suppressed, so a brand-new credentialed reach surfaces as +//! one `credential_reach_expansion` finding rather than that plus N +//! capability findings. See `crates/openshell-server/src/grpc/policy.rs`. 
+ +use std::collections::HashSet; +use std::net::IpAddr; use z3::SatResult; -use crate::finding::{ExfilPath, Finding, FindingPath, RiskLevel, WriteBypassPath}; +use crate::finding::{ExfilPath, Finding, FindingPath, category}; use crate::model::ReachabilityModel; -use crate::policy::PolicyIntent; -/// Check for data exfiltration paths from readable filesystem to writable -/// egress channels. -pub fn check_data_exfiltration(model: &ReachabilityModel) -> Vec { - if model.policy.filesystem_policy.readable_paths().is_empty() { - return Vec::new(); +/// Return true iff the host string parses as an IP in a reserved +/// link-local range (IPv4 `169.254.0.0/16` or IPv6 `fe80::/10`). +/// +/// Hostname-only strings (not parseable as IPs) return false. We don't +/// perform DNS resolution at validation time; the model evaluates the +/// policy as written. +pub(crate) fn is_link_local(host: &str) -> bool { + match host.parse::() { + Ok(IpAddr::V4(v4)) => v4.is_link_local(), + Ok(IpAddr::V6(v6)) => v6.is_unicast_link_local(), + Err(_) => false, } +} - let mut exfil_paths: Vec = Vec::new(); +/// Run all four formal queries against the model and emit one finding +/// per category that has at least one path. +/// +/// We deliberately do NOT gate on `filesystem_policy.readable_paths()` +/// being non-empty: the credential itself is the lever for the tracked +/// risks, not anything in `/etc/`. 
+pub fn check_credential_safety(model: &ReachabilityModel) -> Vec { + let mut reach_paths: Vec = Vec::new(); + let mut capability_paths: Vec = Vec::new(); + let mut bypass_paths: Vec = Vec::new(); + let mut link_local_paths: Vec = Vec::new(); for bpath in &model.binary_paths { let cap = model.binary_registry.get_or_unknown(bpath); @@ -26,228 +76,248 @@ pub fn check_data_exfiltration(model: &ReachabilityModel) -> Vec { for eid in &model.endpoints { let expr = model.can_exfil_via_endpoint(bpath, eid); + if model.check_sat(&expr) != SatResult::Sat { + continue; + } - if model.check_sat(&expr) == SatResult::Sat { - // Determine L7 status and mechanism - let ep_is_l7 = is_endpoint_l7_enforced(&model.policy, &eid.host, eid.port); - let bypass = cap.bypasses_l7(); - - let (l7_status, mut mechanism) = if bypass { - ( - "l7_bypassed".to_owned(), - format!( - "{} — uses non-HTTP protocol, bypasses L7 inspection", - cap.description - ), - ) - } else if !ep_is_l7 { - ( - "l4_only".to_owned(), - format!( - "L4-only endpoint — no HTTP inspection, {bpath} can send arbitrary data" - ), - ) - } else { - // L7 is enforced and allows write — policy is - // working as intended. Not a finding. - continue; - }; - - if !cap.exfil_mechanism.is_empty() { - mechanism = format!("{}. Exfil via: {}", mechanism, cap.exfil_mechanism); - } - - exfil_paths.push(ExfilPath { + let host_is_link_local = is_link_local(&eid.host); + let has_credential = !model.credentials.credentials_for_host(&eid.host).is_empty(); + + // Tier 1: link-local. Unconditional. Other categories not + // emitted on link-local hosts — the link-local signal is the + // story. 
+ if host_is_link_local { + link_local_paths.push(ExfilPath { binary: bpath.clone(), endpoint_host: eid.host.clone(), endpoint_port: eid.port, - mechanism, + mechanism: format!( + "Link-local endpoint — {bpath} can reach the host's metadata range \ + (cloud-credential exfiltration territory regardless of declared scopes)" + ), policy_name: eid.policy_name.clone(), - l7_status, + category: category::LINK_LOCAL_REACH.to_string(), + method: String::new(), }); + continue; } - } - } - if exfil_paths.is_empty() { - return Vec::new(); - } - - let readable = model.policy.filesystem_policy.readable_paths(); - let has_l4_only = exfil_paths.iter().any(|p| p.l7_status == "l4_only"); - let has_bypass = exfil_paths.iter().any(|p| p.l7_status == "l7_bypassed"); - let risk = if has_l4_only || has_bypass { - RiskLevel::Critical - } else { - RiskLevel::High - }; - - let mut remediation = Vec::new(); - if has_l4_only { - remediation.push( - "Add `protocol: rest` with specific L7 rules to L4-only endpoints \ - to enable HTTP inspection and restrict to safe methods/paths." - .to_owned(), - ); - } - if has_bypass { - remediation.push( - "Binaries using non-HTTP protocols (git, ssh, nc) bypass L7 inspection. \ - Remove these binaries from the policy if write access is not intended, \ - or restrict credential scopes to read-only." - .to_owned(), - ); - } - remediation - .push("Restrict filesystem read access to only the paths the agent needs.".to_owned()); - - let paths: Vec = exfil_paths.into_iter().map(FindingPath::Exfil).collect(); - - let n_paths = paths.len(); - vec![Finding { - query: "data_exfiltration".to_owned(), - title: "Data Exfiltration Paths Detected".to_owned(), - description: format!( - "{n_paths} exfiltration path(s) found from {} readable filesystem path(s) to external endpoints.", - readable.len() - ), - risk, - paths, - remediation, - accepted: false, - accepted_reason: String::new(), - }] -} - -/// Check for write capabilities that bypass read-only policy intent. 
-pub fn check_write_bypass(model: &ReachabilityModel) -> Vec { - let mut bypass_paths: Vec = Vec::new(); + // Un-credentialed reach is not a tracked risk. + if !has_credential { + continue; + } - for (policy_name, rule) in &model.policy.network_policies { - for ep in &rule.endpoints { - // Only check endpoints where the intent is read-only or L4-only - let intent = ep.intent(); - if !matches!(intent, PolicyIntent::ReadOnly) { + // Tier 2: bypass-L7 binary on a credentialed host. Wire + // protocol cannot be inspected; mark and move on. + if cap.bypasses_l7() { + bypass_paths.push(ExfilPath { + binary: bpath.clone(), + endpoint_host: eid.host.clone(), + endpoint_port: eid.port, + mechanism: format!( + "{} — uses non-HTTP protocol, bypasses L7 inspection, and a credential \ + is in scope for this host", + cap.description + ), + policy_name: eid.policy_name.clone(), + category: category::L7_BYPASS_CREDENTIALED.to_string(), + method: String::new(), + }); continue; } - for port in ep.effective_ports() { - for b in &rule.binaries { - let cap = model.binary_registry.get_or_unknown(&b.path); - - // Check: binary bypasses L7 and can write - if cap.bypasses_l7() && cap.can_write() { - let cred_actions = collect_credential_actions(model, &ep.host, &cap); - if !cred_actions.is_empty() - || model.credentials.credentials_for_host(&ep.host).is_empty() - { - bypass_paths.push(WriteBypassPath { - binary: b.path.clone(), - endpoint_host: ep.host.clone(), - endpoint_port: port, - policy_name: policy_name.clone(), - policy_intent: intent.to_string(), - bypass_reason: "l7_bypass_protocol".to_owned(), - credential_actions: cred_actions, - }); - } - } - - // Check: L4-only endpoint + binary can construct HTTP + credential has write - if !ep.is_l7_enforced() && cap.can_construct_http { - let cred_actions = collect_credential_actions(model, &ep.host, &cap); - if !cred_actions.is_empty() { - bypass_paths.push(WriteBypassPath { - binary: b.path.clone(), - endpoint_host: ep.host.clone(), - 
endpoint_port: port, - policy_name: policy_name.clone(), - policy_intent: intent.to_string(), - bypass_reason: "l4_only".to_owned(), - credential_actions: cred_actions, - }); - } - } - } + // Tiers 3 + 4: credentialed L7 reach. We emit both + // credential_reach_expansion and capability_expansion paths + // here; the gateway's delta will keep only the relevant + // category (see `finding_delta` and the suppression rule). + reach_paths.push(ExfilPath { + binary: bpath.clone(), + endpoint_host: eid.host.clone(), + endpoint_port: eid.port, + mechanism: format!( + "Binary {bpath} has credentialed reach to {host}:{port}", + host = eid.host, + port = eid.port, + ), + policy_name: eid.policy_name.clone(), + category: category::CREDENTIAL_REACH_EXPANSION.to_string(), + method: String::new(), + }); + + // One capability_expansion path per allowed method on this + // (binary, host:port) under this specific rule. + let methods = endpoint_allowed_methods_in_rule( + &model.policy, + &eid.policy_name, + &eid.host, + eid.port, + ); + for method in methods { + capability_paths.push(ExfilPath { + binary: bpath.clone(), + endpoint_host: eid.host.clone(), + endpoint_port: eid.port, + mechanism: format!( + "Method {method} allowed for {bpath} on {host}:{port}", + host = eid.host, + port = eid.port, + ), + policy_name: eid.policy_name.clone(), + category: category::CAPABILITY_EXPANSION.to_string(), + method, + }); } } } - if bypass_paths.is_empty() { - return Vec::new(); + let mut findings = Vec::new(); + if !link_local_paths.is_empty() { + findings.push(build_finding( + category::LINK_LOCAL_REACH, + "Link-Local Reach", + "Reach to a host in a link-local range — cloud-metadata territory.", + link_local_paths, + vec![ + "Endpoint host is in a link-local range (cloud-metadata territory). \ + Sandboxes should not reach these endpoints — reaching them can return \ + host credentials the sandbox should not have." 
+ .to_owned(), + ], + )); } + if !bypass_paths.is_empty() { + findings.push(build_finding( + category::L7_BYPASS_CREDENTIALED, + "L7-Bypass Binary with Credential in Scope", + "A binary using a wire protocol the L7 proxy cannot inspect has reach to \ + a host where a sandbox credential is in scope.", + bypass_paths, + vec![ + "Binaries using non-HTTP protocols (git, ssh, nc) bypass L7 inspection. \ + Remove these binaries from the policy if credentialed write access is \ + not intended." + .to_owned(), + ], + )); + } + if !reach_paths.is_empty() { + findings.push(build_finding( + category::CREDENTIAL_REACH_EXPANSION, + "Credentialed Reach Expansion", + "A binary gained credentialed reach to a (host, port) it could not reach \ + before.", + reach_paths, + vec![ + "Credentialed reach is a privileged action surface. A human reviewer \ + should confirm the binary should be able to authenticate to this host \ + at all." + .to_owned(), + ], + )); + } + if !capability_paths.is_empty() { + findings.push(build_finding( + category::CAPABILITY_EXPANSION, + "Capability Expansion on Credentialed Host", + "New methods were added on a (binary, host, port) that already had \ + credentialed reach. The agent is changing what the sandbox can do with \ + its credentials.", + capability_paths, + vec![ + "A capability expansion is a stated intent change. The reviewer should \ + confirm the new methods (especially mutating methods like PUT, POST, \ + PATCH, DELETE) are part of the agent's task." 
+ .to_owned(), + ], + )); + } + findings +} - let n = bypass_paths.len(); - let paths: Vec = bypass_paths - .into_iter() - .map(FindingPath::WriteBypass) - .collect(); - - vec![Finding { - query: "write_bypass".to_owned(), - title: "Write Bypass Detected — Read-Only Intent Violated".to_owned(), - description: format!("{n} path(s) allow write operations despite read-only policy intent."), - risk: RiskLevel::High, - paths, - remediation: vec![ - "For L4-only endpoints: add `protocol: rest` with `access: read-only` \ - to enable HTTP method filtering." - .to_owned(), - "For L7-bypassing binaries (git, ssh, nc): remove them from the policy's \ - binary list if write access is not intended." - .to_owned(), - "Restrict credential scopes to read-only where possible.".to_owned(), - ], +fn build_finding( + query: &str, + title: &str, + description: &str, + paths: Vec, + remediation: Vec, +) -> Finding { + let n = paths.len(); + Finding { + query: query.to_owned(), + title: title.to_owned(), + // Per-finding description prefixes the count with the category's + // canonical sentence so the audit string is self-describing. + description: format!("{description} ({n} path(s).)"), + paths: paths.into_iter().map(FindingPath::Exfil).collect(), + remediation, accepted: false, accepted_reason: String::new(), - }] + } } -/// Run both verification queries. +/// Run all queries (single entry point for end-to-end callers). pub fn run_all_queries(model: &ReachabilityModel) -> Vec { - let mut findings = Vec::new(); - findings.extend(check_data_exfiltration(model)); - findings.extend(check_write_bypass(model)); - findings + check_credential_safety(model) } // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- -/// Check whether an endpoint in the policy is L7-enforced. 
-fn is_endpoint_l7_enforced(policy: &crate::policy::PolicyModel, host: &str, port: u16) -> bool { - for rule in policy.network_policies.values() { - for ep in &rule.endpoints { - if ep.host == host && ep.effective_ports().contains(&port) { - return ep.is_l7_enforced(); - } +/// Allowed HTTP methods for the endpoint in `policy.network_policies[policy_name]` +/// matching `(host, port)`. Returns empty when the rule or endpoint is not +/// found (e.g. SAT path threaded through a stale model). +fn endpoint_allowed_methods_in_rule( + policy: &crate::policy::PolicyModel, + policy_name: &str, + host: &str, + port: u16, +) -> HashSet { + let Some(rule) = policy.network_policies.get(policy_name) else { + return HashSet::new(); + }; + for ep in &rule.endpoints { + if ep.host.eq_ignore_ascii_case(host) && ep.effective_ports().contains(&port) { + return ep.allowed_methods(); } } - false + HashSet::new() } -/// Collect human-readable credential action descriptions for a host. -fn collect_credential_actions( - model: &ReachabilityModel, - host: &str, - _cap: &crate::registry::BinaryCapability, -) -> Vec { - let creds = model.credentials.credentials_for_host(host); - let api = model.credentials.api_for_host(host); - let mut actions = Vec::new(); - - for cred in &creds { - if let Some(api) = api { - for wa in api.write_actions_for_scopes(&cred.scopes) { - actions.push(format!("{} {} ({})", wa.method, wa.path, wa.action)); - } - } else { - actions.push(format!( - "credential '{}' has scopes: {:?}", - cred.name, cred.scopes - )); - } +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn is_link_local_recognises_ipv4_169_254() { + assert!(is_link_local("169.254.169.254")); + assert!(is_link_local("169.254.0.1")); + assert!(is_link_local("169.254.255.255")); + } + + #[test] + fn 
is_link_local_recognises_ipv6_fe80() { + assert!(is_link_local("fe80::1")); + assert!(is_link_local("fe80::abcd:ef01")); + } + + #[test] + fn is_link_local_rejects_non_link_local_ips() { + assert!(!is_link_local("8.8.8.8")); + assert!(!is_link_local("10.0.0.1")); + assert!(!is_link_local("192.168.1.1")); + assert!(!is_link_local("::1")); + assert!(!is_link_local("2001:db8::1")); + } + + #[test] + fn is_link_local_rejects_hostnames() { + assert!(!is_link_local("api.github.com")); + assert!(!is_link_local("metadata.google.internal")); + assert!(!is_link_local("")); } - actions } diff --git a/crates/openshell-prover/src/report.rs b/crates/openshell-prover/src/report.rs index 27207a6ae..f250eb1cd 100644 --- a/crates/openshell-prover/src/report.rs +++ b/crates/openshell-prover/src/report.rs @@ -2,191 +2,122 @@ // SPDX-License-Identifier: Apache-2.0 //! Terminal report rendering (full and compact). +//! +//! The prover output is categorical, not severity-graded. Each finding +//! names *what* the policy change does (e.g., `capability_expansion`); +//! per-path evidence carries the structured detail. There is no HIGH / +//! MEDIUM / CRITICAL grade — the category itself is the signal. 
-use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet}; use std::path::Path; use owo_colors::OwoColorize; -use crate::finding::{Finding, FindingPath, RiskLevel}; +use crate::finding::{Finding, FindingPath, category}; // --------------------------------------------------------------------------- -// Compact titles (short labels for each query type) +// Category labels (display strings keyed off `Finding.query`) // --------------------------------------------------------------------------- -fn compact_title(query: &str) -> &str { +fn category_label(query: &str) -> &str { match query { - "data_exfiltration" => "Data exfiltration possible", - "write_bypass" => "Write bypass \u{2014} read-only intent violated", - _ => "Unknown finding", + category::LINK_LOCAL_REACH => "link-local reach", + category::L7_BYPASS_CREDENTIALED => "L7-bypass binary with credential", + category::CREDENTIAL_REACH_EXPANSION => "credentialed reach expansion", + category::CAPABILITY_EXPANSION => "capability expansion on credentialed host", + _ => "unknown finding", } } // --------------------------------------------------------------------------- -// Compact detail line +// One-line shorthand (used by the gateway's `validation_result`) // --------------------------------------------------------------------------- -fn compact_detail(finding: &Finding) -> String { - match finding.query.as_str() { - "data_exfiltration" => { - let mut by_status: HashMap<&str, HashSet> = HashMap::new(); - for path in &finding.paths { - if let FindingPath::Exfil(p) = path { - by_status - .entry(&p.l7_status) - .or_default() - .insert(format!("{}:{}", p.endpoint_host, p.endpoint_port)); - } - } - let mut parts = Vec::new(); - if let Some(eps) = by_status.get("l4_only") { - let mut sorted: Vec<&String> = eps.iter().collect(); - sorted.sort(); - parts.push(format!( - "L4-only: {}", - sorted - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", ") - )); - } - if let Some(eps) = 
by_status.get("l7_bypassed") { - let mut sorted: Vec<&String> = eps.iter().collect(); - sorted.sort(); - parts.push(format!( - "wire protocol bypass: {}", - sorted - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", ") - )); - } - if let Some(eps) = by_status.get("l7_allows_write") { - let mut sorted: Vec<&String> = eps.iter().collect(); - sorted.sort(); - parts.push(format!( - "L7 write: {}", - sorted - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", ") - )); - } - parts.join("; ") - } - "write_bypass" => { - let mut reasons = HashSet::new(); - let mut endpoints = HashSet::new(); - for path in &finding.paths { - if let FindingPath::WriteBypass(p) = path { - reasons.insert(p.bypass_reason.as_str()); - endpoints.insert(format!("{}:{}", p.endpoint_host, p.endpoint_port)); - } - } - let mut sorted_eps: Vec<&String> = endpoints.iter().collect(); - sorted_eps.sort(); - let ep_list = sorted_eps - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", "); - if reasons.contains("l4_only") && reasons.contains("l7_bypass_protocol") { - format!("L4-only + wire protocol: {ep_list}") - } else if reasons.contains("l4_only") { - format!("L4-only (no inspection): {ep_list}") - } else if reasons.contains("l7_bypass_protocol") { - format!("wire protocol bypasses L7: {ep_list}") - } else { - String::new() - } - } - _ => String::new(), - } -} - -// --------------------------------------------------------------------------- -// Risk formatting -// --------------------------------------------------------------------------- - -fn risk_label(risk: RiskLevel) -> String { - match risk { - RiskLevel::Critical => "CRITICAL".to_owned(), - RiskLevel::High => "HIGH".to_owned(), +/// Render a finding as one or more single-line strings, suitable for +/// embedding in the gateway `validation_result`, demo output, and logs. +/// +/// Shape: `: ` — one line per path. 
The +/// gateway concatenates these into the chunk's `validation_result` so +/// the reviewer reads what changed without parsing the category enum. +pub fn finding_shorthand(finding: &Finding) -> String { + let mut lines = Vec::new(); + for path in &finding.paths { + let FindingPath::Exfil(p) = path; + lines.push(format_path_line(&finding.query, p)); } + lines.join("\n ") } -fn print_risk_label(risk: RiskLevel) { - match risk { - RiskLevel::Critical => print!("{}", "CRITICAL".bold().red()), - RiskLevel::High => print!("{}", " HIGH".red()), +fn format_path_line(query: &str, p: &crate::finding::ExfilPath) -> String { + let endpoint = format!("{}:{}", p.endpoint_host, p.endpoint_port); + match query { + category::LINK_LOCAL_REACH => { + format!("link_local_reach: {endpoint} via {}", p.binary) + } + category::L7_BYPASS_CREDENTIALED => { + format!("l7_bypass_credentialed: {endpoint} via {}", p.binary) + } + category::CREDENTIAL_REACH_EXPANSION => { + format!("credential_reach_expansion: {endpoint} via {}", p.binary) + } + category::CAPABILITY_EXPANSION => { + format!( + "capability_expansion: {method} on {endpoint} via {bin}", + method = p.method, + bin = p.binary + ) + } + _ => format!("{query}: {endpoint} via {}", p.binary), } } // --------------------------------------------------------------------------- -// Compact output +// Compact output (CLI lint mode) // --------------------------------------------------------------------------- -/// Render compact output (one-line-per-finding for demos and CI). -/// Returns exit code: 0 = pass, 1 = critical/high found. +/// Render compact output (one-line-per-finding-line for demos and CI). +/// Returns exit code: 0 = pass, 1 = any findings present. 
pub fn render_compact(findings: &[Finding], _policy_path: &str, _credentials_path: &str) -> i32 { let active: Vec<&Finding> = findings.iter().filter(|f| !f.accepted).collect(); let accepted: Vec<&Finding> = findings.iter().filter(|f| f.accepted).collect(); for finding in &active { - print!(" "); - print_risk_label(finding.risk); - println!(" {}", compact_title(&finding.query)); - let detail = compact_detail(finding); - if !detail.is_empty() { - println!(" {detail}"); + for path in &finding.paths { + let FindingPath::Exfil(p) = path; + println!(" {} {}", "•".yellow(), format_path_line(&finding.query, p)); + } + if !finding.paths.is_empty() { + println!(); } - println!(); } for finding in &accepted { println!( - " {} {}", + " {} {}", "ACCEPTED".dimmed(), - compact_title(&finding.query).dimmed() + category_label(&finding.query).dimmed() ); } if !accepted.is_empty() { println!(); } - // Verdict - let mut counts: HashMap = HashMap::new(); - for f in &active { - *counts.entry(f.risk).or_default() += 1; - } - let has_critical = counts.contains_key(&RiskLevel::Critical); - let has_high = counts.contains_key(&RiskLevel::High); let accepted_note = if accepted.is_empty() { String::new() } else { format!(", {} accepted", accepted.len()) }; - if has_critical || has_high { - let n = counts.get(&RiskLevel::Critical).unwrap_or(&0) - + counts.get(&RiskLevel::High).unwrap_or(&0); + let path_count: usize = active.iter().map(|f| f.paths.len()).sum(); + if path_count > 0 { println!( - " {} {n} critical/high gaps{accepted_note}", - " FAIL ".white().bold().on_red() + " {} {path_count} finding path(s) require review{accepted_note}", + " REVIEW ".black().bold().on_yellow() ); 1 - } else if !active.is_empty() { - println!( - " {} advisories only{accepted_note}", - " PASS ".black().bold().on_yellow() - ); - 0 } else { println!( - " {} all findings accepted{accepted_note}", + " {} no findings{accepted_note}", " PASS ".white().bold().on_green() ); 0 @@ -198,7 +129,7 @@ pub fn 
render_compact(findings: &[Finding], _policy_path: &str, _credentials_pat // --------------------------------------------------------------------------- /// Render a full terminal report with finding panels. -/// Returns exit code: 0 = pass, 1 = critical/high found. +/// Returns exit code: 0 = pass, 1 = any findings present. pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: &str) -> i32 { let policy_name = Path::new(policy_path) .file_name() @@ -221,50 +152,36 @@ pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: let active: Vec<&Finding> = findings.iter().filter(|f| !f.accepted).collect(); let accepted: Vec<&Finding> = findings.iter().filter(|f| f.accepted).collect(); - // Summary - let mut counts: HashMap = HashMap::new(); + // Per-category summary + let mut counts: BTreeMap<&str, usize> = BTreeMap::new(); for f in &active { - *counts.entry(f.risk).or_default() += 1; + *counts.entry(f.query.as_str()).or_default() += f.paths.len(); + } + + if active.is_empty() && accepted.is_empty() { + println!("{}", "No findings. Policy posture is clean.".green().bold()); + return 0; } println!("{}", "Finding Summary".bold().underline()); - for level in [RiskLevel::Critical, RiskLevel::High] { - if let Some(&count) = counts.get(&level) { - match level { - RiskLevel::Critical => { - println!(" {:>10} {count}", "CRITICAL".bold().red()); - } - RiskLevel::High => println!(" {:>10} {count}", "HIGH".red()), - } - } + for (query, count) in &counts { + println!(" {:>40} {count} path(s)", category_label(query).yellow()); } if !accepted.is_empty() { - println!(" {:>10} {}", "ACCEPTED".dimmed(), accepted.len()); + println!(" {:>40} {}", "ACCEPTED".dimmed(), accepted.len()); } println!(); - if active.is_empty() && accepted.is_empty() { - println!("{}", "No findings. 
Policy posture is clean.".green().bold()); - return 0; - } - - // Per-finding details for (i, finding) in active.iter().enumerate() { - let label = risk_label(finding.risk); - let border = match finding.risk { - RiskLevel::Critical => format!("{}", format!("[{label}]").bold().red()), - RiskLevel::High => format!("{}", format!("[{label}]").red()), - }; - - println!("--- Finding #{} {border} ---", i + 1); + println!( + "--- Finding #{} [{}] ---", + i + 1, + category_label(&finding.query) + ); println!(" {}", finding.title.bold()); println!(" {}", finding.description); println!(); - - // Render paths render_paths(&finding.paths); - - // Remediation if !finding.remediation.is_empty() { println!(" {}", "Remediation:".bold()); for r in &finding.remediation { @@ -274,13 +191,12 @@ pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: } } - // Accepted findings if !accepted.is_empty() { - println!("{}", "--- Accepted Risks ---".dimmed()); + println!("{}", "--- Accepted Findings ---".dimmed()); for finding in &accepted { println!( " {} {}", - risk_label(finding.risk).dimmed(), + category_label(&finding.query).dimmed(), finding.title.dimmed() ); println!( @@ -291,33 +207,20 @@ pub fn render_report(findings: &[Finding], policy_path: &str, credentials_path: } } - // Verdict - let has_critical = counts.contains_key(&RiskLevel::Critical); - let has_high = counts.contains_key(&RiskLevel::High); + let path_count: usize = active.iter().map(|f| f.paths.len()).sum(); let accepted_note = if accepted.is_empty() { String::new() } else { format!(" ({} accepted)", accepted.len()) }; - - if has_critical { - println!( - "{}{accepted_note}", - "FAIL \u{2014} Critical gaps found.".bold().red() - ); - 1 - } else if has_high { + if path_count > 0 { println!( "{}{accepted_note}", - "FAIL \u{2014} High-risk gaps found.".bold().red() + "REVIEW \u{2014} prover findings require human attention." 
+ .bold() + .yellow() ); 1 - } else if !active.is_empty() { - println!( - "{}{accepted_note}", - "PASS \u{2014} Advisories only.".bold().yellow() - ); - 0 } else { println!( "{}{accepted_note}", @@ -331,63 +234,134 @@ fn render_paths(paths: &[FindingPath]) { if paths.is_empty() { return; } - - match &paths[0] { - FindingPath::Exfil(_) => render_exfil_paths(paths), - FindingPath::WriteBypass(_) => render_write_bypass_paths(paths), - } -} - -fn render_exfil_paths(paths: &[FindingPath]) { - println!( - " {:<30} {:<25} {:<15} {}", - "Binary".bold(), - "Endpoint".bold(), - "L7 Status".bold(), - "Mechanism".bold(), - ); + // Group paths by binary for compact display. + let mut by_binary: BTreeMap<&str, Vec<&crate::finding::ExfilPath>> = BTreeMap::new(); for path in paths { - if let FindingPath::Exfil(p) = path { - let l7_display = match p.l7_status.as_str() { - "l4_only" => format!("{}", "L4-only".red()), - "l7_bypassed" => format!("{}", "bypassed".red()), - "l7_allows_write" => format!("{}", "L7 write".yellow()), - _ => p.l7_status.clone(), - }; - let ep = format!("{}:{}", p.endpoint_host, p.endpoint_port); - // Truncate mechanism for display - let mech = if p.mechanism.len() > 50 { - format!("{}...", &p.mechanism[..47]) - } else { - p.mechanism.clone() - }; - println!(" {:<30} {:<25} {:<15} {}", p.binary, ep, l7_display, mech); + let FindingPath::Exfil(p) = path; + by_binary.entry(&p.binary).or_default().push(p); + } + for (binary, ps) in &by_binary { + println!(" Binary: {}", binary.cyan()); + let mut endpoints: BTreeSet = BTreeSet::new(); + let mut methods: BTreeSet = BTreeSet::new(); + for p in ps { + endpoints.insert(format!("{}:{}", p.endpoint_host, p.endpoint_port)); + if !p.method.is_empty() { + methods.insert(p.method.clone()); + } + } + println!( + " Endpoints: {}", + endpoints.iter().cloned().collect::>().join(", ") + ); + if !methods.is_empty() { + println!( + " Methods: {}", + methods.iter().cloned().collect::>().join(", ") + ); } } println!(); } -fn 
render_write_bypass_paths(paths: &[FindingPath]) { - println!( - " {:<30} {:<25} {:<15} {}", - "Binary".bold(), - "Endpoint".bold(), - "Bypass".bold(), - "Intent".bold(), - ); - for path in paths { - if let FindingPath::WriteBypass(p) = path { - let ep = format!("{}:{}", p.endpoint_host, p.endpoint_port); - let bypass_display = match p.bypass_reason.as_str() { - "l4_only" => format!("{}", "L4-only".red()), - "l7_bypass_protocol" => format!("{}", "wire proto".red()), - _ => p.bypass_reason.clone(), - }; - println!( - " {:<30} {:<25} {:<15} {}", - p.binary, ep, bypass_display, p.policy_intent - ); +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::finding::ExfilPath; + + fn exfil_path(category_name: &str, method: &str, host: &str, port: u16) -> ExfilPath { + ExfilPath { + binary: "/usr/bin/curl".to_owned(), + endpoint_host: host.to_owned(), + endpoint_port: port, + mechanism: String::new(), + policy_name: "rule".to_owned(), + category: category_name.to_owned(), + method: method.to_owned(), } } - println!(); + + fn finding_with(category_name: &str, paths: Vec) -> Finding { + Finding { + query: category_name.to_owned(), + title: "test".to_owned(), + description: String::new(), + paths: paths.into_iter().map(FindingPath::Exfil).collect(), + remediation: vec![], + accepted: false, + accepted_reason: String::new(), + } + } + + #[test] + fn shorthand_renders_capability_expansion_with_method() { + let f = finding_with( + category::CAPABILITY_EXPANSION, + vec![exfil_path( + category::CAPABILITY_EXPANSION, + "PUT", + "api.github.com", + 443, + )], + ); + assert_eq!( + finding_shorthand(&f), + "capability_expansion: PUT on api.github.com:443 via /usr/bin/curl" + ); + } + + #[test] + fn shorthand_renders_credential_reach_expansion() { + let f = finding_with( + category::CREDENTIAL_REACH_EXPANSION, + 
vec![exfil_path( + category::CREDENTIAL_REACH_EXPANSION, + "", + "uploads.github.com", + 443, + )], + ); + assert_eq!( + finding_shorthand(&f), + "credential_reach_expansion: uploads.github.com:443 via /usr/bin/curl" + ); + } + + #[test] + fn shorthand_renders_link_local() { + let f = finding_with( + category::LINK_LOCAL_REACH, + vec![exfil_path( + category::LINK_LOCAL_REACH, + "", + "169.254.169.254", + 80, + )], + ); + assert_eq!( + finding_shorthand(&f), + "link_local_reach: 169.254.169.254:80 via /usr/bin/curl" + ); + } + + #[test] + fn shorthand_renders_l7_bypass() { + let f = finding_with( + category::L7_BYPASS_CREDENTIALED, + vec![exfil_path( + category::L7_BYPASS_CREDENTIALED, + "", + "github.com", + 443, + )], + ); + assert_eq!( + finding_shorthand(&f), + "l7_bypass_credentialed: github.com:443 via /usr/bin/curl" + ); + } } diff --git a/crates/openshell-sandbox/src/skills/policy_advisor.md b/crates/openshell-sandbox/src/skills/policy_advisor.md index 8ca64f977..724d17b66 100644 --- a/crates/openshell-sandbox/src/skills/policy_advisor.md +++ b/crates/openshell-sandbox/src/skills/policy_advisor.md @@ -46,8 +46,14 @@ operations. Each `addRule` carries a complete narrow `NetworkPolicyRule`. `port`, `binary`, `rule_missing`, and `detail` as evidence. 2. Fetch the current policy from `/v1/policy/current`. 3. Fetch recent denials from `/v1/denials` if the response body is incomplete. -4. Prefer L7 REST rules for REST APIs. Use L4 only for non-REST protocols or - when the client tunnels opaque traffic that OpenShell cannot inspect. +4. Prefer L7 REST rules for REST APIs. **Proposals against hosts where no + credential is in scope auto-approve** (see Auto-approval below). Any + credentialed reach or capability change goes to human review — that is + the design. L7 is still the agent-speed path because the prover can + precisely describe the change (which method was added on which path); + L4 to a credentialed host loses that precision. 
Use L4 only when the + binary's wire protocol is opaque to L7 inspection (`ssh`, `nc`, + `git-remote-http`) or the host has no documented REST surface. 5. Draft the narrowest rule: exact host, exact port, exact binary when known, exact method, and the smallest safe path. 6. Submit the proposal, save `accepted_chunk_ids` from the response, and @@ -119,10 +125,86 @@ A complete narrow REST-inspected rule looks like this: } ``` +## Auto-approval + +Auto-approval is opt-in via the `proposal_approval_mode` setting, +managed through the standard settings model. Reviewers set it at the +gateway scope (fleet-wide) with `openshell settings set --global +proposal_approval_mode auto` or at the sandbox scope with `openshell +settings set proposal_approval_mode auto`. The CLI's `openshell +sandbox create --approval-mode auto` is a shorthand that writes the +sandbox-scoped setting at create time. Gateway scope wins when both are +set; the default (no setting) is `"manual"`. + +When auto-approval is enabled and the prover finds nothing new, the +gateway approves the chunk with actor `system:auto` and the +`CONFIG:APPROVED` audit event carries `auto=true`, `source=<analysis_mode>`, +`prover_delta=empty`, and `resolved_from=<scope>`. The +agent's `/wait` returns approved in ~1 second. When the prover does +find something — or the setting is `"manual"`/unset — the chunk lands +in `pending` for human review. + +The prover answers four formal questions about each proposed change. +Each "yes" answer is its own categorical finding — there is no +severity grade. Any finding blocks auto-approval. + +- **`link_local_reach`** — the proposal grants reach to a link-local IP + range (`169.254.0.0/16`, `fe80::/10`). Cloud metadata endpoints like + `169.254.169.254` live here. **Never** propose access to these — + these endpoints serve credentials regardless of what the sandbox + itself holds.
+- **`l7_bypass_credentialed`** — the proposal lets a binary using a + wire protocol the L7 proxy cannot inspect (`/usr/bin/git`, + `/usr/lib/git-core/git-remote-http`, `/usr/bin/ssh`, `/usr/bin/nc`) + reach a host where a sandbox credential is in scope. Wire protocols + opaque to L7 are unbounded by L7 scoping; the reviewer must decide + whether to trust the binary with the credential. +- **`credential_reach_expansion`** — the proposal grants a binary + credentialed reach to a (host, port) it could not reach before. New + authenticated reach is a stated intent change — the reviewer + confirms whether the binary should be able to authenticate to the + host at all. +- **`capability_expansion`** — the proposal adds a new HTTP method on + a (binary, host, port) that already had credentialed reach. The + reviewer sees exactly which method was added and decides if it's + part of the agent's task. Mutating methods (PUT, POST, PATCH, + DELETE) are typical sources of this finding. + +What auto-approves (under `auto` mode): + +- Proposals where the prover finds zero of the four categories — for + example, L7 rules against hosts with no credential in scope + (public-content fetches from CDNs, schema URLs, public API + discovery). + +If your proposal escalates and you'd like it to auto-approve, look +first at whether the host actually needs a credentialed binary. A +public-content GET often doesn't, and switching to a different host +(or removing the credential dependency) makes the finding go away. +Credentialed mutations are *supposed* to escalate — propose the +narrow rule and wait for review. + +## Refining an earlier auto-suggested rule + +When the sandbox observes a denial it cannot scope to L7 — e.g., a binary +trying to connect to a host the proxy hasn't seen at the application layer +— it auto-drafts a broad L4 proposal so the operator has something concrete +to look at. These mechanistic drafts are visible to you alongside any other +pending proposals. 
+ +If you see a pending mechanistic L4 draft you can do better than, just +submit a refined L7 proposal for the same `(host, port, binary)`. The +gateway will automatically reject the mechanistic draft with reason +"superseded by chunk X" — no extra cleanup or `supersedes_chunk_id` needed. +The new submission wins by structural overlap. + ## Norms - Do not propose wildcard hosts such as `**` or `*.com`. - Do not propose `access: full` to fix a single denied REST request. +- Do not propose access to link-local addresses (`169.254.0.0/16`, + `fe80::/10`). Cloud-metadata endpoints there can hand out the host's + credentials. - Do not include query strings, tokens, credentials, or secret values in paths. - Explain uncertainty in `intent_summary` instead of widening the rule. diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index 4bbfe24fc..9c3e11eec 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -22,6 +22,7 @@ openshell-driver-kubernetes = { path = "../openshell-driver-kubernetes" } openshell-driver-podman = { path = "../openshell-driver-podman" } openshell-ocsf = { path = "../openshell-ocsf" } openshell-policy = { path = "../openshell-policy" } +openshell-prover = { path = "../openshell-prover" } openshell-providers = { path = "../openshell-providers" } openshell-router = { path = "../openshell-router" } diff --git a/crates/openshell-server/src/grpc/policy.rs b/crates/openshell-server/src/grpc/policy.rs index 315b06f3c..aac8a0fcc 100644 --- a/crates/openshell-server/src/grpc/policy.rs +++ b/crates/openshell-server/src/grpc/policy.rs @@ -45,11 +45,21 @@ use openshell_ocsf::{ }; use openshell_policy::{ PolicyMergeOp, ProviderPolicyLayer, compose_effective_policy, merge_policy, + serialize_sandbox_policy, +}; +use openshell_prover::{ + credentials::{Credential, CredentialSet}, + finding::{Finding, FindingPath}, + model::build_model, + policy::parse_policy_str, + queries::run_all_queries, + 
registry::load_embedded_binary_registry, + report::finding_shorthand, }; use openshell_providers::{get_default_profile, normalize_provider_type}; use prost::Message; use sha2::{Digest, Sha256}; -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr}; use std::sync::Arc; use tonic::{Request, Response, Status}; @@ -91,6 +101,45 @@ fn emit_gateway_policy_audit_log( detail, version, policy_hash, + &[], + ); + info!( + target: OCSF_TARGET, + sandbox_id = %sandbox_id, + message = %message + ); +} + +/// Emit a `CONFIG:APPROVED` audit event for an auto-approval — same event +/// class as a human approval, with extra unmapped fields carrying the +/// safety reasoning so the audit is reconstructable. `source` records the +/// proposer (`mechanistic` or `agent_authored`) for provenance. +/// `resolved_from` records the scope that supplied the `auto` mode setting +/// (`gateway`, `sandbox`, or `default`) so operators can see why a given +/// approval was auto vs manual. 
+fn emit_gateway_policy_auto_approve_audit_log( + sandbox_id: &str, + sandbox_name: &str, + detail: impl Into, + version: i64, + policy_hash: &str, + source: &str, + resolved_from: &str, +) { + let extra = [ + ("auto", "true".to_string()), + ("source", source.to_string()), + ("prover_delta", "empty".to_string()), + ("resolved_from", resolved_from.to_string()), + ]; + let message = build_gateway_policy_audit_message( + sandbox_id, + sandbox_name, + "approved", + detail, + version, + policy_hash, + &extra, ); info!( target: OCSF_TARGET, @@ -106,6 +155,7 @@ fn build_gateway_policy_audit_message( detail: impl Into, version: i64, policy_hash: &str, + extra_fields: &[(&str, String)], ) -> String { let ctx = SandboxContext { sandbox_id: sandbox_id.to_string(), @@ -127,6 +177,9 @@ fn build_gateway_policy_audit_message( if !policy_hash.is_empty() { builder = builder.unmapped("policy_hash", policy_hash.to_string()); } + for (key, value) in extra_fields { + builder = builder.unmapped(key, value.clone()); + } let event: OcsfEvent = builder.build(); event.format_shorthand() } @@ -304,311 +357,643 @@ fn summarize_draft_chunk_rule(chunk: &DraftChunkRecord) -> Result String { - let mut chars = input.chars(); - let truncated: String = chars.by_ref().take(max_chars).collect(); - if chars.next().is_some() { - format!("{truncated}...") - } else { - truncated +/// Run prover queries against the merged policy and render a short +/// human-readable verdict for the reviewer. The verdict reports only the +/// **delta** — findings the proposal introduces on top of the current policy. +/// Baseline gaps (pre-existing findings) are intentionally not surfaced here; +/// they belong on a posture surface, not on the per-proposal approval moment. +/// +/// The string is the entire output — no taxonomy, no greppable prefixes; the +/// reviewer reads it like an OCSF shorthand line. 
One of: +/// +/// - `prover: no new findings` +/// - `prover: N new finding(s)` followed by one ` : ` +/// line per finding path (categorical shorthand from `openshell-prover`) +/// - `merge failed: ` — proposal won't merge into the current +/// policy +/// - `policy invalid: ` — merged policy fails the cheap +/// structural safety check +/// - `validation unavailable` — gateway-side infrastructure failure (registry +/// load, YAML serialize/parse). Internal error detail is logged via +/// `warn!`, never exposed to the reviewer. +fn validation_result_for_agent_proposal( + current_policy: ProtoSandboxPolicy, + rule_name: &str, + proposed_rule: &NetworkPolicyRule, + credentials: &CredentialSet, +) -> String { + let merge_op = PolicyMergeOp::AddRule { + rule_name: rule_name.to_string(), + rule: proposed_rule.clone(), + }; + let merged = match merge_policy(current_policy.clone(), &[merge_op]) { + Ok(result) => result.policy, + Err(error) => return format!("merge failed: {}", one_line(&error.to_string())), + }; + if let Err(error) = validate_policy_safety(&merged) { + return format!("policy invalid: {}", one_line(&error.to_string())); } -} -fn is_sandbox_caller(request: &Request) -> bool { - oidc::is_sandbox_caller(request.metadata()) -} + let merged_findings = match run_prover_findings(&merged, credentials) { + Ok(findings) => findings, + Err(error) => { + warn!(error = %error, "prover validation unavailable for merged policy"); + return "validation unavailable".to_string(); + } + }; + // If the baseline prover run fails (e.g. the current policy uses a shape + // the prover hasn't caught up to yet), fall back to an empty baseline so + // every merged finding surfaces as new. Safer to over-warn than miss a + // real regression introduced by the proposal. 
+ let base_findings = match run_prover_findings(¤t_policy, credentials) { + Ok(findings) => findings, + Err(error) => { + warn!(error = %error, "prover baseline run failed; treating baseline as empty"); + Vec::new() + } + }; -/// Sandbox-class callers may only perform sandbox-scoped policy sync. They -/// must not mutate global config or sandbox settings. -fn validate_sandbox_caller_update(req: &UpdateConfigRequest) -> Result<(), Status> { - if req.global { - return Err(Status::permission_denied( - "sandbox callers cannot mutate global config", - )); - } - if req.delete_setting { - return Err(Status::permission_denied( - "sandbox callers cannot delete settings", - )); - } - if req.name.trim().is_empty() { - return Err(Status::permission_denied( - "sandbox callers may only perform sandbox policy sync", - )); + let new_findings = finding_delta(&base_findings, &merged_findings); + if new_findings.is_empty() { + return "prover: no new findings".to_string(); } - if req.policy.is_none() || !req.setting_key.trim().is_empty() { - return Err(Status::permission_denied( - "sandbox callers may only perform sandbox policy sync", - )); + let count = new_findings.len(); + let mut out = format!( + "prover: {} new finding{}", + count, + if count == 1 { "" } else { "s" } + ); + for finding in &new_findings { + out.push_str("\n "); + out.push_str(&finding_shorthand(finding)); } - Ok(()) + out } -// --------------------------------------------------------------------------- -// Config handlers -// --------------------------------------------------------------------------- +/// Run the prover end-to-end against a single policy with the given +/// credential set. Returns the raw finding list, or a short error string +/// identifying which infrastructure step failed. +/// +/// The credential set is passed in because it's stable across all chunks in +/// one `SubmitPolicyAnalysis` batch — the caller builds it once and shares. 
+fn run_prover_findings( + policy: &ProtoSandboxPolicy, + credentials: &CredentialSet, +) -> Result, String> { + let yaml = + serialize_sandbox_policy(policy).map_err(|e| format!("serialize policy failed: {e}"))?; + let prover_policy = parse_policy_str(&yaml).map_err(|e| format!("parse policy failed: {e}"))?; + let registry = + load_embedded_binary_registry().map_err(|e| format!("load registry failed: {e}"))?; + let model = build_model(prover_policy, credentials.clone(), registry); + Ok(run_all_queries(&model)) +} -pub(super) async fn handle_get_sandbox_config( - state: &Arc, - request: Request, -) -> Result, Status> { - let sandbox_id = request.into_inner().sandbox_id; +/// Build a `CredentialSet` for the sandbox by walking its attached providers. +/// +/// v1 models "credential is present in scope for these hosts" — no scope +/// modeling. Each attached provider produces one [`Credential`] entry whose +/// `target_hosts` lists the hosts from the provider's profile endpoints. +/// Missing providers or providers whose type has no profile are skipped with +/// a `warn!` — the merged policy already excludes them at compose time, so +/// silently treating them as absent here keeps the credential set consistent +/// with the merged policy the prover validates against. +async fn build_credential_set_for_sandbox( + store: &Store, + provider_names: &[String], +) -> Result { + let mut credentials = Vec::new(); - let sandbox = state - .store - .get_message::(&sandbox_id) - .await - .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? - .ok_or_else(|| Status::not_found("sandbox not found"))?; - let sandbox_provider_names = sandbox - .spec - .as_ref() - .map(|spec| spec.providers.clone()) - .unwrap_or_default(); + for name in provider_names { + let Some(provider) = store + .get_message_by_name::(name) + .await + .map_err(|e| Status::internal(format!("failed to fetch provider '{name}': {e}")))? 
+ else { + warn!(provider_name = %name, "provider not found while building credential set; skipping"); + continue; + }; - // Try to get the latest policy from the policy history table. - let latest = state - .store - .get_latest_policy(&sandbox_id) - .await - .map_err(|e| Status::internal(format!("fetch policy history failed: {e}")))?; + let provider_type = provider.r#type.trim(); + let profile = if let Some(canonical_type) = normalize_provider_type(provider_type) { + let Some(profile) = get_default_profile(canonical_type) else { + warn!( + provider_name = %name, + provider_type, + "legacy provider type has no profile; skipping credential entry" + ); + continue; + }; + profile.clone() + } else { + let Some(profile) = + super::provider::get_provider_type_profile(store, provider_type).await? + else { + warn!( + provider_name = %name, + provider_type, + "provider type has no profile; skipping credential entry" + ); + continue; + }; + profile + }; - let mut policy_source = PolicySource::Sandbox; - let (mut policy, mut version, mut policy_hash) = if let Some(record) = latest { - let decoded = ProtoSandboxPolicy::decode(record.policy_payload.as_slice()) - .map_err(|e| Status::internal(format!("decode policy failed: {e}")))?; - debug!( - sandbox_id = %sandbox_id, - version = record.version, - "GetSandboxConfig served from policy history" - ); - ( - Some(decoded), - u32::try_from(record.version).unwrap_or(0), - record.policy_hash, - ) - } else { - // Lazy backfill: no policy history exists yet. 
- let spec = sandbox - .spec - .as_ref() - .ok_or_else(|| Status::internal("sandbox has no spec"))?; + let target_hosts: Vec = profile + .endpoints + .iter() + .map(|ep| ep.host.to_lowercase()) + .filter(|h| !h.is_empty()) + .collect(); - match spec.policy.clone() { - None => { - debug!( - sandbox_id = %sandbox_id, - "GetSandboxConfig: no policy configured, returning empty response" - ); - (None, 0, String::new()) - } - Some(spec_policy) => { - let hash = deterministic_policy_hash(&spec_policy); - let payload = spec_policy.encode_to_vec(); - let policy_id = uuid::Uuid::new_v4().to_string(); + if target_hosts.is_empty() { + continue; + } - if let Err(e) = state - .store - .put_policy_revision(&policy_id, &sandbox_id, 1, &payload, &hash) - .await - { - warn!( - sandbox_id = %sandbox_id, - error = %e, - "Failed to backfill policy version 1" - ); - } else if let Err(e) = state - .store - .update_policy_status(&sandbox_id, 1, "loaded", None, None) - .await - { - warn!( - sandbox_id = %sandbox_id, - error = %e, - "Failed to mark backfilled policy as loaded" - ); - } + credentials.push(Credential { + name: name.clone(), + cred_type: provider_type.to_string(), + scopes: Vec::new(), + injected_via: String::new(), + target_hosts, + }); + } - info!( - sandbox_id = %sandbox_id, - "GetSandboxConfig served from spec (backfilled version 1)" - ); + Ok(CredentialSet { + credentials, + api_registries: HashMap::new(), + }) +} - (Some(spec_policy), 1, hash) - } +/// Stable identity key for a finding path. Deliberately excludes +/// `policy_name`: two paths with identical (binary, endpoint, mechanism) are +/// the same security gap whether they live in rule `foo` or rule `bar`. This +/// keeps the delta from spuriously surfacing baseline gaps just because the +/// proposal added a new rule name that produces the same gap shape. 
+fn finding_path_key(path: &FindingPath) -> String { + let FindingPath::Exfil(p) = path; + // Include the category and (for capability_expansion) the method so + // adding a new method on an already-reached host surfaces as a new + // path; reuse of an existing method does not. + format!( + "exfil|{}|{}:{}|{}|{}", + p.binary, p.endpoint_host, p.endpoint_port, p.category, p.method + ) +} + +/// Return the merged-policy findings that aren't already present in the +/// baseline. Comparison is per-(query, path) so that a single finding whose +/// evidence grew (e.g. a new method allowed on an already-reached host) +/// surfaces only the new evidence paths. +/// +/// **Category suppression:** `capability_expansion` paths whose (binary, +/// host, port) tuple appears in the `credential_reach_expansion` delta +/// are suppressed. A brand-new credentialed reach is described by the +/// reach-expansion finding alone; we don't double-report by also +/// flagging every method as a separate `capability_expansion`. 
+fn finding_delta(base: &[Finding], merged: &[Finding]) -> Vec { + use openshell_prover::finding::category; + + let base_keys: HashSet<(String, String)> = base + .iter() + .flat_map(|f| { + let query = f.query.clone(); + f.paths + .iter() + .map(move |p| (query.clone(), finding_path_key(p))) + }) + .collect(); + let mut delta: Vec = Vec::new(); + for finding in merged { + let new_paths: Vec = finding + .paths + .iter() + .filter(|p| !base_keys.contains(&(finding.query.clone(), finding_path_key(p)))) + .cloned() + .collect(); + if new_paths.is_empty() { + continue; } - }; + delta.push(Finding { + paths: new_paths, + ..finding.clone() + }); + } - let global_settings = load_global_settings(state.store.as_ref()).await?; - let sandbox_settings = - load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; - let providers_v2_enabled = - bool_setting_enabled(&global_settings, settings::PROVIDERS_V2_ENABLED_KEY)?; + // Suppress capability_expansion paths whose (binary, host, port) + // appears in the credential_reach_expansion delta — a new reach is + // described once, by the reach-expansion category, not also by per- + // method capability findings. + let reach_tuples: HashSet<(String, String, u16)> = delta + .iter() + .filter(|f| f.query == category::CREDENTIAL_REACH_EXPANSION) + .flat_map(|f| { + f.paths.iter().map(|p| { + let FindingPath::Exfil(e) = p; + (e.binary.clone(), e.endpoint_host.clone(), e.endpoint_port) + }) + }) + .collect(); + delta.retain_mut(|f| { + if f.query != category::CAPABILITY_EXPANSION { + return true; + } + f.paths.retain(|p| { + let FindingPath::Exfil(e) = p; + !reach_tuples.contains(&(e.binary.clone(), e.endpoint_host.clone(), e.endpoint_port)) + }); + !f.paths.is_empty() + }); - let mut global_policy_version: u32 = 0; + delta +} - if let Some(global_policy) = decode_policy_from_global_settings(&global_settings)? 
{ - policy = Some(global_policy.clone()); - policy_hash = deterministic_policy_hash(&global_policy); - policy_source = PolicySource::Global; - if version == 0 { - version = 1; +/// Collapse multi-line / multi-message error text to a single line so the +/// `validation_result` stays a clean, scannable string. +fn one_line(s: &str) -> String { + s.split('\n') + .map(str::trim) + .filter(|line| !line.is_empty()) + .collect::>() + .join("; ") +} + +/// Auto-reject any pending chunks for the same sandbox that share the +/// `(host, port, binary)` of the newly-submitted chunk. Mode-agnostic: the +/// rule is "the latest submission for this endpoint wins; older pending +/// proposals are stale." +/// +/// In practice this implements the supersede behavior for the +/// `mechanistic`→`agent_authored` refinement loop: when the agent submits a +/// narrow L7 proposal in response to a denial, any pending mechanistic L4 +/// draft for the same key gets auto-rejected here, without the agent or the +/// proto needing an explicit `supersedes_chunk_id` field. +/// +/// Failures (DB error, scan error) are logged via `warn!` and the function +/// returns silently. The new chunk's persistence has already succeeded; +/// failing this cleanup pass should not abort the submission flow. +async fn supersede_other_pending_chunks_for_endpoint( + state: &Arc, + sandbox_id: &str, + new_chunk_id: &str, + host: &str, + port: i32, + binary: &str, +) { + // Empty host/port/binary should not supersede anything — the matcher would + // accidentally cover unrelated chunks. Defensive skip. 
+ if host.is_empty() || port == 0 || binary.is_empty() { + return; + } + + let pending = match state + .store + .list_draft_chunks(sandbox_id, Some("pending")) + .await + { + Ok(records) => records, + Err(err) => { + warn!( + sandbox_id = %sandbox_id, + error = %err, + "supersede scan failed; older pending chunks (if any) remain pending" + ); + return; } - if let Ok(Some(global_rev)) = state + }; + + let now_ms = current_time_ms(); + for other in pending { + if other.id == new_chunk_id + || other.host != host + || other.port != port + || other.binary != binary + { + continue; + } + + let reason = format!("superseded by chunk {new_chunk_id}"); + match state .store - .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) + .update_draft_chunk_status(&other.id, "rejected", Some(now_ms), Some(&reason)) .await { - global_policy_version = u32::try_from(global_rev.version).unwrap_or(0); + Ok(_) => { + info!( + sandbox_id = %sandbox_id, + superseded_chunk = %other.id, + by_chunk = %new_chunk_id, + host = %host, + port = port, + binary = %binary, + "Auto-rejected pending chunk: superseded by newer submission for same (host, port, binary)" + ); + } + Err(err) => { + warn!( + chunk_id = %other.id, + error = %err, + "supersede auto-reject failed; chunk remains pending" + ); + } } } +} - if providers_v2_enabled - && !matches!(policy_source, PolicySource::Global) - && let Some(source_policy) = policy.as_ref() +/// If the just-submitted mechanistic chunk targets a `(host, port, binary)` +/// already covered by an approved `agent_authored` chunk, auto-reject the +/// mechanistic chunk on arrival. The agent has already handled this access +/// decision; the mechanistic draft would only add approval-queue noise. +/// +/// `agent_authored` submissions are NEVER self-rejected — that path remains +/// open for refinement. Only the mechanistic side is asymmetric. 
+async fn self_reject_mechanistic_if_already_covered( + state: &Arc, + sandbox_id: &str, + new_chunk_id: &str, + host: &str, + port: i32, + binary: &str, +) { + if host.is_empty() || port == 0 || binary.is_empty() { + return; + } + + let approved = match state + .store + .list_draft_chunks(sandbox_id, Some("approved")) + .await { - let provider_layers = - profile_provider_policy_layers(state.store.as_ref(), &sandbox_provider_names).await?; - if !provider_layers.is_empty() { - let effective_policy = compose_effective_policy(source_policy, &provider_layers); - policy_hash = deterministic_policy_hash(&effective_policy); - policy = Some(effective_policy); + Ok(records) => records, + Err(err) => { + warn!( + sandbox_id = %sandbox_id, + error = %err, + "approved-chunk scan for self-reject failed; mechanistic chunk remains pending" + ); + return; } - } + }; - let settings = merge_effective_settings(&global_settings, &sandbox_settings)?; - let config_revision = compute_config_revision(policy.as_ref(), &settings, policy_source); - let provider_env_revision = - compute_provider_env_revision(state.store.as_ref(), &sandbox_provider_names).await?; + // If any approved chunk for this sandbox already targets the same + // (host, port, binary), the mechanistic submission is redundant. 
+ let covered_by = approved + .iter() + .find(|c| c.host == host && c.port == port && c.binary == binary); + let Some(covering) = covered_by else { + return; + }; - Ok(Response::new(GetSandboxConfigResponse { - policy, - version, - policy_hash, - settings, - config_revision, - policy_source: policy_source.into(), - global_policy_version, - provider_env_revision, - })) + let reason = format!( + "already covered by approved chunk {} (agent_authored or prior auto-approval)", + covering.id + ); + match state + .store + .update_draft_chunk_status( + new_chunk_id, + "rejected", + Some(current_time_ms()), + Some(&reason), + ) + .await + { + Ok(_) => { + info!( + sandbox_id = %sandbox_id, + chunk_id = %new_chunk_id, + covering_chunk = %covering.id, + host = %host, + port = port, + binary = %binary, + "Auto-rejected incoming mechanistic chunk: endpoint already covered by an approved chunk" + ); + } + Err(err) => { + warn!( + chunk_id = %new_chunk_id, + error = %err, + "mechanistic self-reject failed; chunk remains pending" + ); + } + } } -pub(super) async fn compute_provider_env_revision( +/// Resolve the effective proposal-approval mode for a sandbox. +/// +/// Precedence (matches the rest of the settings model): gateway scope wins +/// over sandbox scope. A reviewer can pin manual mode fleet-wide by setting +/// it globally; per-sandbox overrides only apply when no global is set. 
+/// +/// Returns `(auto_approve_enabled, resolved_from)` where `resolved_from` +/// is `"gateway"`, `"sandbox"`, or `"default"`. Only an exact `"auto"` +/// value enables auto-approval; any other string (including future- +/// reserved modes like `"auto_on_low_risk"`) is conservatively treated as +/// manual. A value stored under the key with a non-string type is ignored at +/// that scope, so resolution falls through to the next one. +async fn resolve_proposal_approval_mode( store: &Store, - provider_names: &[String], -) -> Result { - let mut hasher = Sha256::new(); - hasher.update(b"openshell-provider-env-revision-v1"); + sandbox_name: &str, +) -> Result<(bool, &'static str), Status> { + let global = load_global_settings(store).await?; + if let Some(StoredSettingValue::String(value)) = + global.settings.get(settings::PROPOSAL_APPROVAL_MODE_KEY) + { + return Ok((value == "auto", "gateway")); + } - for provider_name in provider_names { - hasher.update(provider_name.as_bytes()); - match store - .get_by_name(Provider::object_type(), provider_name) - .await - .map_err(|e| { - Status::internal(format!("fetch provider '{provider_name}' failed: {e}")) - })? 
{ - Some(record) => { - hasher.update(record.id.as_bytes()); - hasher.update(record.updated_at_ms.to_le_bytes()); + let sandbox = load_sandbox_settings(store, sandbox_name).await?; + if let Some(StoredSettingValue::String(value)) = + sandbox.settings.get(settings::PROPOSAL_APPROVAL_MODE_KEY) + { + return Ok((value == "auto", "sandbox")); + } - let provider = Provider::decode(record.payload.as_slice()).map_err(|e| { - Status::internal(format!("decode provider '{provider_name}' failed: {e}")) - })?; - hasher.update(provider.r#type.as_bytes()); + Ok((false, "default")) +} - let mut credential_keys: Vec<_> = provider.credentials.keys().collect(); - credential_keys.sort(); - for key in credential_keys { - hasher.update(key.as_bytes()); - } - } - None => { - hasher.update(b"missing"); - } - } +/// Internally approve a chunk on the auto-approval path: merge into the +/// active policy, flip status to "approved", notify watchers, and emit a +/// `CONFIG:APPROVED` audit event carrying `auto=true`, `source=<mode>`, +/// `prover_delta=empty` so the audit trail records why no human approved +/// this chunk. +/// +/// `source` is the `analysis_mode` of the originating submission +/// (`mechanistic` or `agent_authored`); an empty value is reported as +/// `unspecified`. `resolved_from` is the scope label produced by +/// `resolve_proposal_approval_mode`. The audit copy says "auto-approved: +/// no new prover findings" — never "safe" — because the claim is about the +/// prover's reasoning, not the world. +/// +/// Returns `Ok(())` without changing anything when the chunk is no longer +/// `pending`. Returns an error when a global policy is active, the chunk +/// cannot be fetched, or the merge / status update fails. +async fn auto_approve_chunk( + state: &Arc, + sandbox_id: &str, + sandbox_name: &str, + chunk_id: &str, + source: &str, + resolved_from: &str, +) -> Result<(), Status> { + // Same gate the human-driven approve paths apply: if a global policy is + // active, sandbox-scoped chunk approvals are meaningless because + // `GetSandboxConfig` prefers the global policy. Auto-approving here + // would persist a sandbox revision that the runtime silently ignores + // and leave a misleading "approved" chunk in the table. Bail before + // touching state; the calling site logs this as `warn!` and leaves the + // chunk pending. + require_no_global_policy(state).await?; + + let chunk = state + .store + .get_draft_chunk(chunk_id) + .await + .map_err(|e| Status::internal(format!("fetch chunk failed: {e}")))? + .ok_or_else(|| Status::not_found("chunk not found"))?; + + // The chunk may have been superseded or rejected by something else + // between persist and auto-approve. Only approve from a pending state. 
+ if chunk.status != "pending" { + return Ok(()); } - let digest = hasher.finalize(); - Ok(u64::from_le_bytes(digest[..8].try_into().map_err( - |_| Status::internal("provider env revision digest too short"), - )?)) -} + // Render the audit summary first: it only reads `chunk`, so if it fails + // we bail before `merge_chunk_into_policy` runs, instead of leaving a + // merged revision behind a chunk that is still pending. + let chunk_summary = summarize_draft_chunk_rule(&chunk)?; + let (version, hash) = merge_chunk_into_policy(state.store.as_ref(), sandbox_id, &chunk).await?; -async fn profile_provider_policy_layers( - store: &Store, - provider_names: &[String], -) -> Result, Status> { - let mut layers = Vec::new(); + let now_ms = current_time_ms(); + state + .store + .update_draft_chunk_status(chunk_id, "approved", Some(now_ms), None) + .await + .map_err(|e| Status::internal(format!("update chunk status failed: {e}")))?; - for name in provider_names { - let provider = store - .get_message_by_name::(name) - .await - .map_err(|e| Status::internal(format!("failed to fetch provider '{name}': {e}")))? - .ok_or_else(|| Status::failed_precondition(format!("provider '{name}' not found")))?; + state.sandbox_watch_bus.notify(sandbox_id); - let provider_type = provider.r#type.trim(); - let profile = if let Some(canonical_type) = normalize_provider_type(provider_type) { - let Some(profile) = get_default_profile(canonical_type) else { - warn!( - provider_name = %name, - provider_type, - "legacy provider type has no profile; skipping provider policy layer" - ); - continue; - }; - profile.clone() - } else { - let Some(profile) = - super::provider::get_provider_type_profile(store, provider_type).await? 
- else { - warn!( - provider_name = %name, - provider_type, - "provider type has no profile; skipping provider policy layer" - ); - continue; - }; - profile - }; + let source_label = if source.is_empty() { + "unspecified" + } else { + source + }; + emit_gateway_policy_auto_approve_audit_log( + sandbox_id, + sandbox_name, + format!( + "auto-approved: no new prover findings (source={source_label}) — chunk {chunk_id}: {chunk_summary}" + ), + version, + &hash, + source_label, + resolved_from, + ); - let rule_name = openshell_policy::provider_rule_name(provider.object_name()); - layers.push(ProviderPolicyLayer { - rule_name: rule_name.clone(), - rule: profile.network_policy_rule(&rule_name), - }); + info!( + sandbox_id = %sandbox_id, + chunk_id = %chunk_id, + rule_name = %chunk.rule_name, + version = version, + policy_hash = %hash, + source = %source_label, + resolved_from = %resolved_from, + "Auto-approved chunk: no new prover findings" + ); + + Ok(()) +} + +// TODO: share effective-policy lookup with `load_sandbox_policy` / +// `GetSandboxConfig`. They re-implement very similar global-settings + +// providers_v2 + compose logic; consolidating them is out of scope for the +// agent-authored proposal validation slice. +async fn current_effective_policy_for_sandbox( + state: &ServerState, + sandbox: &Sandbox, + sandbox_id: &str, +) -> Result { + let mut policy = if let Some(record) = state + .store + .get_latest_policy(sandbox_id) + .await + .map_err(|e| Status::internal(format!("fetch latest policy failed: {e}")))? + { + ProtoSandboxPolicy::decode(record.policy_payload.as_slice()) + .map_err(|e| Status::internal(format!("decode current policy failed: {e}")))? 
+ } else { + sandbox + .spec + .as_ref() + .and_then(|spec| spec.policy.clone()) + .unwrap_or_default() + }; + + let global_settings = load_global_settings(state.store.as_ref()).await?; + let policy_source = decode_policy_from_global_settings(&global_settings)?.map_or( + PolicySource::Sandbox, + |global_policy| { + policy = global_policy; + PolicySource::Global + }, + ); + + let providers_v2_enabled = + bool_setting_enabled(&global_settings, settings::PROVIDERS_V2_ENABLED_KEY)?; + if providers_v2_enabled && !matches!(policy_source, PolicySource::Global) { + let provider_names = sandbox + .spec + .as_ref() + .map(|spec| spec.providers.clone()) + .unwrap_or_default(); + let provider_layers = + profile_provider_policy_layers(state.store.as_ref(), &provider_names).await?; + if !provider_layers.is_empty() { + policy = compose_effective_policy(&policy, &provider_layers); + } } - Ok(layers) + Ok(policy) } -fn bool_setting_enabled(settings: &StoredSettings, key: &str) -> Result { - match settings.settings.get(key) { - None => Ok(false), - Some(StoredSettingValue::Bool(value)) => Ok(*value), - Some(_) => Err(Status::internal(format!( - "setting '{key}' has invalid value type; expected bool" - ))), +fn truncate_for_log(input: &str, max_chars: usize) -> String { + let mut chars = input.chars(); + let truncated: String = chars.by_ref().take(max_chars).collect(); + if chars.next().is_some() { + format!("{truncated}...") + } else { + truncated } } -pub(super) async fn handle_get_gateway_config( - state: &Arc, - _request: Request, -) -> Result, Status> { - let global_settings = load_global_settings(state.store.as_ref()).await?; - let settings = materialize_global_settings(&global_settings)?; - Ok(Response::new(GetGatewayConfigResponse { - settings, - settings_revision: global_settings.revision, - })) +fn is_sandbox_caller(request: &Request) -> bool { + oidc::is_sandbox_caller(request.metadata()) } -pub(super) async fn handle_get_sandbox_provider_environment( +/// Sandbox-class 
callers may only perform sandbox-scoped policy sync. They +/// must not mutate global config or sandbox settings. +fn validate_sandbox_caller_update(req: &UpdateConfigRequest) -> Result<(), Status> { + if req.global { + return Err(Status::permission_denied( + "sandbox callers cannot mutate global config", + )); + } + if req.delete_setting { + return Err(Status::permission_denied( + "sandbox callers cannot delete settings", + )); + } + if req.name.trim().is_empty() { + return Err(Status::permission_denied( + "sandbox callers may only perform sandbox policy sync", + )); + } + if req.policy.is_none() || !req.setting_key.trim().is_empty() { + return Err(Status::permission_denied( + "sandbox callers may only perform sandbox policy sync", + )); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// Config handlers +// --------------------------------------------------------------------------- + +pub(super) async fn handle_get_sandbox_config( state: &Arc, - request: Request, -) -> Result, Status> { + request: Request, +) -> Result, Status> { let sandbox_id = request.into_inner().sandbox_id; let sandbox = state @@ -617,126 +1002,383 @@ pub(super) async fn handle_get_sandbox_provider_environment( .await .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? .ok_or_else(|| Status::not_found("sandbox not found"))?; - - let spec = sandbox + let sandbox_provider_names = sandbox .spec - .ok_or_else(|| Status::internal("sandbox has no spec"))?; + .as_ref() + .map(|spec| spec.providers.clone()) + .unwrap_or_default(); - let provider_names = spec.providers; - let provider_env_revision = - compute_provider_env_revision(state.store.as_ref(), &provider_names).await?; - let environment = - super::provider::resolve_provider_environment(state.store.as_ref(), &provider_names) - .await?; + // Try to get the latest policy from the policy history table. 
+ let latest = state + .store + .get_latest_policy(&sandbox_id) + .await + .map_err(|e| Status::internal(format!("fetch policy history failed: {e}")))?; - info!( - sandbox_id = %sandbox_id, - provider_count = provider_names.len(), - env_count = environment.len(), - provider_env_revision, - "GetSandboxProviderEnvironment request completed successfully" - ); + let mut policy_source = PolicySource::Sandbox; + let (mut policy, mut version, mut policy_hash) = if let Some(record) = latest { + let decoded = ProtoSandboxPolicy::decode(record.policy_payload.as_slice()) + .map_err(|e| Status::internal(format!("decode policy failed: {e}")))?; + debug!( + sandbox_id = %sandbox_id, + version = record.version, + "GetSandboxConfig served from policy history" + ); + ( + Some(decoded), + u32::try_from(record.version).unwrap_or(0), + record.policy_hash, + ) + } else { + // Lazy backfill: no policy history exists yet. + let spec = sandbox + .spec + .as_ref() + .ok_or_else(|| Status::internal("sandbox has no spec"))?; - Ok(Response::new(GetSandboxProviderEnvironmentResponse { - environment, - provider_env_revision, - })) -} + match spec.policy.clone() { + None => { + debug!( + sandbox_id = %sandbox_id, + "GetSandboxConfig: no policy configured, returning empty response" + ); + (None, 0, String::new()) + } + Some(spec_policy) => { + let hash = deterministic_policy_hash(&spec_policy); + let payload = spec_policy.encode_to_vec(); + let policy_id = uuid::Uuid::new_v4().to_string(); -// --------------------------------------------------------------------------- -// Update config handler (policy + settings mutations) -// --------------------------------------------------------------------------- + if let Err(e) = state + .store + .put_policy_revision(&policy_id, &sandbox_id, 1, &payload, &hash) + .await + { + warn!( + sandbox_id = %sandbox_id, + error = %e, + "Failed to backfill policy version 1" + ); + } else if let Err(e) = state + .store + .update_policy_status(&sandbox_id, 1, "loaded", 
None, None) + .await + { + warn!( + sandbox_id = %sandbox_id, + error = %e, + "Failed to mark backfilled policy as loaded" + ); + } -pub(super) async fn handle_update_config( - state: &Arc, - request: Request, -) -> Result, Status> { - let sandbox_caller = is_sandbox_caller(&request); - let req = request.into_inner(); - if sandbox_caller { - validate_sandbox_caller_update(&req)?; - } - let key = req.setting_key.trim(); - let has_policy = req.policy.is_some(); - let has_setting = !key.is_empty(); - let has_merge_ops = !req.merge_operations.is_empty(); - let mut mutation_count = 0_u8; - mutation_count += u8::from(has_policy); - mutation_count += u8::from(has_setting); - mutation_count += u8::from(has_merge_ops); + info!( + sandbox_id = %sandbox_id, + "GetSandboxConfig served from spec (backfilled version 1)" + ); - if mutation_count > 1 { - return Err(Status::invalid_argument( - "policy, setting_key, and merge_operations are mutually exclusive", - )); + (Some(spec_policy), 1, hash) + } + } + }; + + let global_settings = load_global_settings(state.store.as_ref()).await?; + let sandbox_settings = + load_sandbox_settings(state.store.as_ref(), sandbox.object_name()).await?; + let providers_v2_enabled = + bool_setting_enabled(&global_settings, settings::PROVIDERS_V2_ENABLED_KEY)?; + + let mut global_policy_version: u32 = 0; + + if let Some(global_policy) = decode_policy_from_global_settings(&global_settings)? 
{ + policy = Some(global_policy.clone()); + policy_hash = deterministic_policy_hash(&global_policy); + policy_source = PolicySource::Global; + if version == 0 { + version = 1; + } + if let Ok(Some(global_rev)) = state + .store + .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) + .await + { + global_policy_version = u32::try_from(global_rev.version).unwrap_or(0); + } } - if mutation_count == 0 { - return Err(Status::invalid_argument( - "one of policy, setting_key, or merge_operations must be provided", - )); + + if providers_v2_enabled + && !matches!(policy_source, PolicySource::Global) + && let Some(source_policy) = policy.as_ref() + { + let provider_layers = + profile_provider_policy_layers(state.store.as_ref(), &sandbox_provider_names).await?; + if !provider_layers.is_empty() { + let effective_policy = compose_effective_policy(source_policy, &provider_layers); + policy_hash = deterministic_policy_hash(&effective_policy); + policy = Some(effective_policy); + } } - if req.global { - let _settings_guard = state.settings_mutex.lock().await; + let settings = merge_effective_settings(&global_settings, &sandbox_settings)?; + let config_revision = compute_config_revision(policy.as_ref(), &settings, policy_source); + let provider_env_revision = + compute_provider_env_revision(state.store.as_ref(), &sandbox_provider_names).await?; - if has_merge_ops { - return Err(Status::invalid_argument( - "merge_operations are not supported for global policy updates", - )); - } + Ok(Response::new(GetSandboxConfigResponse { + policy, + version, + policy_hash, + settings, + config_revision, + policy_source: policy_source.into(), + global_policy_version, + provider_env_revision, + })) +} - if has_policy { - if req.delete_setting { - return Err(Status::invalid_argument( - "delete_setting cannot be combined with policy payload", - )); - } - let mut new_policy = req.policy.ok_or_else(|| { - Status::invalid_argument("policy is required for global policy update") - })?; - 
openshell_policy::ensure_sandbox_process_identity(&mut new_policy); - validate_policy_safety(&new_policy)?; +pub(super) async fn compute_provider_env_revision( + store: &Store, + provider_names: &[String], +) -> Result { + let mut hasher = Sha256::new(); + hasher.update(b"openshell-provider-env-revision-v1"); - let payload = new_policy.encode_to_vec(); - let hash = deterministic_policy_hash(&new_policy); + for provider_name in provider_names { + hasher.update(provider_name.as_bytes()); + match store + .get_by_name(Provider::object_type(), provider_name) + .await + .map_err(|e| { + Status::internal(format!("fetch provider '{provider_name}' failed: {e}")) + })? { + Some(record) => { + hasher.update(record.id.as_bytes()); + hasher.update(record.updated_at_ms.to_le_bytes()); - let latest = state - .store - .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) - .await - .map_err(|e| Status::internal(format!("fetch latest global policy failed: {e}")))?; + let provider = Provider::decode(record.payload.as_slice()).map_err(|e| { + Status::internal(format!("decode provider '{provider_name}' failed: {e}")) + })?; + hasher.update(provider.r#type.as_bytes()); - if let Some(ref current) = latest - && current.policy_hash == hash - && current.status == "loaded" - { - let mut global_settings = load_global_settings(state.store.as_ref()).await?; - let stored_value = StoredSettingValue::Bytes(hex::encode(&payload)); - let changed = upsert_setting_value( - &mut global_settings.settings, - POLICY_SETTING_KEY, - stored_value, - ); - if changed { - global_settings.revision = global_settings.revision.wrapping_add(1); - save_global_settings(state.store.as_ref(), &global_settings).await?; + let mut credential_keys: Vec<_> = provider.credentials.keys().collect(); + credential_keys.sort(); + for key in credential_keys { + hasher.update(key.as_bytes()); } - return Ok(Response::new(UpdateConfigResponse { - version: u32::try_from(current.version).unwrap_or(0), - policy_hash: hash, - settings_revision: 
global_settings.revision, - deleted: false, - })); } + None => { + hasher.update(b"missing"); + } + } + } - let next_version = latest.map_or(1, |r| r.version + 1); - let policy_id = uuid::Uuid::new_v4().to_string(); + let digest = hasher.finalize(); + Ok(u64::from_le_bytes(digest[..8].try_into().map_err( + |_| Status::internal("provider env revision digest too short"), + )?)) +} - state - .store - .put_policy_revision( - &policy_id, - GLOBAL_POLICY_SANDBOX_ID, +async fn profile_provider_policy_layers( + store: &Store, + provider_names: &[String], +) -> Result, Status> { + let mut layers = Vec::new(); + + for name in provider_names { + let provider = store + .get_message_by_name::(name) + .await + .map_err(|e| Status::internal(format!("failed to fetch provider '{name}': {e}")))? + .ok_or_else(|| Status::failed_precondition(format!("provider '{name}' not found")))?; + + let provider_type = provider.r#type.trim(); + let profile = if let Some(canonical_type) = normalize_provider_type(provider_type) { + let Some(profile) = get_default_profile(canonical_type) else { + warn!( + provider_name = %name, + provider_type, + "legacy provider type has no profile; skipping provider policy layer" + ); + continue; + }; + profile.clone() + } else { + let Some(profile) = + super::provider::get_provider_type_profile(store, provider_type).await? 
+ else { + warn!( + provider_name = %name, + provider_type, + "provider type has no profile; skipping provider policy layer" + ); + continue; + }; + profile + }; + + let rule_name = openshell_policy::provider_rule_name(provider.object_name()); + layers.push(ProviderPolicyLayer { + rule_name: rule_name.clone(), + rule: profile.network_policy_rule(&rule_name), + }); + } + + Ok(layers) +} + +fn bool_setting_enabled(settings: &StoredSettings, key: &str) -> Result { + match settings.settings.get(key) { + None => Ok(false), + Some(StoredSettingValue::Bool(value)) => Ok(*value), + Some(_) => Err(Status::internal(format!( + "setting '{key}' has invalid value type; expected bool" + ))), + } +} + +pub(super) async fn handle_get_gateway_config( + state: &Arc, + _request: Request, +) -> Result, Status> { + let global_settings = load_global_settings(state.store.as_ref()).await?; + let settings = materialize_global_settings(&global_settings)?; + Ok(Response::new(GetGatewayConfigResponse { + settings, + settings_revision: global_settings.revision, + })) +} + +pub(super) async fn handle_get_sandbox_provider_environment( + state: &Arc, + request: Request, +) -> Result, Status> { + let sandbox_id = request.into_inner().sandbox_id; + + let sandbox = state + .store + .get_message::(&sandbox_id) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
+ .ok_or_else(|| Status::not_found("sandbox not found"))?; + + let spec = sandbox + .spec + .ok_or_else(|| Status::internal("sandbox has no spec"))?; + + let provider_names = spec.providers; + let provider_env_revision = + compute_provider_env_revision(state.store.as_ref(), &provider_names).await?; + let environment = + super::provider::resolve_provider_environment(state.store.as_ref(), &provider_names) + .await?; + + info!( + sandbox_id = %sandbox_id, + provider_count = provider_names.len(), + env_count = environment.len(), + provider_env_revision, + "GetSandboxProviderEnvironment request completed successfully" + ); + + Ok(Response::new(GetSandboxProviderEnvironmentResponse { + environment, + provider_env_revision, + })) +} + +// --------------------------------------------------------------------------- +// Update config handler (policy + settings mutations) +// --------------------------------------------------------------------------- + +pub(super) async fn handle_update_config( + state: &Arc, + request: Request, +) -> Result, Status> { + let sandbox_caller = is_sandbox_caller(&request); + let req = request.into_inner(); + if sandbox_caller { + validate_sandbox_caller_update(&req)?; + } + let key = req.setting_key.trim(); + let has_policy = req.policy.is_some(); + let has_setting = !key.is_empty(); + let has_merge_ops = !req.merge_operations.is_empty(); + let mut mutation_count = 0_u8; + mutation_count += u8::from(has_policy); + mutation_count += u8::from(has_setting); + mutation_count += u8::from(has_merge_ops); + + if mutation_count > 1 { + return Err(Status::invalid_argument( + "policy, setting_key, and merge_operations are mutually exclusive", + )); + } + if mutation_count == 0 { + return Err(Status::invalid_argument( + "one of policy, setting_key, or merge_operations must be provided", + )); + } + + if req.global { + let _settings_guard = state.settings_mutex.lock().await; + + if has_merge_ops { + return Err(Status::invalid_argument( + "merge_operations 
are not supported for global policy updates", + )); + } + + if has_policy { + if req.delete_setting { + return Err(Status::invalid_argument( + "delete_setting cannot be combined with policy payload", + )); + } + let mut new_policy = req.policy.ok_or_else(|| { + Status::invalid_argument("policy is required for global policy update") + })?; + openshell_policy::ensure_sandbox_process_identity(&mut new_policy); + validate_policy_safety(&new_policy)?; + + let payload = new_policy.encode_to_vec(); + let hash = deterministic_policy_hash(&new_policy); + + let latest = state + .store + .get_latest_policy(GLOBAL_POLICY_SANDBOX_ID) + .await + .map_err(|e| Status::internal(format!("fetch latest global policy failed: {e}")))?; + + if let Some(ref current) = latest + && current.policy_hash == hash + && current.status == "loaded" + { + let mut global_settings = load_global_settings(state.store.as_ref()).await?; + let stored_value = StoredSettingValue::Bytes(hex::encode(&payload)); + let changed = upsert_setting_value( + &mut global_settings.settings, + POLICY_SETTING_KEY, + stored_value, + ); + if changed { + global_settings.revision = global_settings.revision.wrapping_add(1); + save_global_settings(state.store.as_ref(), &global_settings).await?; + } + return Ok(Response::new(UpdateConfigResponse { + version: u32::try_from(current.version).unwrap_or(0), + policy_hash: hash, + settings_revision: global_settings.revision, + deleted: false, + })); + } + + let next_version = latest.map_or(1, |r| r.version + 1); + let policy_id = uuid::Uuid::new_v4().to_string(); + + state + .store + .put_policy_revision( + &policy_id, + GLOBAL_POLICY_SANDBOX_ID, next_version, &payload, &hash, @@ -1347,6 +1989,35 @@ pub(super) async fn handle_submit_policy_analysis( .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))? 
.ok_or_else(|| Status::not_found("sandbox not found"))?; let sandbox_id = sandbox.object_id().to_string(); + // `current_policy` is captured ONCE at the top of the batch and frozen + // for every chunk's delta computation, even if an earlier chunk in the + // batch auto-approves and merges. This is intentional v1 behavior. The + // trade-off: in a multi-chunk batch with overlapping endpoints, chunk + // N+1's delta is computed without chunk N's contribution — a degenerate + // case given the common single-chunk submission shape. If real workloads + // surface a problem with batches that interact across chunks, the right + // fix is to recompute baseline after each successful auto-approve. + let current_policy = current_effective_policy_for_sandbox(state, &sandbox, &sandbox_id).await?; + + // Auto-approval is an opt-in behavior, sourced from the settings model + // (sandbox or gateway scope) so it can be flipped on a running sandbox + // and managed fleet-wide. Default (no setting, or any value other than + // exact "auto") preserves OpenShell's default-deny posture: every + // proposal lands in `pending` for a human reviewer. + let (auto_approve_enabled, resolved_from) = + resolve_proposal_approval_mode(state.store.as_ref(), sandbox.object_name()).await?; + + // The credential set is stable across all chunks in this batch, so build + // it once. v1 captures presence only — no scope modeling — so the prover + // can answer "is there a credential in scope for this host?" but not + // "what action class does that credential authorize?" 
+ let provider_names_for_creds: Vec = sandbox + .spec + .as_ref() + .map(|spec| spec.providers.clone()) + .unwrap_or_default(); + let credential_set = + build_credential_set_for_sandbox(state.store.as_ref(), &provider_names_for_creds).await?; let current_version = state .store @@ -1366,6 +2037,21 @@ pub(super) async fn handle_submit_policy_analysis( rejection_reasons.push("chunk missing rule_name".to_string()); continue; } + // `_provider_*` is the reserved namespace for rules synthesized from + // provider profiles during composition. Agent submissions that target + // those keys would merge directly into the provider rule and bypass + // the merge.rs guard that splits agent-authored chunks into their + // own rule so the prover sees their contribution honestly. Reject at + // the entry boundary — the agent never has reason to address a + // provider rule by name. + if chunk.rule_name.starts_with("_provider_") { + rejected += 1; + rejection_reasons.push(format!( + "chunk '{}' uses reserved '_provider_' rule-name prefix", + chunk.rule_name + )); + continue; + } if chunk.proposed_rule.is_none() { rejected += 1; rejection_reasons.push(format!("chunk '{}' missing proposed_rule", chunk.rule_name)); @@ -1389,6 +2075,17 @@ pub(super) async fn handle_submit_policy_analysis( .map(|b| b.path.clone()) .unwrap_or_default(); + // The prover runs on every proposal regardless of `analysis_mode`. + // Source provenance (mechanistic vs agent_authored) is preserved in + // OCSF audit fields, but the safety decision is grounded in the + // merged-policy consequence, not the author — proposer-agnostic. + let validation_result = validation_result_for_agent_proposal( + current_policy.clone(), + &chunk.rule_name, + chunk.proposed_rule.as_ref().expect("checked above"), + &credential_set, + ); + let record = DraftChunkRecord { // The handler proposes an id; the store may swap it for an // existing row's id on dedup. 
Always trust `effective_id` for @@ -1421,7 +2118,7 @@ pub(super) async fn handle_submit_policy_analysis( } else { now_ms }, - validation_result: String::new(), + validation_result: validation_result.clone(), rejection_reason: String::new(), }; // Mechanistic mode dedups N denials targeting the same endpoint @@ -1437,6 +2134,71 @@ pub(super) async fn handle_submit_policy_analysis( .await .map_err(|e| Status::internal(format!("persist draft chunk failed: {e}")))?; accepted += 1; + + // Implicit supersede: any other pending chunk for the same + // (host, port, binary) in this sandbox is now stale because this + // newer submission covers the same access decision. Auto-reject the + // older chunks with a clear reason. This is what lets the agent + // refine a mechanistic L4 draft into an L7 narrow proposal without + // any explicit `supersedes_chunk_id` plumbing — the gateway figures + // out the relationship by structural overlap. + supersede_other_pending_chunks_for_endpoint( + state, + &sandbox_id, + &effective_id, + &record.host, + record.port, + &record.binary, + ) + .await; + + // Asymmetric self-reject: if this is a mechanistic proposal that + // arrived AFTER an already-approved agent_authored chunk covered the + // same (host, port, binary), the mechanistic submission is + // redundant — the agent already handled it. Auto-reject so it + // doesn't pile up as approval-queue noise. Agent_authored + // submissions never self-reject; refinement is always allowed. + if req.analysis_mode == "mechanistic" { + self_reject_mechanistic_if_already_covered( + state, + &sandbox_id, + &effective_id, + &record.host, + record.port, + &record.binary, + ) + .await; + } + + // Auto-approval gate (proposer-agnostic, opt-in): only fire when + // BOTH the prover found nothing new in this proposal's delta AND + // the reviewer opted in via the `proposal_approval_mode` setting + // (gateway or sandbox scope). 
On any failure (merge conflict, + // status update error), the chunk stays pending so a human can + // review — never silently lose a proposal. The `validation_result` + // literal here is the canonical empty-delta verdict; any other + // string means findings or infrastructure error, both of which + // require human attention. + if auto_approve_enabled + && validation_result == "prover: no new findings" + && let Err(err) = auto_approve_chunk( + state, + &sandbox_id, + sandbox.object_name(), + &effective_id, + &req.analysis_mode, + resolved_from, + ) + .await + { + warn!( + chunk_id = %effective_id, + sandbox_id = %sandbox_id, + error = %err, + "auto-approval failed; chunk remains pending for human review" + ); + } + accepted_chunk_ids.push(effective_id); } @@ -3188,779 +3950,2177 @@ mod tests { let effective_policy = get_sandbox_policy(&state, "sb-v2-disabled").await; - assert!( - effective_policy - .network_policies - .contains_key("sandbox_only") - ); - assert!( - !effective_policy - .network_policies - .contains_key("_provider_work_github") - ); - } + assert!( + effective_policy + .network_policies + .contains_key("sandbox_only") + ); + assert!( + !effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + } + + #[tokio::test] + async fn sandbox_config_composes_provider_layers_when_v2_enabled() { + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-v2-enabled", + "v2-enabled", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let effective_policy = get_sandbox_policy(&state, "sb-v2-enabled").await; + + assert!( + effective_policy + .network_policies + .contains_key("sandbox_only") + ); + assert!( + effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + 
assert!( + effective_policy + .network_policies + .get("_provider_work_github") + .unwrap() + .endpoints + .iter() + .any(|endpoint| endpoint.host == "api.github.com") + ); + } + + #[tokio::test] + async fn sandbox_config_skips_profileless_provider_types_when_v2_enabled() { + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("legacy-generic", "generic")) + .await + .unwrap(); + state + .store + .put_message(&test_provider("custom-provider", "custom")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-profileless", + "profileless", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + vec!["legacy-generic".to_string(), "custom-provider".to_string()], + )) + .await + .unwrap(); + + let effective_policy = get_sandbox_policy(&state, "sb-profileless").await; + + assert_eq!(effective_policy.network_policies.len(), 1); + assert!( + effective_policy + .network_policies + .contains_key("sandbox_only") + ); + } + + #[tokio::test] + async fn sandbox_config_composition_is_jit_and_does_not_persist_provider_layers() { + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-jit", + "jit", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let effective_policy = get_sandbox_policy(&state, "sb-jit").await; + assert!( + effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + + let persisted = state + .store + .get_latest_policy("sb-jit") + .await + .unwrap() + .expect("sandbox policy should be lazily backfilled"); + let persisted_policy = ProtoSandboxPolicy::decode(persisted.policy_payload.as_slice()) + .expect("persisted sandbox policy should decode"); + assert!( + persisted_policy + .network_policies 
+ .contains_key("sandbox_only") + ); + assert!( + !persisted_policy + .network_policies + .contains_key("_provider_work_github") + ); + } + + #[tokio::test] + async fn sandbox_config_preserves_overlapping_user_and_provider_rules() { + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-overlap", + "overlap", + test_policy_with_rule("_provider_work_github", "api.github.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let effective_policy = get_sandbox_policy(&state, "sb-overlap").await; + + assert!( + effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + assert!( + effective_policy + .network_policies + .contains_key("_provider_work_github_2") + ); + assert_eq!( + effective_policy + .network_policies + .get("_provider_work_github") + .unwrap() + .endpoints[0] + .host, + "api.github.com" + ); + } + + #[tokio::test] + async fn provider_environment_resolution_is_unchanged_by_providers_v2_setting() { + use openshell_core::proto::GetSandboxProviderEnvironmentRequest; + + let state = test_server_state().await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-provider-env", + "provider-env", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let legacy_env = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-env".to_string(), + }), + ) + .await + .unwrap() + .into_inner() + .environment; + + enable_providers_v2(&state).await; + let v2_env = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-env".to_string(), + }), + 
) + .await + .unwrap() + .into_inner() + .environment; + + assert_eq!(legacy_env, v2_env); + assert_eq!(v2_env.get("GITHUB_TOKEN"), Some(&"ghp-test".to_string())); + } + + #[tokio::test] + async fn provider_env_revision_changes_when_attached_provider_record_changes() { + use openshell_core::proto::GetSandboxProviderEnvironmentRequest; + use std::time::Duration; + + let state = test_server_state().await; + let mut provider = test_provider("work-github", "github"); + state.store.put_message(&provider).await.unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-provider-revision", + "provider-revision", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + vec!["work-github".to_string()], + )) + .await + .unwrap(); + + let first = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-revision".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + + tokio::time::sleep(Duration::from_millis(2)).await; + provider + .credentials + .insert("GITHUB_TOKEN".to_string(), "rotated".to_string()); + state.store.put_message(&provider).await.unwrap(); + + let second = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-provider-revision".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + + assert_ne!( + first.provider_env_revision, second.provider_env_revision, + "provider object updates must trigger sandbox credential refresh" + ); + assert_eq!( + second.environment.get("GITHUB_TOKEN"), + Some(&"rotated".to_string()) + ); + } + + #[tokio::test] + async fn sandbox_config_and_provider_env_follow_attached_provider_lifecycle() { + use crate::grpc::sandbox::{ + handle_attach_sandbox_provider, handle_detach_sandbox_provider, + }; + use openshell_core::proto::{ + AttachSandboxProviderRequest, DetachSandboxProviderRequest, + GetSandboxProviderEnvironmentRequest, + }; + + let state = 
test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-attach-lifecycle", + "attach-lifecycle", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + Vec::new(), + )) + .await + .unwrap(); + + let baseline_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; + assert!( + !baseline_policy + .network_policies + .contains_key("_provider_work_github") + ); + let baseline_env = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-attach-lifecycle".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + + handle_attach_sandbox_provider( + &state, + Request::new(AttachSandboxProviderRequest { + sandbox_name: "attach-lifecycle".to_string(), + provider_name: "work-github".to_string(), + }), + ) + .await + .unwrap(); + + let attached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; + assert!( + attached_policy + .network_policies + .contains_key("_provider_work_github") + ); + + let attached_env = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-attach-lifecycle".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_ne!( + baseline_env.provider_env_revision, + attached_env.provider_env_revision + ); + assert_eq!( + attached_env.environment.get("GITHUB_TOKEN"), + Some(&"ghp-test".to_string()) + ); + + handle_detach_sandbox_provider( + &state, + Request::new(DetachSandboxProviderRequest { + sandbox_name: "attach-lifecycle".to_string(), + provider_name: "work-github".to_string(), + }), + ) + .await + .unwrap(); + + let detached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; + assert!( + !detached_policy + .network_policies + .contains_key("_provider_work_github") + ); + + let detached_env = 
handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-attach-lifecycle".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_ne!( + attached_env.provider_env_revision, + detached_env.provider_env_revision + ); + assert!(!detached_env.environment.contains_key("GITHUB_TOKEN")); + } + + #[tokio::test] + #[allow(deprecated)] + async fn custom_imported_profile_policy_and_env_follow_attach_detach_lifecycle() { + use crate::grpc::provider::handle_import_provider_profiles; + use crate::grpc::sandbox::{ + handle_attach_sandbox_provider, handle_detach_sandbox_provider, + }; + use openshell_core::proto::{ + AttachSandboxProviderRequest, DetachSandboxProviderRequest, + GetSandboxProviderEnvironmentRequest, ImportProviderProfilesRequest, NetworkBinary, + ProviderProfile, ProviderProfileCategory, ProviderProfileCredential, + ProviderProfileImportItem, + }; + + let state = test_server_state().await; + enable_providers_v2(&state).await; + handle_import_provider_profiles( + &state, + Request::new(ImportProviderProfilesRequest { + profiles: vec![ProviderProfileImportItem { + source: "custom-api.yaml".to_string(), + profile: Some(ProviderProfile { + id: "custom-api".to_string(), + display_name: "Custom API".to_string(), + description: String::new(), + category: ProviderProfileCategory::Other as i32, + credentials: vec![ProviderProfileCredential { + name: "api_key".to_string(), + env_vars: vec!["CUSTOM_API_KEY".to_string()], + auth_style: "bearer".to_string(), + header_name: "authorization".to_string(), + required: true, + ..Default::default() + }], + endpoints: vec![NetworkEndpoint { + host: "api.custom.example".to_string(), + port: 443, + protocol: "rest".to_string(), + rules: vec![L7Rule { + allow: Some(openshell_core::proto::L7Allow { + method: "GET".to_string(), + path: "/v1/**".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: 
"/usr/bin/custom".to_string(), + harness: true, + }], + inference_capable: false, + }), + }], + }), + ) + .await + .unwrap(); + + let mut provider = test_provider("work-custom", "custom-api"); + provider.credentials = + std::iter::once(("CUSTOM_API_KEY".to_string(), "custom-secret".to_string())).collect(); + state.store.put_message(&provider).await.unwrap(); + state + .store + .put_message(&test_sandbox( + "sb-custom-attach-lifecycle", + "custom-attach-lifecycle", + test_policy_with_rule("sandbox_only", "sandbox.example.com"), + Vec::new(), + )) + .await + .unwrap(); + + let baseline_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; + assert!( + !baseline_policy + .network_policies + .contains_key("_provider_work_custom") + ); + let baseline_env = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-custom-attach-lifecycle".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + + handle_attach_sandbox_provider( + &state, + Request::new(AttachSandboxProviderRequest { + sandbox_name: "custom-attach-lifecycle".to_string(), + provider_name: "work-custom".to_string(), + }), + ) + .await + .unwrap(); + + let attached_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; + let custom_rule = attached_policy + .network_policies + .get("_provider_work_custom") + .expect("custom provider rule should be composed after attach"); + assert_eq!(custom_rule.endpoints[0].host, "api.custom.example"); + assert_eq!(custom_rule.endpoints[0].protocol, "rest"); + assert_eq!(custom_rule.endpoints[0].rules.len(), 1); + assert_eq!(custom_rule.binaries[0].path, "/usr/bin/custom"); + + let attached_env = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-custom-attach-lifecycle".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_ne!( + baseline_env.provider_env_revision, + 
attached_env.provider_env_revision + ); + assert_eq!( + attached_env.environment.get("CUSTOM_API_KEY"), + Some(&"custom-secret".to_string()) + ); + + handle_detach_sandbox_provider( + &state, + Request::new(DetachSandboxProviderRequest { + sandbox_name: "custom-attach-lifecycle".to_string(), + provider_name: "work-custom".to_string(), + }), + ) + .await + .unwrap(); + + let detached_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; + assert!( + !detached_policy + .network_policies + .contains_key("_provider_work_custom") + ); + let detached_env = handle_get_sandbox_provider_environment( + &state, + Request::new(GetSandboxProviderEnvironmentRequest { + sandbox_id: "sb-custom-attach-lifecycle".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_ne!( + attached_env.provider_env_revision, + detached_env.provider_env_revision + ); + assert!(!detached_env.environment.contains_key("CUSTOM_API_KEY")); + } + + #[tokio::test] + async fn global_policy_suppresses_provider_profile_layers_when_v2_enabled() { + use openshell_core::proto::{ + GetSandboxConfigRequest, NetworkEndpoint, NetworkPolicyRule, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; + + let state = test_server_state().await; + state + .store + .put_message(&test_provider("work-github", "github")) + .await + .unwrap(); + + let sandbox_policy = SandboxPolicy { + network_policies: std::iter::once(( + "sandbox_only".to_string(), + NetworkPolicyRule { + name: "sandbox_only".to_string(), + endpoints: vec![NetworkEndpoint { + host: "sandbox.example.com".to_string(), + port: 443, + ..Default::default() + }], + ..Default::default() + }, + )) + .collect(), + ..Default::default() + }; + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-global-profile".to_string(), + name: "global-profile-sandbox".to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(sandbox_policy), + 
providers: vec!["work-github".to_string()], + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + + let global_policy = SandboxPolicy { + network_policies: std::iter::once(( + "global_only".to_string(), + NetworkPolicyRule { + name: "global_only".to_string(), + endpoints: vec![NetworkEndpoint { + host: "global.example.com".to_string(), + port: 443, + ..Default::default() + }], + ..Default::default() + }, + )) + .collect(), + ..Default::default() + }; + let global_settings = StoredSettings { + revision: 1, + settings: [ + ( + settings::PROVIDERS_V2_ENABLED_KEY.to_string(), + StoredSettingValue::Bool(true), + ), + ( + POLICY_SETTING_KEY.to_string(), + StoredSettingValue::Bytes(hex::encode(global_policy.encode_to_vec())), + ), + ] + .into_iter() + .collect(), + }; + save_global_settings(state.store.as_ref(), &global_settings) + .await + .unwrap(); + + let response = handle_get_sandbox_config( + &state, + Request::new(GetSandboxConfigRequest { + sandbox_id: "sb-global-profile".to_string(), + }), + ) + .await + .unwrap() + .into_inner(); + + let effective_policy = response.policy.expect("global policy should be returned"); + assert_eq!(response.policy_source, PolicySource::Global as i32); + assert!( + effective_policy + .network_policies + .contains_key("global_only") + ); + assert!( + !effective_policy + .network_policies + .contains_key("sandbox_only") + ); + assert!( + !effective_policy + .network_policies + .contains_key("_provider_work_github") + ); + } + + #[tokio::test] + async fn sandbox_policy_backfill_on_update_when_no_baseline() { + use openshell_core::proto::{FilesystemPolicy, LandlockPolicy, SandboxPhase, SandboxSpec}; + + let store = Store::connect("sqlite::memory:").await.unwrap(); + + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-backfill".to_string(), + name: "backfill-sandbox".to_string(), + 
created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: None, + ..Default::default() + }), + phase: SandboxPhase::Provisioning as i32, + ..Default::default() + }; + store.put_message(&sandbox).await.unwrap(); + + let new_policy = ProtoSandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + include_workdir: true, + read_only: vec!["/usr".into()], + read_write: vec!["/tmp".into()], + }), + landlock: Some(LandlockPolicy { + compatibility: "best_effort".into(), + }), + process: Some(openshell_core::proto::ProcessPolicy { + run_as_user: "sandbox".into(), + run_as_group: "sandbox".into(), + }), + ..Default::default() + }; + + let mut sandbox = store + .get_message::("sb-backfill") + .await + .unwrap() + .unwrap(); + if let Some(ref mut spec) = sandbox.spec { + spec.policy = Some(new_policy.clone()); + } + store.put_message(&sandbox).await.unwrap(); + + let loaded = store + .get_message::("sb-backfill") + .await + .unwrap() + .unwrap(); + let policy = loaded.spec.unwrap().policy.unwrap(); + assert_eq!(policy.version, 1); + assert!(policy.filesystem.is_some()); + assert_eq!(policy.process.unwrap().run_as_user, "sandbox"); + } + + /// Test helper: pin the proposal approval mode for a sandbox via the + /// settings model, mirroring what `openshell settings set + /// proposal_approval_mode ` would do at runtime. 
+ async fn seed_sandbox_approval_mode(state: &Arc, sandbox_name: &str, mode: &str) { + let mut settings = load_sandbox_settings(state.store.as_ref(), sandbox_name) + .await + .unwrap(); + settings.settings.insert( + settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(), + StoredSettingValue::String(mode.to_string()), + ); + settings.revision = settings.revision.wrapping_add(1); + save_sandbox_settings(state.store.as_ref(), sandbox_name, &settings) + .await + .unwrap(); + } + + /// Test helper: pin the gateway-wide proposal approval mode, mirroring + /// `openshell settings set --global proposal_approval_mode `. + async fn seed_global_approval_mode(state: &Arc, mode: &str) { + let mut settings = load_global_settings(state.store.as_ref()).await.unwrap(); + settings.settings.insert( + settings::PROPOSAL_APPROVAL_MODE_KEY.to_string(), + StoredSettingValue::String(mode.to_string()), + ); + settings.revision = settings.revision.wrapping_add(1); + save_global_settings(state.store.as_ref(), &settings) + .await + .unwrap(); + } + + async fn test_server_state() -> Arc { + let store = Arc::new( + Store::connect("sqlite::memory:?cache=shared") + .await + .unwrap(), + ); + let compute = new_test_runtime(store.clone()).await; + Arc::new(ServerState::new( + Config::new(None).with_database_url("sqlite::memory:?cache=shared"), + store, + compute, + SandboxIndex::new(), + SandboxWatchBus::new(), + TracingLogBus::new(), + Arc::new(SupervisorSessionRegistry::new()), + None, + )) + } + + #[tokio::test] + async fn draft_chunk_handler_lifecycle_round_trip() { + use openshell_core::proto::{ + GetDraftPolicyRequest, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec, + }; + + let state = test_server_state().await; + // Attach a github provider so the proposal below has a credential in + // scope for api.github.com. 
This causes the prover to emit a + // finding (L4 + credential in scope), keeping the chunk pending so + // the manual approve/reject lifecycle this test exercises is + // reachable even under `proposal_approval_mode = auto`. With the + // default `manual` mode the chunk would stay pending regardless. + state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-draft-flow".to_string(), + name: "draft-flow".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: None, + providers: vec!["github-pat".to_string()], + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + let sandbox_name = sandbox.object_name().to_string(); + + let proposed_rule = NetworkPolicyRule { + name: "allow_github".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + let submit = handle_submit_policy_analysis( + &state, + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + proposed_chunks: vec![PolicyChunk { + rule_name: "allow_github".to_string(), + proposed_rule: Some(proposed_rule.clone()), + rationale: "observed denied request".to_string(), + confidence: 0.85, + hit_count: 3, + first_seen_ms: 100, + last_seen_ms: 200, + binary: "/usr/bin/curl".to_string(), + ..Default::default() + }], + ..Default::default() + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(submit.accepted_chunks, 1); + assert_eq!(submit.rejected_chunks, 0); + assert_eq!(submit.accepted_chunk_ids.len(), 1); + assert!(!submit.accepted_chunk_ids[0].is_empty()); + + let draft_policy = handle_get_draft_policy( + &state, +
Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(draft_policy.draft_version, 1); + assert_eq!(draft_policy.chunks.len(), 1); + // The proposal is L4 to a host with a credential in scope, so the + // prover emits a finding and the chunk stays pending for the + // manual approve path this test exercises. + assert_eq!(draft_policy.chunks[0].status, "pending"); + let chunk_id = draft_policy.chunks[0].id.clone(); + + let approve = handle_approve_draft_chunk( + &state, + Request::new(ApproveDraftChunkRequest { + name: sandbox_name.clone(), + chunk_id: chunk_id.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(approve.policy_version, 1); + assert!(!approve.policy_hash.is_empty()); + + let history_after_approve = handle_get_draft_history( + &state, + Request::new(GetDraftHistoryRequest { + name: sandbox_name.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(history_after_approve.entries.len(), 2); + assert_eq!(history_after_approve.entries[0].event_type, "proposed"); + assert_eq!(history_after_approve.entries[1].event_type, "approved"); + assert_eq!(history_after_approve.entries[1].chunk_id, chunk_id); + + let policies_after_approve = handle_list_sandbox_policies( + &state, + Request::new(ListSandboxPoliciesRequest { + name: sandbox_name.clone(), + limit: 10, + offset: 0, + global: false, + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(policies_after_approve.revisions.len(), 1); + assert_eq!(policies_after_approve.revisions[0].version, 1); + + let undo = handle_undo_draft_chunk( + &state, + Request::new(UndoDraftChunkRequest { + name: sandbox_name.clone(), + chunk_id: chunk_id.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(undo.policy_version, 2); + assert!(!undo.policy_hash.is_empty()); + + let draft_policy_after_undo = handle_get_draft_policy( + &state, +
Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(draft_policy_after_undo.chunks.len(), 1); + assert_eq!(draft_policy_after_undo.chunks[0].status, "pending"); + + let history_after_undo = handle_get_draft_history( + &state, + Request::new(GetDraftHistoryRequest { + name: sandbox_name.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(history_after_undo.entries.len(), 1); + assert_eq!(history_after_undo.entries[0].event_type, "proposed"); + + let policies_after_undo = handle_list_sandbox_policies( + &state, + Request::new(ListSandboxPoliciesRequest { + name: sandbox_name.clone(), + limit: 10, + offset: 0, + global: false, + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(policies_after_undo.revisions.len(), 2); + assert_eq!(policies_after_undo.revisions[0].version, 2); + assert_eq!(policies_after_undo.revisions[1].version, 1); + + let cleared = handle_clear_draft_chunks( + &state, + Request::new(ClearDraftChunksRequest { + name: sandbox_name.clone(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!(cleared.chunks_cleared, 1); + + let draft_policy_after_clear = handle_get_draft_policy( + &state, + Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); + assert!(draft_policy_after_clear.chunks.is_empty()); + + let history_after_clear = handle_get_draft_history( + &state, + Request::new(GetDraftHistoryRequest { name: sandbox_name }), + ) + .await + .unwrap() + .into_inner(); + assert!(history_after_clear.entries.is_empty()); + } + + /// A reviewer's free-form rejection reason must round-trip through + /// persistence and surface on the chunk via `GetDraftPolicy`, so the + /// in-sandbox agent can read the guidance and redraft. The MVP-v2 agent + /// feedback loop hangs off this guarantee. 
+ #[tokio::test] + async fn reject_with_reason_persists_into_chunk_for_agent_readback() { + use openshell_core::proto::{NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec}; + + let state = test_server_state().await; + let sandbox_name = "agent-feedback-loop".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-feedback".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: None, + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + + let proposed_rule = NetworkPolicyRule { + name: "allow_example".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; - #[tokio::test] - async fn sandbox_config_composes_provider_layers_when_v2_enabled() { - let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-v2-enabled", - "v2-enabled", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["work-github".to_string()], - )) - .await - .unwrap(); + let submit = handle_submit_policy_analysis( + &state, + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + proposed_chunks: vec![PolicyChunk { + rule_name: "allow_example".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "agent intent".to_string(), + ..Default::default() + }], + ..Default::default() + }), + ) + .await + .unwrap() + .into_inner(); + let chunk_id = submit.accepted_chunk_ids[0].clone(); - let effective_policy = get_sandbox_policy(&state, "sb-v2-enabled").await; + 
let guidance = "scope to docs/ paths only, not all repo contents"; + handle_reject_draft_chunk( + &state, + Request::new(RejectDraftChunkRequest { + name: sandbox_name.clone(), + chunk_id: chunk_id.clone(), + reason: guidance.to_string(), + }), + ) + .await + .unwrap(); - assert!( - effective_policy - .network_policies - .contains_key("sandbox_only") - ); - assert!( - effective_policy - .network_policies - .contains_key("_provider_work_github") - ); - assert!( - effective_policy - .network_policies - .get("_provider_work_github") - .unwrap() - .endpoints - .iter() - .any(|endpoint| endpoint.host == "api.github.com") + let draft = handle_get_draft_policy( + &state, + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); + let rejected = draft + .chunks + .iter() + .find(|c| c.id == chunk_id) + .expect("rejected chunk should still be visible"); + assert_eq!(rejected.status, "rejected"); + assert_eq!( + rejected.rejection_reason, guidance, + "reviewer's free-form reason must round-trip into the chunk for agent readback" ); + // The prover now runs on every proposal regardless of analysis_mode. + // For this rule (L4 to api.example.com, no provider attached, no + // credential in scope), v1 calibration emits no finding — so the + // verdict is the clean "no new findings" string, not empty. 
+ assert_eq!(rejected.validation_result, "prover: no new findings"); } #[tokio::test] - async fn sandbox_config_skips_profileless_provider_types_when_v2_enabled() { + async fn agent_authored_exact_l7_proposal_gets_prover_pass_verdict() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7Rule, NetworkBinary, NetworkEndpoint, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; + let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("legacy-generic", "generic")) - .await - .unwrap(); - state - .store - .put_message(&test_provider("custom-provider", "custom")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-profileless", - "profileless", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["legacy-generic".to_string(), "custom-provider".to_string()], - )) - .await - .unwrap(); + let sandbox_name = "agent-l7-verdict".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-l7-verdict".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + // Opt this sandbox into auto-approval via the settings model — same + // path the CLI's `--approval-mode auto` exercises — to test the + // empty-delta → approved path. 
+ seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let effective_policy = get_sandbox_policy(&state, "sb-profileless").await; + let proposed_rule = NetworkPolicyRule { + name: "github_contents_write".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/org/repo/contents/demo/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; - assert_eq!(effective_policy.network_policies.len(), 1); - assert!( - effective_policy - .network_policies - .contains_key("sandbox_only") + handle_submit_policy_analysis( + &state, + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_write".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "write one demo file".to_string(), + ..Default::default() + }], + ..Default::default() + }), + ) + .await + .unwrap(); + + let draft = handle_get_draft_policy( + &state, + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); + let verdict = &draft.chunks[0].validation_result; + assert_eq!( + verdict, "prover: no new findings", + "exact L7 PUT against an inspected endpoint should not introduce \ + any new findings over baseline; got: {verdict}" + ); + // Auto-approval gate: empty delta + sandbox opted into auto mode → + // status flips to approved without human action. The canonical + // happy path for agent speed. 
+ assert_eq!( + draft.chunks[0].status, "approved", + "empty-delta agent-authored proposal under auto mode must auto-approve; \ + got status: {}", + draft.chunks[0].status ); } + /// Implicit supersede: when a refined agent-authored proposal lands for + /// the same `(host, port, binary)` as a pending mechanistic chunk, the + /// older mechanistic chunk is auto-rejected with a "superseded by + /// chunk X" reason. This is the refinement loop without a + /// `supersedes_chunk_id` field — structural overlap is enough. #[tokio::test] - async fn sandbox_config_composition_is_jit_and_does_not_persist_provider_layers() { + async fn agent_authored_submission_supersedes_pending_mechanistic_for_same_endpoint() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7Rule, NetworkBinary, NetworkEndpoint, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; + let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); + // github provider attached so the mechanistic L4 lands a + // credential_reach_expansion finding and stays pending. 
state .store - .put_message(&test_sandbox( - "sb-jit", - "jit", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["work-github".to_string()], - )) + .put_message(&test_provider("github-pat", "github")) .await .unwrap(); + let sandbox_name = "supersede-flow".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-supersede-flow".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + + // Step 1: mechanistic submits a broad L4 grant; the prover flags + // credential_reach_expansion, so it lands in pending. 
+ let mechanistic_rule = NetworkPolicyRule { + name: "allow_api_github_com_443".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let mechanistic_submit = handle_submit_policy_analysis( + &state, + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "mechanistic".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "allow_api_github_com_443".to_string(), + proposed_rule: Some(mechanistic_rule), + rationale: "Allow /usr/bin/curl to connect to api.github.com:443.".to_string(), + ..Default::default() + }], + ..Default::default() + }), + ) + .await + .unwrap() + .into_inner(); + let mechanistic_chunk_id = mechanistic_submit.accepted_chunk_ids[0].clone(); - let effective_policy = get_sandbox_policy(&state, "sb-jit").await; + // Sanity-check: the mechanistic chunk is pending and carries a + // credential_reach_expansion finding. + let draft = handle_get_draft_policy( + &state, + Request::new(GetDraftPolicyRequest { + name: sandbox_name.clone(), + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); + let mech = draft + .chunks + .iter() + .find(|c| c.id == mechanistic_chunk_id) + .expect("mechanistic chunk present"); + assert_eq!(mech.status, "pending"); + // Mechanistic L4 with credential in scope flags as new credentialed + // reach for the binary on the host. 
assert!( - effective_policy - .network_policies - .contains_key("_provider_work_github") + mech.validation_result + .contains("credential_reach_expansion"), + "mechanistic L4 with credential in scope should emit \ + credential_reach_expansion; got: {}", + mech.validation_result ); - let persisted = state - .store - .get_latest_policy("sb-jit") - .await - .unwrap() - .expect("sandbox policy should be lazily backfilled"); - let persisted_policy = ProtoSandboxPolicy::decode(persisted.policy_payload.as_slice()) - .expect("persisted sandbox policy should decode"); - assert!( - persisted_policy - .network_policies - .contains_key("sandbox_only") - ); - assert!( - !persisted_policy - .network_policies - .contains_key("_provider_work_github") - ); - } + // Step 2: the agent refines into a narrow L7 proposal for the SAME + // (host, port, binary). The mechanistic chunk is only pending, so + // the baseline policy still has no rule for curl on api.github.com: + // the agent's L7 PUT grants brand-new credentialed reach and emits + // credential_reach_expansion, as asserted below. The agent + // chunk stays pending for human review. The mechanistic chunk gets + // auto-rejected as superseded regardless of the agent chunk's own + // validation verdict — supersede is unconditional on `(host, port, + // binary)` overlap. 
+ let agent_rule = NetworkPolicyRule { + name: "github_contents_put".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/owner/name/contents/path/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let agent_submit = handle_submit_policy_analysis( + &state, + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_put".to_string(), + proposed_rule: Some(agent_rule), + rationale: "refined L7 scope for the demo write".to_string(), + ..Default::default() + }], + ..Default::default() + }), + ) + .await + .unwrap() + .into_inner(); + let agent_chunk_id = agent_submit.accepted_chunk_ids[0].clone(); - #[tokio::test] - async fn sandbox_config_preserves_overlapping_user_and_provider_rules() { - let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-overlap", - "overlap", - test_policy_with_rule("_provider_work_github", "api.github.com"), - vec!["work-github".to_string()], - )) - .await - .unwrap(); + let draft_after = handle_get_draft_policy( + &state, + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); - let effective_policy = get_sandbox_policy(&state, "sb-overlap").await; + let agent = draft_after + .chunks + .iter() + .find(|c| c.id == agent_chunk_id) + .expect("agent chunk present"); + let mech_after = draft_after + .chunks + .iter() + 
.find(|c| c.id == mechanistic_chunk_id) + .expect("mechanistic chunk should still be visible (with new status)"); - assert!( - effective_policy - .network_policies - .contains_key("_provider_work_github") + assert_eq!( + agent.status, "pending", + "agent-authored L7 PUT with credential in scope must land in pending; \ + the baseline policy has no pre-existing rule for curl on api.github.com \ + so the agent's chunk grants brand-new credentialed reach. got: {}", + agent.status ); assert!( - effective_policy - .network_policies - .contains_key("_provider_work_github_2") + agent + .validation_result + .contains("credential_reach_expansion"), + "agent chunk should carry credential_reach_expansion (new credentialed reach \ + on api.github.com); got: {}", + agent.validation_result ); assert_eq!( - effective_policy - .network_policies - .get("_provider_work_github") - .unwrap() - .endpoints[0] - .host, - "api.github.com" + mech_after.status, "rejected", + "older mechanistic chunk for same (host, port, binary) should be superseded; \ + got: {}", + mech_after.status + ); + assert!( + mech_after.rejection_reason.contains(&agent_chunk_id), + "rejection reason should cite the superseding chunk id; got: {}", + mech_after.rejection_reason + ); + assert!( + mech_after.rejection_reason.contains("superseded"), + "rejection reason should explain the supersede; got: {}", + mech_after.rejection_reason ); } + /// Auto-approval is **proposer-agnostic**: a mechanistic proposal whose + /// prover delta is empty auto-approves the same way an agent-authored one + /// does. Source provenance is preserved in the audit trail (OCSF event + /// `source=mechanistic`) but does not change the safety decision. 
#[tokio::test] - async fn provider_environment_resolution_is_unchanged_by_providers_v2_setting() { - use openshell_core::proto::GetSandboxProviderEnvironmentRequest; + async fn mechanistic_proposal_with_empty_delta_also_auto_approves() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; let state = test_server_state().await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-provider-env", - "provider-env", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["work-github".to_string()], - )) - .await - .unwrap(); + let sandbox_name = "mechanistic-clean".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-mechanistic-clean".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + // No providers → no credential in scope for the proposed host. + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + // Opt into auto mode via the settings model to test the + // proposer-agnostic gate. 
+ seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let legacy_env = handle_get_sandbox_provider_environment( + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-env".to_string(), + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "mechanistic".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "Allow /usr/bin/curl to connect to example.com:443.".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await - .unwrap() - .into_inner() - .environment; + .unwrap(); - enable_providers_v2(&state).await; - let v2_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-env".to_string(), + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), }), ) .await .unwrap() - .into_inner() - .environment; - - assert_eq!(legacy_env, v2_env); - assert_eq!(v2_env.get("GITHUB_TOKEN"), Some(&"ghp-test".to_string())); + .into_inner(); + let verdict = &draft.chunks[0].validation_result; + assert_eq!(verdict, "prover: no new findings"); + assert_eq!( + draft.chunks[0].status, "approved", + "empty-delta mechanistic proposal under auto mode must auto-approve \ + (proposer-agnostic); got status: {}", + draft.chunks[0].status + ); } + /// `protocol: rest, access: full` on a host where the binary had no + /// prior credentialed reach: the prover emits + /// `credential_reach_expansion`. 
(The per-method `capability_expansion` + /// paths are suppressed by the gateway delta because the reach is + /// new; one finding describes the change, not eight.) #[tokio::test] - async fn provider_env_revision_changes_when_attached_provider_record_changes() { - use openshell_core::proto::GetSandboxProviderEnvironmentRequest; - use std::time::Duration; + async fn agent_authored_l7_full_with_credential_emits_reach_expansion() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; let state = test_server_state().await; - let mut provider = test_provider("work-github", "github"); - state.store.put_message(&provider).await.unwrap(); state .store - .put_message(&test_sandbox( - "sb-provider-revision", - "provider-revision", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - vec!["work-github".to_string()], - )) + .put_message(&test_provider("github-pat", "github")) .await .unwrap(); + let sandbox_name = "l7-full-with-cred".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-l7-full-with-cred".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let first = handle_get_sandbox_provider_environment( + // L7-annotated (protocol: rest, enforce) but access: full — no + // method/path bound. Credential in scope. 
+ let proposed_rule = NetworkPolicyRule { + name: "github_l7_full".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + access: "full".to_string(), + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-revision".to_string(), + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_l7_full".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "broad L7 dressing".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await - .unwrap() - .into_inner(); - - tokio::time::sleep(Duration::from_millis(2)).await; - provider - .credentials - .insert("GITHUB_TOKEN".to_string(), "rotated".to_string()); - state.store.put_message(&provider).await.unwrap(); + .unwrap(); - let second = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-provider-revision".to_string(), + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - - assert_ne!( - first.provider_env_revision, second.provider_env_revision, - "provider object updates must trigger sandbox credential refresh" + let verdict = &draft.chunks[0].validation_result; + assert!( + verdict.contains("credential_reach_expansion"), + "L7 `access: full` on a host the binary did not previously reach must emit \ + credential_reach_expansion; got: {verdict}" + ); + // Capability_expansion paths for the same (binary, host:port) are + // suppressed when the reach itself is new — one finding, not 
many. + assert!( + !verdict.contains("capability_expansion"), + "capability_expansion must be suppressed when reach itself is new; got: {verdict}" ); assert_eq!( - second.environment.get("GITHUB_TOKEN"), - Some(&"rotated".to_string()) + draft.chunks[0].status, "pending", + "any prover finding must keep the chunk in pending despite auto mode; got: {}", + draft.chunks[0].status ); } + /// Acceptance criterion #7: default approval mode is manual. A sandbox + /// with no `proposal_approval_mode` setting at either scope must NOT + /// auto-approve empty-delta proposals; the chunk lands in `pending` for + /// human review. This is the default-deny safeguard: auto-approval is + /// an explicit opt-in, not a global behavior change shipped under a + /// feature. #[tokio::test] - async fn sandbox_config_and_provider_env_follow_attached_provider_lifecycle() { - use crate::grpc::sandbox::{ - handle_attach_sandbox_provider, handle_detach_sandbox_provider, - }; + async fn empty_delta_does_not_auto_approve_when_mode_unset() { use openshell_core::proto::{ - AttachSandboxProviderRequest, DetachSandboxProviderRequest, - GetSandboxProviderEnvironmentRequest, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; - enable_providers_v2(&state).await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-attach-lifecycle", - "attach-lifecycle", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - Vec::new(), - )) - .await - .unwrap(); - - let baseline_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; - assert!( - !baseline_policy - .network_policies - .contains_key("_provider_work_github") - ); - let baseline_env = handle_get_sandbox_provider_environment( - &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-attach-lifecycle".to_string(), + let sandbox_name 
= "default-manual-mode".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-default-manual-mode".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), }), - ) - .await - .unwrap() - .into_inner(); - - handle_attach_sandbox_provider( - &state, - Request::new(AttachSandboxProviderRequest { - sandbox_name: "attach-lifecycle".to_string(), - provider_name: "work-github".to_string(), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + // No approval-mode setting seeded at sandbox or gateway + // scope — the resolver must treat absence as "manual". + ..Default::default() }), - ) - .await - .unwrap(); - - let attached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; - assert!( - attached_policy - .network_policies - .contains_key("_provider_work_github") - ); + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); - let attached_env = handle_get_sandbox_provider_environment( - &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-attach-lifecycle".to_string(), - }), - ) - .await - .unwrap() - .into_inner(); - assert_ne!( - baseline_env.provider_env_revision, - attached_env.provider_env_revision - ); - assert_eq!( - attached_env.environment.get("GITHUB_TOKEN"), - Some(&"ghp-test".to_string()) - ); + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; - handle_detach_sandbox_provider( + handle_submit_policy_analysis( &state, - 
Request::new(DetachSandboxProviderRequest { - sandbox_name: "attach-lifecycle".to_string(), - provider_name: "work-github".to_string(), + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — prover sees no finding".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await .unwrap(); - let detached_policy = get_sandbox_policy(&state, "sb-attach-lifecycle").await; - assert!( - !detached_policy - .network_policies - .contains_key("_provider_work_github") - ); - - let detached_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-attach-lifecycle".to_string(), + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - assert_ne!( - attached_env.provider_env_revision, - detached_env.provider_env_revision + let verdict = &draft.chunks[0].validation_result; + assert_eq!( + verdict, "prover: no new findings", + "prover should still emit no findings; gate is downstream", + ); + assert_eq!( + draft.chunks[0].status, "pending", + "default (unset) proposal_approval_mode must not auto-approve; \ + chunk should wait for human review. got status: {}", + draft.chunks[0].status ); - assert!(!detached_env.environment.contains_key("GITHUB_TOKEN")); } + /// Unknown `proposal_approval_mode` strings (typos, future-mode values + /// the gateway doesn't yet know about) fall back to manual. This locks + /// in forward-compat: a future CLI that learns about `"auto_on_low_risk"` + /// can never accidentally bypass an older gateway's review gate just by + /// virtue of an unrecognized value defaulting to "auto." 
#[tokio::test] - #[allow(deprecated)] - async fn custom_imported_profile_policy_and_env_follow_attach_detach_lifecycle() { - use crate::grpc::provider::handle_import_provider_profiles; - use crate::grpc::sandbox::{ - handle_attach_sandbox_provider, handle_detach_sandbox_provider, - }; + async fn empty_delta_does_not_auto_approve_when_mode_unknown_string() { use openshell_core::proto::{ - AttachSandboxProviderRequest, DetachSandboxProviderRequest, - GetSandboxProviderEnvironmentRequest, ImportProviderProfilesRequest, NetworkBinary, - ProviderProfile, ProviderProfileCategory, ProviderProfileCredential, - ProviderProfileImportItem, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; - enable_providers_v2(&state).await; - handle_import_provider_profiles( - &state, - Request::new(ImportProviderProfilesRequest { - profiles: vec![ProviderProfileImportItem { - source: "custom-api.yaml".to_string(), - profile: Some(ProviderProfile { - id: "custom-api".to_string(), - display_name: "Custom API".to_string(), - description: String::new(), - category: ProviderProfileCategory::Other as i32, - credentials: vec![ProviderProfileCredential { - name: "api_key".to_string(), - env_vars: vec!["CUSTOM_API_KEY".to_string()], - auth_style: "bearer".to_string(), - header_name: "authorization".to_string(), - required: true, - ..Default::default() - }], - endpoints: vec![NetworkEndpoint { - host: "api.custom.example".to_string(), - port: 443, - protocol: "rest".to_string(), - rules: vec![L7Rule { - allow: Some(openshell_core::proto::L7Allow { - method: "GET".to_string(), - path: "/v1/**".to_string(), - ..Default::default() - }), - }], - ..Default::default() - }], - binaries: vec![NetworkBinary { - path: "/usr/bin/custom".to_string(), - harness: true, - }], - inference_capable: false, + let sandbox_name = "unknown-mode".to_string(); + let sandbox = Sandbox { + metadata: 
Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-unknown-mode".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() }), + ..Default::default() + }), + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + // A future-CLI value the current gateway doesn't recognize. + seed_sandbox_approval_mode(&state, &sandbox_name, "auto_on_low_risk").await; + + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( + &state, + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4".to_string(), + ..Default::default() }], + ..Default::default() }), ) .await .unwrap(); - let mut provider = test_provider("work-custom", "custom-api"); - provider.credentials = - std::iter::once(("CUSTOM_API_KEY".to_string(), "custom-secret".to_string())).collect(); - state.store.put_message(&provider).await.unwrap(); - state - .store - .put_message(&test_sandbox( - "sb-custom-attach-lifecycle", - "custom-attach-lifecycle", - test_policy_with_rule("sandbox_only", "sandbox.example.com"), - Vec::new(), - )) - .await - .unwrap(); - - let baseline_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; - assert!( - !baseline_policy - .network_policies - 
.contains_key("_provider_work_custom") - ); - let baseline_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-custom-attach-lifecycle".to_string(), + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), }), ) .await .unwrap() .into_inner(); + assert_eq!( + draft.chunks[0].status, "pending", + "unknown approval-mode strings must fall back to manual; \ + only the literal \"auto\" opts in. got: {}", + draft.chunks[0].status + ); + } - handle_attach_sandbox_provider( + /// Explicit `"manual"` is equivalent to the unset default — chunk lands + /// in pending even with empty delta. + #[tokio::test] + async fn empty_delta_does_not_auto_approve_when_mode_explicit_manual() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "explicit-manual-mode".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-explicit-manual-mode".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + seed_sandbox_approval_mode(&state, &sandbox_name, "manual").await; + + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + 
..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(AttachSandboxProviderRequest { - sandbox_name: "custom-attach-lifecycle".to_string(), - provider_name: "work-custom".to_string(), + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — prover sees no finding".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await .unwrap(); - let attached_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; - let custom_rule = attached_policy - .network_policies - .get("_provider_work_custom") - .expect("custom provider rule should be composed after attach"); - assert_eq!(custom_rule.endpoints[0].host, "api.custom.example"); - assert_eq!(custom_rule.endpoints[0].protocol, "rest"); - assert_eq!(custom_rule.endpoints[0].rules.len(), 1); - assert_eq!(custom_rule.binaries[0].path, "/usr/bin/custom"); - - let attached_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-custom-attach-lifecycle".to_string(), + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - assert_ne!( - baseline_env.provider_env_revision, - attached_env.provider_env_revision - ); assert_eq!( - attached_env.environment.get("CUSTOM_API_KEY"), - Some(&"custom-secret".to_string()) + draft.chunks[0].status, "pending", + "explicit manual mode must equal default mode — no auto-approval; \ + got: {}", + draft.chunks[0].status ); + } - handle_detach_sandbox_provider( + /// Gateway-scope `proposal_approval_mode = "auto"` enables auto-approval + /// for any sandbox under that gateway, with no per-sandbox setting + /// required. 
This is the fleet-wide opt-in path — a reviewer flips the + /// gateway setting once and every sandbox without an explicit override + /// gets prover-gated auto-approval. + #[tokio::test] + async fn empty_delta_auto_approves_from_gateway_scope_setting() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "gateway-auto-mode".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-gateway-auto-mode".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + // Fleet-wide opt-in — no sandbox-scope setting. 
+ seed_global_approval_mode(&state, "auto").await; + + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(DetachSandboxProviderRequest { - sandbox_name: "custom-attach-lifecycle".to_string(), - provider_name: "work-custom".to_string(), + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — empty delta".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await .unwrap(); - let detached_policy = get_sandbox_policy(&state, "sb-custom-attach-lifecycle").await; - assert!( - !detached_policy - .network_policies - .contains_key("_provider_work_custom") - ); - let detached_env = handle_get_sandbox_provider_environment( + let draft = handle_get_draft_policy( &state, - Request::new(GetSandboxProviderEnvironmentRequest { - sandbox_id: "sb-custom-attach-lifecycle".to_string(), + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - assert_ne!( - attached_env.provider_env_revision, - detached_env.provider_env_revision + assert_eq!( + draft.chunks[0].status, "approved", + "empty-delta proposal must auto-approve when the gateway-scope \ + setting is \"auto\" and no sandbox-scope override exists. got: {}", + draft.chunks[0].status ); - assert!(!detached_env.environment.contains_key("CUSTOM_API_KEY")); } + /// Gateway scope wins over sandbox scope. A reviewer can pin manual mode + /// fleet-wide; a per-sandbox `"auto"` value is silently ignored. 
Matches + /// the existing settings precedence convention (global wins, sandbox is + /// the per-sandbox override only when no global is set). #[tokio::test] - async fn global_policy_suppresses_provider_profile_layers_when_v2_enabled() { + async fn gateway_manual_overrides_sandbox_auto() { use openshell_core::proto::{ - GetSandboxConfigRequest, NetworkEndpoint, NetworkPolicyRule, SandboxPhase, - SandboxPolicy, SandboxSpec, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; - state - .store - .put_message(&test_provider("work-github", "github")) - .await - .unwrap(); - - let sandbox_policy = SandboxPolicy { - network_policies: std::iter::once(( - "sandbox_only".to_string(), - NetworkPolicyRule { - name: "sandbox_only".to_string(), - endpoints: vec![NetworkEndpoint { - host: "sandbox.example.com".to_string(), - port: 443, - ..Default::default() - }], - ..Default::default() - }, - )) - .collect(), - ..Default::default() - }; + let sandbox_name = "gateway-pinned-manual".to_string(); let sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-global-profile".to_string(), - name: "global-profile-sandbox".to_string(), + id: "sb-gateway-pinned-manual".to_string(), + name: sandbox_name.clone(), created_at_ms: 1_000_000, - labels: HashMap::new(), + labels: std::collections::HashMap::new(), }), spec: Some(SandboxSpec { - policy: Some(sandbox_policy), - providers: vec!["work-github".to_string()], + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), ..Default::default() }), phase: SandboxPhase::Ready as i32, ..Default::default() }; state.store.put_message(&sandbox).await.unwrap(); + // Gateway pins manual; the sandbox-scope override is supplied (test + // helper bypasses the UpdateConfig precondition, simulating the + // 
before-pin state) to prove the resolver still picks the gateway + // value. + seed_global_approval_mode(&state, "manual").await; + seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let global_policy = SandboxPolicy { - network_policies: std::iter::once(( - "global_only".to_string(), - NetworkPolicyRule { - name: "global_only".to_string(), - endpoints: vec![NetworkEndpoint { - host: "global.example.com".to_string(), - port: 443, - ..Default::default() - }], - ..Default::default() - }, - )) - .collect(), - ..Default::default() - }; - let global_settings = StoredSettings { - revision: 1, - settings: [ - ( - settings::PROVIDERS_V2_ENABLED_KEY.to_string(), - StoredSettingValue::Bool(true), - ), - ( - POLICY_SETTING_KEY.to_string(), - StoredSettingValue::Bytes(hex::encode(global_policy.encode_to_vec())), - ), - ] - .into_iter() - .collect(), + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], }; - save_global_settings(state.store.as_ref(), &global_settings) - .await - .unwrap(); - let response = handle_get_sandbox_config( + handle_submit_policy_analysis( &state, - Request::new(GetSandboxConfigRequest { - sandbox_id: "sb-global-profile".to_string(), + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "un-credentialed L4 — empty delta".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await - .unwrap() - .into_inner(); + .unwrap(); - let effective_policy = response.policy.expect("global policy should be returned"); - assert_eq!(response.policy_source, PolicySource::Global as i32); - assert!( - effective_policy - 
.network_policies - .contains_key("global_only") - ); - assert!( - !effective_policy - .network_policies - .contains_key("sandbox_only") - ); - assert!( - !effective_policy - .network_policies - .contains_key("_provider_work_github") + let draft = handle_get_draft_policy( + &state, + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), + }), + ) + .await + .unwrap() + .into_inner(); + assert_eq!( + draft.chunks[0].status, "pending", + "gateway-scope \"manual\" must win over sandbox-scope \"auto\"; \ + got: {}", + draft.chunks[0].status ); } + /// Agent submissions targeting a `_provider_*` rule name are rejected at + /// the submit boundary. Provider-synthesized rules are a reserved + /// namespace; an agent that addresses one by name could otherwise + /// circumvent the merge guard that splits agent contributions into their + /// own rule (so the prover sees them honestly). #[tokio::test] - async fn sandbox_policy_backfill_on_update_when_no_baseline() { - use openshell_core::proto::{FilesystemPolicy, LandlockPolicy, SandboxPhase, SandboxSpec}; - - let store = Store::connect("sqlite::memory:").await.unwrap(); + async fn submit_rejects_reserved_provider_rule_name_prefix() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + let state = test_server_state().await; + let sandbox_name = "reject-provider-prefix".to_string(); let sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-backfill".to_string(), - name: "backfill-sandbox".to_string(), + id: "sb-reject-provider-prefix".to_string(), + name: sandbox_name.clone(), created_at_ms: 1_000_000, labels: std::collections::HashMap::new(), }), spec: Some(SandboxSpec { - policy: None, + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), 
..Default::default() }), - phase: SandboxPhase::Provisioning as i32, + phase: SandboxPhase::Ready as i32, ..Default::default() }; - store.put_message(&sandbox).await.unwrap(); + state.store.put_message(&sandbox).await.unwrap(); - let new_policy = ProtoSandboxPolicy { - version: 1, - filesystem: Some(FilesystemPolicy { - include_workdir: true, - read_only: vec!["/usr".into()], - read_write: vec!["/tmp".into()], - }), - landlock: Some(LandlockPolicy { - compatibility: "best_effort".into(), - }), - process: Some(openshell_core::proto::ProcessPolicy { - run_as_user: "sandbox".into(), - run_as_group: "sandbox".into(), - }), - ..Default::default() + let proposed_rule = NetworkPolicyRule { + name: "github".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], }; - let mut sandbox = store - .get_message::("sb-backfill") - .await - .unwrap() - .unwrap(); - if let Some(ref mut spec) = sandbox.spec { - spec.policy = Some(new_policy.clone()); - } - store.put_message(&sandbox).await.unwrap(); - - let loaded = store - .get_message::("sb-backfill") - .await - .unwrap() - .unwrap(); - let policy = loaded.spec.unwrap().policy.unwrap(); - assert_eq!(policy.version, 1); - assert!(policy.filesystem.is_some()); - assert_eq!(policy.process.unwrap().run_as_user, "sandbox"); - } + let response = handle_submit_policy_analysis( + &state, + Request::new(SubmitPolicyAnalysisRequest { + name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "_provider_work_github".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "should be rejected — addresses provider rule by name".to_string(), + ..Default::default() + }], + ..Default::default() + }), + ) + .await + .unwrap() + .into_inner(); - async fn test_server_state() -> Arc { - let store = 
Arc::new( - Store::connect("sqlite::memory:?cache=shared") - .await - .unwrap(), + assert_eq!(response.accepted_chunks, 0, "chunk must be rejected"); + assert_eq!(response.rejected_chunks, 1); + assert!( + response + .rejection_reasons + .iter() + .any(|r| r.contains("_provider_")), + "rejection reason must cite the reserved-prefix rule. got: {:?}", + response.rejection_reasons, ); - let compute = new_test_runtime(store.clone()).await; - Arc::new(ServerState::new( - Config::new(None).with_database_url("sqlite::memory:?cache=shared"), - store, - compute, - SandboxIndex::new(), - SandboxWatchBus::new(), - TracingLogBus::new(), - Arc::new(SupervisorSessionRegistry::new()), - None, - )) } + /// v1 calibration row: **L4 with a credential in scope → HIGH finding.** + /// The sandbox has a github provider attached, so a credential is in + /// scope for api.github.com. A broad L4 proposal therefore lands in + /// pending with a HIGH finding. #[tokio::test] - async fn draft_chunk_handler_lifecycle_round_trip() { + async fn agent_authored_l4_proposal_with_credential_records_high_finding() { use openshell_core::proto::{ - GetDraftPolicyRequest, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec, + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, }; let state = test_server_state().await; + // Attach a github provider so a credential is in scope for api.github.com. 
+ state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); + let sandbox_name = "agent-l4-with-cred".to_string(); let sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-draft-flow".to_string(), - name: "draft-flow".to_string(), + id: "sb-agent-l4-with-cred".to_string(), + name: sandbox_name.clone(), created_at_ms: 1_000_000, labels: std::collections::HashMap::new(), }), spec: Some(SandboxSpec { - policy: None, + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], ..Default::default() }), phase: SandboxPhase::Ready as i32, ..Default::default() }; state.store.put_message(&sandbox).await.unwrap(); - let sandbox_name = sandbox.object_name().to_string(); let proposed_rule = NetworkPolicyRule { - name: "allow_example".to_string(), + name: "github_l4".to_string(), endpoints: vec![NetworkEndpoint { - host: "api.example.com".to_string(), + host: "api.github.com".to_string(), port: 443, ..Default::default() }], @@ -3970,207 +6130,438 @@ mod tests { }], }; - let submit = handle_submit_policy_analysis( + handle_submit_policy_analysis( &state, Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), proposed_chunks: vec![PolicyChunk { - rule_name: "allow_example".to_string(), - proposed_rule: Some(proposed_rule.clone()), - rationale: "observed denied request".to_string(), - confidence: 0.85, - hit_count: 3, - first_seen_ms: 100, - last_seen_ms: 200, - binary: "/usr/bin/curl".to_string(), + rule_name: "github_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "broad fallback".to_string(), ..Default::default() }], ..Default::default() }), ) .await - .unwrap() - .into_inner(); - assert_eq!(submit.accepted_chunks, 1); - assert_eq!(submit.rejected_chunks, 
0); - assert_eq!(submit.accepted_chunk_ids.len(), 1); - assert!(!submit.accepted_chunk_ids[0].is_empty()); + .unwrap(); - let draft_policy = handle_get_draft_policy( + let draft = handle_get_draft_policy( &state, Request::new(GetDraftPolicyRequest { - name: sandbox_name.clone(), + name: sandbox_name, status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - assert_eq!(draft_policy.draft_version, 1); - assert_eq!(draft_policy.chunks.len(), 1); - assert_eq!(draft_policy.chunks[0].status, "pending"); - let chunk_id = draft_policy.chunks[0].id.clone(); + let verdict = &draft.chunks[0].validation_result; + let first_line = verdict.lines().next().unwrap_or(""); + assert!( + first_line.starts_with("prover: ") && first_line.contains("new finding"), + "expected first line like `prover: N new finding(s)`, got: {verdict}" + ); + assert!( + verdict.contains("credential_reach_expansion"), + "L4 + credential in scope emits credential_reach_expansion (the binary gains \ + credentialed reach to a new host:port); got: {verdict}" + ); + assert!( + verdict.contains("api.github.com:443"), + "expected the finding line to cite the proposed endpoint, got: {verdict}" + ); + } - let approve = handle_approve_draft_chunk( - &state, - Request::new(ApproveDraftChunkRequest { + /// v1 calibration row: **L4 with NO credential in scope → no finding.** + /// Without an attached provider, no credential targets api.github.com, + /// so the prover treats the L4 grant as bounded (no privileged action + /// available) and emits nothing. The proposal verdict reads + /// `prover: no new findings`, eligible for auto-approval. 
+ #[tokio::test] + async fn agent_authored_l4_proposal_without_credential_emits_no_finding() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "agent-l4-no-cred".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-l4-no-cred".to_string(), name: sandbox_name.clone(), - chunk_id: chunk_id.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), }), - ) - .await - .unwrap() - .into_inner(); - assert_eq!(approve.policy_version, 1); - assert!(!approve.policy_hash.is_empty()); + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + // No providers — credential set will be empty. + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); - let history_after_approve = handle_get_draft_history( + let proposed_rule = NetworkPolicyRule { + name: "anon_l4".to_string(), + endpoints: vec![NetworkEndpoint { + host: "example.com".to_string(), + port: 443, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(GetDraftHistoryRequest { + Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "anon_l4".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "no privileged access available".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await - .unwrap() - .into_inner(); - assert_eq!(history_after_approve.entries.len(), 2); - 
assert_eq!(history_after_approve.entries[0].event_type, "proposed"); - assert_eq!(history_after_approve.entries[1].event_type, "approved"); - assert_eq!(history_after_approve.entries[1].chunk_id, chunk_id); + .unwrap(); - let policies_after_approve = handle_list_sandbox_policies( + let draft = handle_get_draft_policy( &state, - Request::new(ListSandboxPoliciesRequest { - name: sandbox_name.clone(), - limit: 10, - offset: 0, - global: false, + Request::new(GetDraftPolicyRequest { + name: sandbox_name, + status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - assert_eq!(policies_after_approve.revisions.len(), 1); - assert_eq!(policies_after_approve.revisions[0].version, 1); + let verdict = &draft.chunks[0].validation_result; + assert_eq!( + verdict, "prover: no new findings", + "L4 grant with no credential in scope is bounded in v1; got: {verdict}" + ); + } + + /// v1 calibration row: **link-local host → HIGH finding regardless of + /// credentials.** Even with no provider attached, a proposal targeting + /// `169.254.169.254` (AWS IMDS / cloud metadata) emits a HIGH finding. + /// This is the one categorical safety floor v1 ships. + #[tokio::test] + async fn agent_authored_link_local_proposal_records_high_finding() { + use openshell_core::proto::{ + FilesystemPolicy, NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxPolicy, + SandboxSpec, + }; + + let state = test_server_state().await; + let sandbox_name = "agent-link-local".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-link-local".to_string(), + name: sandbox_name.clone(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + }), + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + // Deliberately no provider — link-local should still fire. 
+ ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); + + let proposed_rule = NetworkPolicyRule { + name: "metadata_endpoint".to_string(), + endpoints: vec![NetworkEndpoint { + host: "169.254.169.254".to_string(), + port: 80, + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; - let undo = handle_undo_draft_chunk( + handle_submit_policy_analysis( &state, - Request::new(UndoDraftChunkRequest { + Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), - chunk_id: chunk_id.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "metadata_endpoint".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "agent is curious about IMDS".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await - .unwrap() - .into_inner(); - assert_eq!(undo.policy_version, 2); - assert!(!undo.policy_hash.is_empty()); + .unwrap(); - let draft_policy_after_undo = handle_get_draft_policy( + let draft = handle_get_draft_policy( &state, Request::new(GetDraftPolicyRequest { - name: sandbox_name.clone(), + name: sandbox_name, status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - assert_eq!(draft_policy_after_undo.chunks.len(), 1); - assert_eq!(draft_policy_after_undo.chunks[0].status, "pending"); + let verdict = &draft.chunks[0].validation_result; + assert!( + verdict.contains("link_local_reach"), + "link-local proposal must emit link_local_reach regardless of credentials; \ + got: {verdict}" + ); + assert!( + verdict.contains("169.254.169.254"), + "finding line must cite the link-local host; got: {verdict}" + ); + } - let history_after_undo = handle_get_draft_history( - &state, - Request::new(GetDraftHistoryRequest { - name: sandbox_name.clone(), - }), - ) - .await - .unwrap() - .into_inner(); - 
assert_eq!(history_after_undo.entries.len(), 1); - assert_eq!(history_after_undo.entries[0].event_type, "proposed"); + #[tokio::test] + async fn agent_authored_validation_uses_providers_v2_effective_policy() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7DenyRule, L7Rule, NetworkBinary, NetworkEndpoint, + ProviderProfile, ProviderProfileCategory, SandboxPhase, SandboxPolicy, SandboxSpec, + StoredProviderProfile, + }; - let policies_after_undo = handle_list_sandbox_policies( - &state, - Request::new(ListSandboxPoliciesRequest { + let state = test_server_state().await; + enable_providers_v2(&state).await; + state + .store + .put_message(&test_provider("work-custom", "custom-api")) + .await + .unwrap(); + state + .store + .put_message(&StoredProviderProfile { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "profile-custom-api".to_string(), + name: "custom-api".to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + }), + profile: Some(ProviderProfile { + id: "custom-api".to_string(), + display_name: "Custom API".to_string(), + description: String::new(), + category: ProviderProfileCategory::Other as i32, + credentials: Vec::new(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + deny_rules: vec![L7DenyRule { + method: "DELETE".to_string(), + path: "/repos/*".to_string(), + ..Default::default() + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + inference_capable: false, + }), + }) + .await + .unwrap(); + + let sandbox_name = "agent-provider-effective-policy".to_string(); + let sandbox = Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sb-agent-provider-effective-policy".to_string(), name: sandbox_name.clone(), - limit: 10, - offset: 0, - global: false, + created_at_ms: 1_000_000, + labels: HashMap::new(), }), - ) - .await - 
.unwrap() - .into_inner(); - assert_eq!(policies_after_undo.revisions.len(), 2); - assert_eq!(policies_after_undo.revisions[0].version, 2); - assert_eq!(policies_after_undo.revisions[1].version, 1); + spec: Some(SandboxSpec { + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["work-custom".to_string()], + ..Default::default() + }), + phase: SandboxPhase::Ready as i32, + ..Default::default() + }; + state.store.put_message(&sandbox).await.unwrap(); - let cleared = handle_clear_draft_chunks( + let proposed_rule = NetworkPolicyRule { + name: "github_contents_write".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/org/repo/contents/demo/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + + handle_submit_policy_analysis( &state, - Request::new(ClearDraftChunksRequest { + Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_write".to_string(), + proposed_rule: Some(proposed_rule), + rationale: "write one demo file".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await - .unwrap() - .into_inner(); - assert_eq!(cleared.chunks_cleared, 1); + .unwrap(); - let draft_policy_after_clear = handle_get_draft_policy( + let draft = handle_get_draft_policy( &state, Request::new(GetDraftPolicyRequest { - name: sandbox_name.clone(), + name: sandbox_name, status_filter: String::new(), }), ) .await .unwrap() .into_inner(); - 
assert!(draft_policy_after_clear.chunks.is_empty()); - - let history_after_clear = handle_get_draft_history( - &state, - Request::new(GetDraftHistoryRequest { name: sandbox_name }), - ) - .await - .unwrap() - .into_inner(); - assert!(history_after_clear.entries.is_empty()); + let verdict = &draft.chunks[0].validation_result; + let first_line = verdict.lines().next().unwrap_or(""); + assert!( + first_line.starts_with("prover: "), + "validation should run end-to-end against the providers-v2 composed \ + effective policy and produce a prover verdict; got: {verdict}" + ); + assert!( + !verdict.contains("validation unavailable"), + "providers-v2 composition must not break the prover pipeline; \ + got: {verdict}" + ); } - /// A reviewer's free-form rejection reason must round-trip through - /// persistence and surface on the chunk via `GetDraftPolicy`, so the - /// in-sandbox agent can read the guidance and redraft. The MVP-v2 agent - /// feedback loop hangs off this guarantee. + /// End-to-end loop test against the v1 calibration and the auto-approval + /// gate. Mirrors the two-path flow in `examples/agent-driven-policy-management`: + /// + /// 1. Un-credentialed L7 proposal (raw.githubusercontent.com GET) → + /// prover sees no findings → sandbox in `auto` mode → chunk + /// auto-approves without human action. + /// + /// 2. Credentialed L7 proposal (api.github.com PUT) → prover sees + /// `github_token` in scope, emits MEDIUM → chunk lands in pending + /// for human review even under `auto` mode. + /// + /// This is the deterministic counterpart of the demo's product UX + /// claim: "narrow safe = free, narrow credentialed = one approval." 
#[tokio::test] - async fn reject_with_reason_persists_into_chunk_for_agent_readback() { - use openshell_core::proto::{NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec}; + async fn full_loop_under_v2_auto_mode_splits_credentialed_and_uncredentialed() { + use openshell_core::proto::{ + FilesystemPolicy, L7Allow, L7Rule, NetworkBinary, NetworkEndpoint, SandboxPhase, + SandboxPolicy, SandboxSpec, + }; let state = test_server_state().await; - let sandbox_name = "agent-feedback-loop".to_string(); + enable_providers_v2(&state).await; + + // Github provider attached: a credential ends up in scope for + // api.github.com (PUT proposal flags MEDIUM). raw.githubusercontent.com + // is not declared by any provider, so the bootstrap fetch is + // un-credentialed and auto-approves. + state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); + + let sandbox_name = "full-loop-v2".to_string(); let sandbox = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { - id: "sb-feedback".to_string(), + id: "sb-full-loop-v2".to_string(), name: sandbox_name.clone(), created_at_ms: 1_000_000, labels: std::collections::HashMap::new(), }), spec: Some(SandboxSpec { - policy: None, + policy: Some(SandboxPolicy { + version: 1, + filesystem: Some(FilesystemPolicy { + read_write: vec!["/sandbox".to_string()], + ..Default::default() + }), + ..Default::default() + }), + providers: vec!["github-pat".to_string()], ..Default::default() }), phase: SandboxPhase::Ready as i32, ..Default::default() }; state.store.put_message(&sandbox).await.unwrap(); + seed_sandbox_approval_mode(&state, &sandbox_name, "auto").await; - let proposed_rule = NetworkPolicyRule { - name: "allow_example".to_string(), + // ── Step 1: un-credentialed GET → expected auto-approve ── + let uncredentialed_rule = NetworkPolicyRule { + name: "github_raw_openapi_get".to_string(), endpoints: vec![NetworkEndpoint { - host: "api.example.com".to_string(), + host: 
"raw.githubusercontent.com".to_string(), port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "GET".to_string(), + path: "/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json" + .to_string(), + ..Default::default() + }), + }], ..Default::default() }], binaries: vec![NetworkBinary { @@ -4178,15 +6569,15 @@ mod tests { ..Default::default() }], }; - - let submit = handle_submit_policy_analysis( + let step1 = handle_submit_policy_analysis( &state, Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), + analysis_mode: "agent_authored".to_string(), proposed_chunks: vec![PolicyChunk { - rule_name: "allow_example".to_string(), - proposed_rule: Some(proposed_rule), - rationale: "agent intent".to_string(), + rule_name: "github_raw_openapi_get".to_string(), + proposed_rule: Some(uncredentialed_rule), + rationale: "fetch the public github openapi description".to_string(), ..Default::default() }], ..Default::default() @@ -4195,19 +6586,48 @@ mod tests { .await .unwrap() .into_inner(); - let chunk_id = submit.accepted_chunk_ids[0].clone(); + let step1_chunk_id = step1.accepted_chunk_ids[0].clone(); - let guidance = "scope to docs/ paths only, not all repo contents"; - handle_reject_draft_chunk( + // ── Step 2: credentialed PUT → expected MEDIUM, pending ── + let credentialed_rule = NetworkPolicyRule { + name: "github_contents_put".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.github.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/repos/owner/name/contents/path/file.md".to_string(), + ..Default::default() + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }; + let step2 = handle_submit_policy_analysis( &state, 
- Request::new(RejectDraftChunkRequest { + Request::new(SubmitPolicyAnalysisRequest { name: sandbox_name.clone(), - chunk_id: chunk_id.clone(), - reason: guidance.to_string(), + analysis_mode: "agent_authored".to_string(), + proposed_chunks: vec![PolicyChunk { + rule_name: "github_contents_put".to_string(), + proposed_rule: Some(credentialed_rule), + rationale: "write the demo file via the GitHub Contents API".to_string(), + ..Default::default() + }], + ..Default::default() }), ) .await - .unwrap(); + .unwrap() + .into_inner(); + let step2_chunk_id = step2.accepted_chunk_ids[0].clone(); let draft = handle_get_draft_policy( &state, @@ -4219,18 +6639,57 @@ mod tests { .await .unwrap() .into_inner(); - let rejected = draft + + let step1_chunk = draft .chunks .iter() - .find(|c| c.id == chunk_id) - .expect("rejected chunk should still be visible"); - assert_eq!(rejected.status, "rejected"); + .find(|c| c.id == step1_chunk_id) + .expect("step1 chunk present"); + let step2_chunk = draft + .chunks + .iter() + .find(|c| c.id == step2_chunk_id) + .expect("step2 chunk present"); + assert_eq!( - rejected.rejection_reason, guidance, - "reviewer's free-form reason must round-trip into the chunk for agent readback" + step1_chunk.status, "approved", + "un-credentialed L7 proposal under v2 + auto mode must auto-approve; got: {}", + step1_chunk.status + ); + assert_eq!( + step1_chunk.validation_result, "prover: no new findings", + "un-credentialed L7 verdict should be `no new findings`; got: {}", + step1_chunk.validation_result + ); + + assert_eq!( + step2_chunk.status, "pending", + "credentialed L7 PUT under v2 + auto mode must stay pending; got: {}", + step2_chunk.status + ); + // This test's spec policy has no pre-existing rule for curl on + // api.github.com, so the agent's chunk grants brand-new + // credentialed reach: the finding is credential_reach_expansion, + // not capability_expansion. 
(The capability_expansion path is + // suppressed by the delta because the reach is new — one finding + // per change, not two.) The demo's policy.template.yaml has + // github_api_readonly which exercises the capability_expansion + // path; that's covered by the supersede test above. + assert!( + step2_chunk + .validation_result + .contains("credential_reach_expansion"), + "credentialed PUT on a host the binary did not previously reach must carry \ + credential_reach_expansion; got: {}", + step2_chunk.validation_result + ); + assert!( + !step2_chunk + .validation_result + .contains("capability_expansion"), + "capability_expansion must be suppressed when reach itself is new; got: {}", + step2_chunk.validation_result ); - // validation_result is unpopulated until the prover runs (#1097). - assert!(rejected.validation_result.is_empty()); } /// Two agent-authored proposals targeting the same host/port/binary must @@ -4564,6 +7023,14 @@ mod tests { use openshell_core::proto::{NetworkBinary, NetworkEndpoint, SandboxPhase, SandboxSpec}; let state = test_server_state().await; + // Attach a github provider so the L4 proposal below has a credential + // in scope and the prover emits a finding — keeps the chunk + // pending so this cross-sandbox approve check is reachable.
+ state + .store + .put_message(&test_provider("github-pat", "github")) + .await + .unwrap(); let sandbox_a = Sandbox { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { id: "sb-draft-owner".to_string(), @@ -4573,6 +7040,7 @@ mod tests { }), spec: Some(SandboxSpec { policy: None, + providers: vec!["github-pat".to_string()], ..Default::default() }), phase: SandboxPhase::Ready as i32, @@ -4596,9 +7064,9 @@ mod tests { state.store.put_message(&sandbox_b).await.unwrap(); let proposed_rule = NetworkPolicyRule { - name: "allow_example".to_string(), + name: "allow_github".to_string(), endpoints: vec![NetworkEndpoint { - host: "api.example.com".to_string(), + host: "api.github.com".to_string(), port: 443, ..Default::default() }], @@ -4708,6 +7176,7 @@ mod tests { "gateway merged incremental policy op: add-allow api.github.com:443 [POST /repos/*/issues]", 7, "sha256:testhash", + &[], ); assert_eq!( @@ -4716,6 +7185,50 @@ mod tests { ); } + /// Auto-approval audit messages carry `auto=true`, `source=` followed by the + /// proposal's analysis mode, and `prover_delta=empty` as extra unmapped fields so a reviewer can + /// reconstruct the safety reasoning without needing to grep the chunk + /// table. The message text itself says "auto-approved: no new prover + /// findings" — never "safe" — because the claim is about the prover's + /// reasoning, not the world.
+ #[test] + fn build_gateway_policy_audit_message_carries_auto_approve_provenance() { + let extra = [ + ("auto", "true".to_string()), + ("source", "agent_authored".to_string()), + ("prover_delta", "empty".to_string()), + ]; + let message = build_gateway_policy_audit_message( + "sb-123", + "demo-sandbox", + "approved", + "auto-approved: no new prover findings (source=agent_authored) — chunk abc: add-rule x", + 12, + "sha256:autohash", + &extra, + ); + assert!( + message.contains("CONFIG:APPROVED"), + "auto-approval reuses CONFIG:APPROVED; got: {message}" + ); + assert!( + message.contains("auto-approved: no new prover findings"), + "audit copy must say `no new prover findings`, not `safe`; got: {message}" + ); + assert!( + message.contains("auto:true"), + "missing auto field: {message}" + ); + assert!( + message.contains("source:agent_authored"), + "missing source field: {message}" + ); + assert!( + message.contains("prover_delta:empty"), + "missing prover_delta field: {message}" + ); + } + #[test] fn summarize_cli_policy_merge_op_formats_rest_allow_rules() { let operation = PolicyMergeOp::AddAllowRules { diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index 190123cfe..4d604d974 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -12,12 +12,16 @@ Run the full agent-driven policy loop end-to-end: 3. The agent reads `/etc/openshell/skills/policy_advisor.md`, drafts the narrowest rule needed, and submits it to `http://policy.local/v1/proposals`. It saves the returned `chunk_id`. -4. The agent calls `GET /v1/proposals/{chunk_id}/wait?timeout=300` — a single +4. The gateway merges the proposed rule with the current sandbox policy, runs + the policy prover, and stores a concise `validation_result` on the pending + chunk. This is deterministic control-plane evidence, not agent prose. +5. 
The agent calls `GET /v1/proposals/{chunk_id}/wait?timeout=300` — a single HTTP request that the supervisor holds open until the developer decides. This is the load-bearing UX point: the agent burns zero LLM tokens while it waits; it's literally sleeping on a socket. -5. You approve the proposal from the host with one keystroke. -6. The agent's `/wait` returns within ~1 second of the approval. The sandbox +6. You approve the proposal from the host with one keystroke after seeing the + exact rule and the prover verdict in `openshell rule get`. +7. The agent's `/wait` returns within ~1 second of the approval. The sandbox has hot-reloaded the merged policy; the agent retries the original PUT once and exits. @@ -78,6 +82,8 @@ reject with `--reason "scope to docs/ paths only"` and the agent reads | `DEMO_KEEP_SANDBOX` | `0` (set `1` to inspect the sandbox after the demo) | | `DEMO_MANUAL_APPROVE` | `0` (set `1` to pause for host-side `rule approve` / `rule reject --reason`) | | `DEMO_APPROVAL_TIMEOUT_SECS` | `240` (auto), `1800` (manual mode) | +| `DEMO_CODEX_MODEL` | `gpt-5.4-mini` (pinned for ChatGPT-account compatibility; override if your account supports a different model) | +| `DEMO_CODEX_REASONING` | `low` (the demo task is mechanical; `medium`/`high` slow it down without changing outcomes) | | `OPENSHELL_BIN` | `target/debug/openshell` if present, else `openshell` on `PATH` | ## What the agent sees @@ -99,12 +105,29 @@ with three parts, each with a different trust level: | `validation_result` (prover output) | gateway-side prover | trust signal — but this surface is in progress (see [RFC 0001](../../rfc/0001-agent-driven-policy-management.md)) | The MVP today shows the structured rule plus the agent's rationale in -`openshell rule get` and the TUI inbox panel. The demo's `openshell rule -approve-all` auto-approves to keep the loop short — in a real session a -developer reviews the structured grant before pressing `a`. 
Prover-backed -validation badges, computed reachability deltas, and a richer "this is what -the rule actually permits" summary are the next phase. For now, **always -approve based on the structured rule, not the agent's rationale.** +`openshell rule get` and the TUI inbox panel. With prover validation wired +into the gateway, `openshell rule get` also shows a `Validation:` line for +agent-authored chunks. The value is the prover's verdict in OCSF-shorthand +style — one short, scannable string per chunk: + +```text +Validation: prover: no new findings +``` + +```text +Validation: prover: 1 new finding + capability_expansion: PUT on api.github.com:443 via /usr/bin/curl +``` + +Other possible verdicts: `validation unavailable` (gateway-side prover infra +issue — surfaces in the gateway log, not as proposal failure), `merge failed: +…` (proposal won't merge into the current policy), and `policy invalid: …` +(merged policy fails the structural safety check). + +Read the structured rule (Endpoints + Binary). Read the Validation line. +Approve if both look right. The demo's `openshell rule approve-all` +auto-approves to keep the loop short; in a real session a developer makes +that judgment per chunk before pressing `a`. ## Going further diff --git a/examples/agent-driven-policy-management/agent-task.md b/examples/agent-driven-policy-management/agent-task.md index e83a653bd..69e1a4e55 100644 --- a/examples/agent-driven-policy-management/agent-task.md +++ b/examples/agent-driven-policy-management/agent-task.md @@ -3,8 +3,11 @@ # Agent Task -You are running inside an OpenShell sandbox. Your job is to write one -markdown file to GitHub via the GitHub Contents API. +You are running inside an OpenShell sandbox. Your job has two steps. 
Each +step starts with a `curl` call that the L7 proxy will deny; for each +denial, read `/etc/openshell/skills/policy_advisor.md`, draft a narrow +proposal, submit it to `http://policy.local/v1/proposals`, wait on +`GET /v1/proposals/{chunk_id}/wait?timeout=300`, then retry. ## Target @@ -15,50 +18,90 @@ markdown file to GitHub via the GitHub Contents API. ## What to do +### Step 1 — Fetch a public schema (un-credentialed action) + +1. Fetch the well-known GitHub OpenAPI description from + `https://raw.githubusercontent.com/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json` + with `curl`. Do not send any authentication header — this resource is + public. Save the response to `/tmp/github-openapi.json`. + + The first attempt will fail with HTTP 403 and a JSON body containing + `"error": "policy_denied"`. Read the body — it tells you which layer + denied you, which method/path was missing, and where to learn more. + +2. Read `/etc/openshell/skills/policy_advisor.md` and follow it. Submit + the narrowest possible proposal to `http://policy.local/v1/proposals` + — exact host (`raw.githubusercontent.com`), exact port (443), exact + method (`GET`), exact path (`/github/rest-api-description/main/descriptions/api.github.com/api.github.com.json`), + binary `/usr/bin/curl`. Do not propose wildcard hosts. Save + `accepted_chunk_ids[0]`. + +3. Block on the gateway's decision by calling + `GET http://policy.local/v1/proposals/{chunk_id}/wait?timeout=300`. + + - `status: "approved"` — retry the fetch once. Because the host has + no credentials in scope (it's a public-content endpoint), the + prover sees no findings and the gateway auto-approves the + proposal without human review. The whole step typically completes + in ~1 second after submission. + - `status: "rejected"` — read `rejection_reason`. If it has text, + address the specific feedback and submit a revised proposal; + otherwise stop and tell the developer you can't proceed. 
+ - `status: "pending"` with `timed_out: true` — re-issue the same + `/wait` request once. Each `/wait` is one long-lived HTTP call. + +4. After the retry, verify the file exists at `/tmp/github-openapi.json` + and is non-empty. + +### Step 2 — Write to GitHub (credentialed action) + 1. PUT the file with `curl` against `https://api.github.com/repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`. - Authenticate with the `GITHUB_TOKEN` environment variable. Do not echo the - token. Body content (base64-encoded into the JSON `content` field): + Authenticate with the `GITHUB_TOKEN` environment variable. Do not + echo the token. Body content (base64-encoded into the JSON `content` + field): ``` # OpenShell policy advisor demo Run id: {{RUN_ID}} - Written from inside an OpenShell sandbox after a narrowly-scoped policy - proposal was approved by the developer. + Written from inside an OpenShell sandbox after a narrowly-scoped + policy proposal was reviewed by the developer. ``` 2. The first attempt will fail with HTTP 403 and a JSON body containing - `"error": "policy_denied"`. Read the body — it tells you which layer denied - you (`l7`/`rest`), which method/path was missing, and where to learn more. + `"error": "policy_denied"`. Read the body — it tells you which layer + denied you (`l7`/`rest`), which method/path was missing, and where to + learn more. -3. Read `/etc/openshell/skills/policy_advisor.md` and follow it. Submit the - narrowest possible proposal to `http://policy.local/v1/proposals` — exact - host, exact port, exact method, exact path, binary `/usr/bin/curl`. Do not - include query strings. Do not propose wildcard hosts. The 202 response - carries `accepted_chunk_ids`; this demo submits one rule per proposal, so - the list always has exactly one element. Save `accepted_chunk_ids[0]`, - you need it for step 4. +3. 
Submit the narrowest possible proposal to + `http://policy.local/v1/proposals` — exact host (`api.github.com`), + exact port (443), exact method (`PUT`), exact path + (`/repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`), binary + `/usr/bin/curl`. Do not include query strings. Do not propose + wildcard hosts. Save `accepted_chunk_ids[0]`. 4. Block on the developer's decision by calling - `GET http://policy.local/v1/proposals/{chunk_id}/wait?timeout=300`. This is - a single HTTP request that the supervisor holds open until the developer - approves or rejects; do not run a polling loop yourself. + `GET http://policy.local/v1/proposals/{chunk_id}/wait?timeout=300`. + - This time the prover emits a `capability_expansion` finding: PUT + is a new method on a host the binary already had credentialed + reach to (read-only). That's a stated intent change, so the + gateway holds the chunk in `pending` for human review instead of + auto-approving. The `/wait` call still parks on a socket — zero + LLM tokens burn while the human decides. - `status: "approved"` — retry the PUT once. Policy has hot-reloaded. - - `status: "rejected"` — read `rejection_reason`. If it has text, address - the specific feedback and submit a revised proposal (back to step 3); - otherwise stop and tell the developer you can't proceed. - - `status: "pending"` with `timed_out: true` — the supervisor returned - without a decision after the full timeout window elapsed. Immediately - re-issue the same `/wait` request once. Each `/wait` is one long-lived - HTTP call; do not sleep, do not loop with a short timeout, do not - decrease `timeout=300`. + - `status: "rejected"` — read `rejection_reason`. If it has text, + address the specific feedback and submit a revised proposal (back + to step 3); otherwise stop and tell the developer you can't + proceed. + - `status: "pending"` with `timed_out: true` — re-issue the same + `/wait` request once. 5. 
On a successful PUT (HTTP 200 or 201), print a short summary showing - `content.path` and `content.html_url` from the GitHub response. Do not - print the full response body. + `content.path` and `content.html_url` from the GitHub response. Do + not print the full response body. If anything is unclear, prefer making a narrower proposal and asking for approval again over widening the rule. diff --git a/examples/agent-driven-policy-management/demo.sh b/examples/agent-driven-policy-management/demo.sh index a3e1d1836..1a451da38 100755 --- a/examples/agent-driven-policy-management/demo.sh +++ b/examples/agent-driven-policy-management/demo.sh @@ -5,25 +5,11 @@ # Agent-driven policy management demo. # -# Runs the full loop end-to-end: -# -# 1. A Codex agent inside an OpenShell sandbox attempts a PUT that the L7 -# proxy denies with a structured policy_denied 403. -# 2. The agent reads /etc/openshell/skills/policy_advisor.md. -# 3. The agent submits a narrow proposal (exact host, port, method, path) -# to policy.local and saves the returned chunk_id. -# 4. The agent blocks on `GET /v1/proposals/{chunk_id}/wait` — one HTTP -# call that sleeps on a socket. THE AGENT BURNS ZERO LLM TOKENS WHILE -# IT WAITS; this is the load-bearing UX win over polling. -# 5. The developer (this script, simulating the host side) sees the pending -# proposal in `openshell rule get` and approves it. -# 6. The agent's /wait returns approved within ~1 second of the approval, -# retries the original PUT once against the hot-reloaded policy, and -# exits. -# -# The whole loop is feature-flagged behind agent_policy_proposals_enabled and -# requires no GitHub credentials beyond the repo write token already used by -# the existing demo flow. +# Shows the approval loop in one run: +# deny → agent proposes narrow access → gateway validates → approve → retry. +# A public raw.githubusercontent.com GET auto-approves; the GitHub PUT waits +# for review because a GitHub credential is in scope. 
See README.md for the +# full walkthrough. set -euo pipefail @@ -51,6 +37,8 @@ DEMO_FILE_PATH="${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md" DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-demo-${DEMO_RUN_ID}}" DEMO_CODEX_PROVIDER_NAME="${DEMO_CODEX_PROVIDER_NAME:-codex-policy-demo-${DEMO_RUN_ID}}" DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-demo-${DEMO_RUN_ID}}" +DEMO_CODEX_MODEL="${DEMO_CODEX_MODEL:-gpt-5.4-mini}" +DEMO_CODEX_LOCAL_BIN="${DEMO_CODEX_LOCAL_BIN:-}" DEMO_MANUAL_APPROVE="${DEMO_MANUAL_APPROVE:-0}" # Manual approvals need more headroom than the auto-approve loop — a human # reads the proposal, thinks, and decides. Bump the default to 30 min when @@ -134,19 +122,18 @@ spin_clear() { # — a sed delimiter collision in one of the substitutions blanks the entire # log tail, hiding the very failure context we're trying to surface. redact_log() { - python3 - \ - "${DEMO_GITHUB_TOKEN:-}" \ - "${CODEX_AUTH_ACCESS_TOKEN:-}" \ - "${CODEX_AUTH_REFRESH_TOKEN:-}" \ - "${CODEX_AUTH_ACCOUNT_ID:-}" \ - <<'PY' + python3 -c ' import sys tokens = [t for t in sys.argv[1:] if t] for line in sys.stdin: for t in tokens: line = line.replace(t, "[redacted]") sys.stdout.write(line) -PY +' \ + "${DEMO_GITHUB_TOKEN:-}" \ + "${CODEX_AUTH_ACCESS_TOKEN:-}" \ + "${CODEX_AUTH_REFRESH_TOKEN:-}" \ + "${CODEX_AUTH_ACCOUNT_ID:-}" } fail() { @@ -186,6 +173,20 @@ cleanup() { fi fi + # Restore the providers_v2_enabled setting to what it was before this + # run. The demo opts in to v2 composition so provider profiles + # contribute to the effective policy; restore so the host's broader + # workflow isn't affected. 
+ if [[ -n "${PRIOR_PROVIDERS_V2_FLAG:-}" ]]; then + if [[ "$PRIOR_PROVIDERS_V2_FLAG" == "(unset)" ]]; then + "$OPENSHELL_BIN" settings delete --global --key providers_v2_enabled --yes \ + >/dev/null 2>&1 || true + else + "$OPENSHELL_BIN" settings set --global --key providers_v2_enabled \ + --value "$PRIOR_PROVIDERS_V2_FLAG" --yes >/dev/null 2>&1 || true + fi + fi + if [[ $status -eq 0 ]]; then rm -rf "$TMP_DIR" else @@ -219,7 +220,7 @@ resolve_github_token() { resolve_codex_auth() { [[ -f "${HOME}/.codex/auth.json" ]] || fail "missing local Codex sign-in; run: codex login" - export CODEX_AUTH_ACCESS_TOKEN CODEX_AUTH_REFRESH_TOKEN CODEX_AUTH_ACCOUNT_ID + export CODEX_AUTH_ACCESS_TOKEN CODEX_AUTH_REFRESH_TOKEN CODEX_AUTH_ACCOUNT_ID DEMO_CODEX_MODEL CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "${HOME}/.codex/auth.json")" CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "${HOME}/.codex/auth.json")" CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "${HOME}/.codex/auth.json")" @@ -330,7 +331,13 @@ render_payload() { -e "s|{{FILE_PATH}}|${DEMO_FILE_PATH}|g" \ -e "s|{{RUN_ID}}|${DEMO_RUN_ID}|g" \ "$TASK_TEMPLATE" > "${PAYLOAD_DIR}/agent-task.md" - cp "$SANDBOX_AGENT" "${PAYLOAD_DIR}/sandbox-agent.sh" + sed "s|DEMO_CODEX_MODEL=\"\${DEMO_CODEX_MODEL:-gpt-5.4-mini}\"|DEMO_CODEX_MODEL=\"\${DEMO_CODEX_MODEL:-${DEMO_CODEX_MODEL}}\"|" \ + "$SANDBOX_AGENT" > "${PAYLOAD_DIR}/sandbox-agent.sh" + if [[ -n "$DEMO_CODEX_LOCAL_BIN" ]]; then + [[ -x "$DEMO_CODEX_LOCAL_BIN" ]] || fail "DEMO_CODEX_LOCAL_BIN is not executable: $DEMO_CODEX_LOCAL_BIN" + cp "$DEMO_CODEX_LOCAL_BIN" "${PAYLOAD_DIR}/codex" + chmod +x "${PAYLOAD_DIR}/codex" + fi cp "$POLICY_TEMPLATE" "$POLICY_FILE" } @@ -347,7 +354,7 @@ create_providers() { "$OPENSHELL_BIN" provider create \ --name "$DEMO_GITHUB_PROVIDER_NAME" \ - --type generic \ + --type github \ --credential DEMO_GITHUB_TOKEN >/dev/null info "providers created (codex, github) — credentials injected as env vars 
only" @@ -357,9 +364,10 @@ start_agent_sandbox() { step "Launching sandbox; agent will hit a policy block and draft a proposal" "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true - info "initial policy: read-only access to api.github.com (no PUT)" - info "agent task: PUT /repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}" - info "live log: ${AGENT_LOG}" + info "policy: raw GitHub schema path denied; GitHub writes denied" + info "approval: auto for no new findings; review for credential risk" + info "target: PUT /repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}" + info "log: ${AGENT_LOG}" # `--upload :/sandbox` preserves the source directory basename # (matches `scp -r`/`cp -r`, see PRs #952 / #1028), so `${PAYLOAD_DIR}` @@ -372,6 +380,7 @@ start_agent_sandbox() { --provider "$DEMO_CODEX_PROVIDER_NAME" \ --provider "$DEMO_GITHUB_PROVIDER_NAME" \ --policy "$POLICY_FILE" \ + --approval-mode auto \ --upload "${PAYLOAD_DIR}:/sandbox" \ --no-git-ignore \ --no-auto-providers \ @@ -381,51 +390,97 @@ start_agent_sandbox() { AGENT_PID="$!" } -# Strip the rule_get output down to the lines a developer needs to make an -# informed approve/reject decision: rationale, binary, endpoint. Filters the -# noisy fields (UUID, agent-generated rule_name, hardcoded confidence, -# duplicate Binaries) until `openshell rule get` learns to print L7 -# method/path itself (tracked separately). -# -# `openshell rule get` colorizes labels with ANSI escapes; strip them before -# parsing so the field-name match works in piped contexts. +# Strip `rule get` down to the approval contract: chunk, binary, access, +# and the prover's categorical findings (no severity grade — the prover +# emits category names like `credential_reach_expansion` and +# `capability_expansion`). 
summarize_pending() { local pending="$1" sed 's/\x1b\[[0-9;]*m//g' "$pending" \ | awk ' - /Rationale:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } - /Binary:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } - /Endpoints:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } + BEGIN { + in_validation = 0 + chunk_count = 0 + validation_printed = 0 + } + /^[[:space:]]*Chunk:/ { + in_validation = 0 + chunk_count++ + validation_printed = 0 + if (chunk_count > 1) print "" + sub(/^[[:space:]]*/, "") + chunk_id = $2 + short_id = substr(chunk_id, 1, 8) + print " Request " chunk_count ": chunk " short_id + next + } + /Binary:/ { + in_validation = 0 + sub(/^[[:space:]]*/, "") + sub(/^Binary:/, "Binary: ") + print " " $0 + next + } + /Endpoints:/ { + in_validation = 0 + sub(/^[[:space:]]*/, "") + if (!validation_printed) { + print " Prover: no verdict shown" + validation_printed = 1 + } + sub(/^Endpoints:/, "Access: ") + print " " $0 + next + } + /Validation:/ { + in_validation = 1 + validation_printed = 1 + sub(/^[[:space:]]*/, "") + sub(/^Validation:[[:space:]]*(prover:[[:space:]]*)?/, "Prover: ") + print " " $0 + next + } + /Rationale:/ { + in_validation = 0 + sub(/^[[:space:]]*/, "") + sub(/^Rationale:/, "Reason: ") + print " " $0 + next + } + # Indented continuation lines of the validation block are + # category-named finding rows (e.g., + # `capability_expansion: PUT on api.github.com:443 via /usr/bin/curl`). + in_validation && /^[[:space:]]+(credential_reach_expansion|capability_expansion|l7_bypass_credentialed|link_local_reach):/ { + sub(/^[[:space:]]*/, "") + print " Finding: " $0 + next + } + { in_validation = 0 } ' } +pending_requires_review() { + local pending="$1" + local clean + # Empty-delta chunks can appear in the pending view for a moment before the + # gateway records auto-approval. Keep the demo focused on actual review + # work: findings, merge failures, or policy validation failures. 
+ clean="$(sed 's/\x1b\[[0-9;]*m//g' "$pending")" + if grep -Eq 'Validation: (prover: [1-9][0-9]* new finding|merge failed|policy invalid)|^[[:space:]]+(credential_reach_expansion|capability_expansion|l7_bypass_credentialed|link_local_reach):' <<<"$clean"; then + return 0 + fi + if grep -q 'Validation:' <<<"$clean"; then + return 1 + fi + return 0 +} + narrate_sandbox_workflow() { - info "Inside the sandbox right now:" - info "" - info " • agent: ${DIM}curl -X PUT https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/...${RESET}" - info " • L7 proxy denies the write and returns a structured 403 the" - info " agent can parse and act on:" - cat </dev/null 2>&1; then spin_clear if ! wait "$AGENT_PID"; then @@ -484,27 +542,44 @@ approve_pending_until_agent_exits() { fi AGENT_PID="" if (( approval_count == 0 )); then - fail "agent exited before any pending proposal appeared" + info "agent exited with zero review approvals (all proposals auto-approved)" + else + info "agent exited after ${approval_count} review approval(s)" fi - info "agent exited after ${approval_count} approval(s)" return fi - # Anything pending? Approve and keep watching — the agent may - # redraft if a previous proposal didn't yield the access it needed. + # Anything pending needs an explicit host-side decision. Auto mode only + # bypasses this when the gateway validation finds no new risk. if "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending >"$pending" 2>/dev/null \ && grep -q "Chunk:" "$pending" && grep -q "pending" "$pending"; then + if ! 
pending_requires_review "$pending"; then + spin_wait "waiting for auto-approvals to settle" 2 + continue + fi spin_clear info "" - info "${GREEN}proposal received:${RESET}" + info "${YELLOW}approval requested${RESET}" summarize_pending "$pending" if [[ "$DEMO_MANUAL_APPROVE" == "1" ]]; then approve_manually "$pending" else - step "Approving — the agent's /wait will return within ~1s" - "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" \ - | awk '/approved/ { print " " $0 }' + info "" + spin_wait "letting the proposal land before approving" 2 + spin_clear + step "Approving for demo" + local approve_output + if ! approve_output="$("$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" 2>&1)"; then + if grep -q "no pending chunks to approve" <<<"$approve_output"; then + info " decision already recorded" + else + printf "%s\n" "$approve_output" >&2 + fail "could not approve pending proposal" + fi + else + awk '/approved/ { print " " $0 }' <<<"$approve_output" + fi fi approval_count=$((approval_count + 1)) fi @@ -532,21 +607,13 @@ verify_github_write() { jq -r '" file: \(.path)", " url: \(.html_url)"' "$body" } -# Print the OCSF JSONL trace, filtered to the three events that *are* the -# demo's story: the L7 PUT deny, the policy hot-reload, and the L7 PUT allow. -# The native OCSF shorthand is informative and consistent with the rest of -# OpenShell's logging — keep it as-is rather than re-formatting. +# Print the concise OCSF trace that shows deny, proposal, decision, reload, +# and successful retry. 
show_logs() { - step "Policy decision trace (OCSF)" - # Filter to the events that tell the loop's story end-to-end, ordered by - # the trace's own timestamps: - # HTTP:PUT DENIED — initial proxy enforcement - # CONFIG:PROPOSED — agent submitted a chunk to the gateway - # CONFIG:APPROVED/REJECTED — developer decided; agent's /wait woke up - # CONFIG:LOADED — supervisor hot-reloaded the merged policy - # HTTP:PUT ALLOWED — agent's retry succeeded + step "Decision trace" "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 10m -n 200 2>&1 \ - | grep -E 'HTTP:PUT.*(DENIED|ALLOWED)|CONFIG:(PROPOSED|APPROVED|REJECTED|LOADED)' \ + | grep -E 'HTTP:PUT.*(DENIED|ALLOWED)|agent_authored proposal|auto-approved: no new prover findings \(source=agent_authored\)|gateway approved draft chunk .*PUT|Policy reloaded successfully' \ + | grep -v 'source=mechanistic' \ | sed 's/^/ /' || true } @@ -557,14 +624,26 @@ enable_agent_proposals() { # delete` rather than a value write. local prior prior="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ - | grep -o '"agent_policy_proposals_enabled"[^,}]*' \ - | grep -o 'true\|false' | head -1)" + | jq -r '.settings.agent_policy_proposals_enabled // empty | tostring | select(. == "true" or . == "false")')" PRIOR_PROPOSALS_FLAG="${prior:-(unset)}" "$OPENSHELL_BIN" settings set --global \ --key agent_policy_proposals_enabled --value true --yes >/dev/null \ || fail "could not enable agent_policy_proposals_enabled globally" } +enable_providers_v2() { + # Providers-v2 composition is behind a global flag. The demo opts in + # so provider profiles (codex, github) contribute to the effective + # policy via composition. Cleanup restores the prior value. + local prior + prior="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ + | jq -r '.settings.providers_v2_enabled // empty | tostring | select(. == "true" or . 
== "false")')" + PRIOR_PROVIDERS_V2_FLAG="${prior:-(unset)}" + "$OPENSHELL_BIN" settings set --global \ + --key providers_v2_enabled --value true --yes >/dev/null \ + || fail "could not enable providers_v2_enabled globally" +} + main() { validate_env @@ -574,6 +653,7 @@ main() { render_payload create_providers enable_agent_proposals + enable_providers_v2 show_run_summary diff --git a/examples/agent-driven-policy-management/policy.template.yaml b/examples/agent-driven-policy-management/policy.template.yaml index e920277b5..0498ecfcc 100644 --- a/examples/agent-driven-policy-management/policy.template.yaml +++ b/examples/agent-driven-policy-management/policy.template.yaml @@ -3,13 +3,21 @@ # Initial sandbox policy for the agent-driven policy demo. # -# The agent inside the sandbox can: -# - reach Codex's model and auth endpoints (codex) -# - clone Codex plugin repos read-only (codex_plugins) -# - read api.github.com via curl (github_api_readonly) +# The demo exercises two flavors of denial-→-propose-→-decision: # -# The agent CANNOT write to GitHub yet. That's the proposal it has to draft -# and ask the developer to approve. +# - Step 1 hits raw.githubusercontent.com (no credential in scope). The +# host is pre-listed at L7 with only a bootstrap path, so the agent's GET +# structured-403's. The agent proposes the exact path; the prover +# sees no credential exposure and the gateway auto-approves. +# +# - Step 2 hits api.github.com PUT (github credential in scope). The +# host is pre-allowed for read-only access, so the PUT +# structured-403's. The agent proposes the narrow PUT path; the +# prover sees github_token in scope and emits a capability_expansion finding. The chunk +# lands in pending for human review; demo.sh approves on the developer's behalf. +# +# This shows both halves of the loop in one run: free path for safe +# changes, single human approval for credentialed ones.
version: 1 @@ -35,35 +43,41 @@ network_policies: - { host: ab.chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } binaries: - { path: /usr/bin/codex } + - { path: /sandbox/payload/codex } - { path: /usr/bin/node } - { path: "/usr/lib/node_modules/@openai/**" } - codex_plugins: - name: codex-plugins + github_api_readonly: + # api.github.com pre-allowed for read-only access. Writes (PUT/POST/PATCH/DELETE) + # structured-403 at L7 — the agent proposes the specific method/path, + # and the prover gates on credential-in-scope (github provider attached). + name: github-api-readonly endpoints: - - host: github.com + - host: api.github.com port: 443 protocol: rest enforcement: enforce - rules: - - allow: - method: GET - path: "/openai/plugins.git/info/refs*" - - allow: - method: POST - path: "/openai/plugins.git/git-upload-pack" + access: read-only binaries: - - { path: /usr/bin/git } - - { path: /usr/lib/git-core/git-remote-http } - - { path: "/usr/lib/node_modules/@openai/**" } + - { path: /usr/bin/curl } - github_api_readonly: - name: github-api-readonly + github_raw_scoped: + # raw.githubusercontent.com — pre-listed at L7 with one bootstrap + # path so the L7 validator accepts the rule. The agent must propose + # any additional GET paths it actually needs. Each new proposal is + # un-credentialed (no provider declares this host), so the prover + # sees no findings and the gateway auto-approves narrow scoped reads + # when `proposal_approval_mode = auto` (set via `--approval-mode auto` + # at create or via `openshell settings set` at runtime). 
+ name: github-raw-scoped endpoints: - - host: api.github.com + - host: raw.githubusercontent.com port: 443 protocol: rest enforcement: enforce - access: read-only + rules: + - allow: + method: GET + path: /github/rest-api-description/main/README.md binaries: - { path: /usr/bin/curl } diff --git a/examples/agent-driven-policy-management/sandbox-agent.sh b/examples/agent-driven-policy-management/sandbox-agent.sh index 052535c35..45449dd92 100755 --- a/examples/agent-driven-policy-management/sandbox-agent.sh +++ b/examples/agent-driven-policy-management/sandbox-agent.sh @@ -74,9 +74,29 @@ cd "$WORK" # compare runs. DEMO_CODEX_REASONING="${DEMO_CODEX_REASONING:-low}" -exec codex exec \ - --skip-git-repo-check \ - --sandbox danger-full-access \ - --ephemeral \ +# Pin the model to one that ChatGPT-account Codex users can reach and that is +# quick enough for the mechanical proposal loop. Override with DEMO_CODEX_MODEL +# if your account supports something different. +DEMO_CODEX_MODEL="${DEMO_CODEX_MODEL:-gpt-5.4-mini}" +CODEX_BIN="${CODEX_BIN:-codex}" +if [[ -x /sandbox/payload/codex ]]; then + CODEX_BIN="/sandbox/payload/codex" +fi + +CODEX_EXEC_ARGS=( + exec + --skip-git-repo-check + --sandbox danger-full-access + --ephemeral +) +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-user-config"; then + CODEX_EXEC_ARGS+=(--ignore-user-config) +fi +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-rules"; then + CODEX_EXEC_ARGS+=(--ignore-rules) +fi + +exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \ + -c "model=\"${DEMO_CODEX_MODEL}\"" \ -c "model_reasoning_effort=\"${DEMO_CODEX_REASONING}\"" \ "$(cat /sandbox/payload/agent-task.md)" diff --git a/proto/openshell.proto b/proto/openshell.proto index e4a1b0673..60b83edd1 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -261,6 +261,12 @@ message SandboxSpec { // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the // first available GPU. 
string gpu_device = 10; + // Field 11 was `proposal_approval_mode`. The approval mode is now a + // runtime setting (gateway or sandbox scope) read via UpdateConfig / + // GetSandboxConfig, so it can be flipped on a running sandbox and + // managed fleet-wide. + reserved 11; + reserved "proposal_approval_mode"; } // Public sandbox template mapped onto compute-driver template inputs. diff --git a/providers/github.yaml b/providers/github.yaml index cc24ae922..daf7f8316 100644 --- a/providers/github.yaml +++ b/providers/github.yaml @@ -13,11 +13,15 @@ credentials: auth_style: bearer header_name: authorization endpoints: + # api.github.com is the REST API surface. Defaults to read-only — + # writes require an explicit policy proposal so the agentic loop + + # prover can audit each capability change. - host: api.github.com port: 443 protocol: rest - access: read-write + access: read-only enforcement: enforce + # github.com is the git transport (clone / fetch by default). - host: github.com port: 443 protocol: rest